diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index b4ce76b7..5ca26951 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -261,13 +261,26 @@ class ContinuationContext(BaseModel): """Pydantic model for continuation context information.""" section_count: int delivered_summary: str - cut_off_element: Optional[str] = None - element_before_cutoff: Optional[str] = None template_structure: Optional[str] = None last_complete_part: Optional[str] = None incomplete_part: Optional[str] = None - structure_context: Optional[str] = None last_raw_json: Optional[str] = None + overlap_context: Optional[str] = None # From jsonContinuation.getContexts() - innermost element containing cut + hierarchy_context: Optional[str] = None # From jsonContinuation.getContexts() - full structure from root to cut + + +class JsonContinuationContexts(BaseModel): + """ + Pydantic model for JSON continuation contexts. + + Contains three contexts for truncated JSON strings: + - overlapContext: The innermost object/array element containing the cut point (for merging) + - hierarchyContext: Full structure from root to cut with budget-limited values + - completePart: Valid JSON with all structures properly closed + """ + overlapContext: str = Field(description="The innermost object/array element containing the cut point (for merging)") + hierarchyContext: str = Field(description="Full structure from root to cut with budget-limited values") + completePart: str = Field(description="Valid JSON with all structures properly closed") class SectionPromptArgs(BaseModel): diff --git a/modules/services/serviceAi/merge_1.txt b/modules/services/serviceAi/merge_1.txt index 0a9a9895..1b08b35b 100644 --- a/modules/services/serviceAi/merge_1.txt +++ b/modules/services/serviceAi/merge_1.txt @@ -1,64 +1,57 @@ ================================================================================ JSON MERGE OPERATION #1 
================================================================================ -Timestamp: 2026-01-04T23:08:13.252204 +Timestamp: 2026-01-05T08:30:55.469646 INPUT: - Accumulated length: 31737 chars - New Fragment length: 10178 chars - Accumulated: 409 lines (showing first 5 and last 5) + Accumulated length: 419 chars + New Fragment length: 120 chars + Accumulated: 20 lines (showing first 5 and last 5) { - "elements": [ - { - "type": "table", - "content": { - ... (399 lines omitted) ... - [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579], - [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691], - [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831], - [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957], - [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039 - New Fragment: 135 lines (showing first 5 and last 5) - ```json - { - "elements": [ - { - "type": "table", - ... (125 lines omitted) ... - } - } - ] - } - ``` + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + ... (10 lines omitted) ... + { + "type": "heading", + "content": { + "text": "Introduction", + + New Fragment: 8 lines (showing first 5 and last 5) + "level": 1 + } + } + ] + }, + { + "id": "sec2", + "conten - Normalized Accumulated (31737 chars) - (showing first 5 and last 5 of 409 lines) + Normalized Accumulated (407 chars) + (showing first 5 and last 5 of 19 lines) { - "elements": [ - { - "type": "table", - "content": { - ... (399 lines omitted) ... 
- [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579], - [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691], - [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831], - [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957], - [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039 + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + ... (9 lines omitted) ... + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction", - Normalized New Fragment (10166 chars) - (showing first 5 and last 5 of 133 lines) - { - "elements": [ - { - "type": "table", - "content": { - ... (123 lines omitted) ... - ] - } - } - ] - } + Normalized New Fragment (115 chars) + "level": 1 + } + } + ] + }, + { + "id": "sec2", + "conten STEP: PHASE 1 Description: Finding overlap between JSON strings ⏳ In progress... @@ -70,832 +63,52 @@ STEP: PHASE 1 ⚠️ NO OVERLAP FOUND - This indicates iterations should stop Closing JSON and returning final result - Closed JSON (31743 chars): + Closed JSON (414 chars): ============================================================================== { - "elements": [ + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + }, + "documents": [ { - "type": "table", - "content": { - "headers": ["Spalte1", "Spalte2", "Spalte3", "Spalte4", "Spalte5", "Spalte6", "Spalte7", "Spalte8", "Spalte9", "Spalte10"], - "rows": [ - [2, 3, 5, 7, 11, 13, 17, 19, 23, 29], - [31, 37, 41, 43, 47, 53, 59, 61, 67, 71], - [73, 79, 83, 89, 97, 101, 103, 107, 109, 113], - [127, 131, 137, 139, 149, 151, 157, 163, 167, 173], - [179, 181, 191, 193, 197, 199, 211, 223, 227, 229], - [233, 239, 241, 251, 257, 263, 269, 271, 277, 281], - [283, 293, 307, 311, 313, 317, 331, 337, 347, 349], - [353, 359, 367, 373, 379, 383, 389, 397, 401, 409], - [419, 421, 431, 433, 439, 443, 449, 457, 461, 463], - [467, 479, 487, 491, 499, 503, 
509, 521, 523, 541], - [547, 557, 563, 569, 571, 577, 587, 593, 599, 601], - [607, 613, 617, 619, 631, 641, 643, 647, 653, 659], - [661, 673, 677, 683, 691, 701, 709, 719, 727, 733], - [739, 743, 751, 757, 761, 769, 773, 787, 797, 809], - [811, 821, 823, 827, 829, 839, 853, 857, 859, 863], - [877, 881, 883, 887, 907, 911, 919, 929, 937, 941], - [947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013], - [1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069], - [1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151], - [1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223], - [1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291], - [1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373], - [1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451], - [1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511], - [1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583], - [1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657], - [1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733], - [1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811], - [1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889], - [1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987], - [1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053], - [2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129], - [2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213], - [2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287], - [2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357], - [2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423], - [2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531], - [2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617], - [2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687], - [2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741], - [2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819], - [2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 
2903], - [2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999], - [3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079], - [3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181], - [3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257], - [3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331], - [3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413], - [3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511], - [3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571], - [3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643], - [3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727], - [3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821], - [3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907], - [3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989], - [4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057], - [4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139], - [4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231], - [4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297], - [4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409], - [4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493], - [4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583], - [4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657], - [4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751], - [4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831], - [4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937], - [4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003], - [5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087], - [5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179], - [5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279], - [5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387], - [5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443], - [5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521], - [5527, 
5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639], - [5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693], - [5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791], - [5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857], - [5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939], - [5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053], - [6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133], - [6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221], - [6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301], - [6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367], - [6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473], - [6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571], - [6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673], - [6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761], - [6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833], - [6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917], - [6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997], - [7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103], - [7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207], - [7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297], - [7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411], - [7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499], - [7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561], - [7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643], - [7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723], - [7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829], - [7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919], - [7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017], - [8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111], - [8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219], - [8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291], - [8293, 8297, 8311, 
8317, 8329, 8353, 8363, 8369, 8377, 8387], - [8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501], - [8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597], - [8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677], - [8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741], - [8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831], - [8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929], - [8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011], - [9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109], - [9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199], - [9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283], - [9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377], - [9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439], - [9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533], - [9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631], - [9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733], - [9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811], - [9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887], - [9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007], - [10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099], - [10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177], - [10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271], - [10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343], - [10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459], - [10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567], - [10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657], - [10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739], - [10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859], - [10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949], - [10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 
11057, 11059], - [11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149], - [11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251], - [11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329], - [11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443], - [11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527], - [11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657], - [11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777], - [11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833], - [11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933], - [11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011], - [12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109], - [12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211], - [12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289], - [12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401], - [12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487], - [12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553], - [12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641], - [12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739], - [12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829], - [12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923], - [12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007], - [13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109], - [13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187], - [13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309], - [13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411], - [13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499], - [13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 
13613, 13619], - [13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697], - [13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781], - [13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879], - [13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967], - [13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081], - [14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197], - [14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323], - [14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419], - [14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519], - [14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593], - [14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699], - [14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767], - [14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851], - [14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947], - [14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073], - [15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149], - [15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259], - [15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319], - [15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401], - [15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 15493, 15497], - [15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607], - [15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679], - [15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773], - [15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881], - [15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971], - [15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069], - [16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 
16141, 16183], - [16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267], - [16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381], - [16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481], - [16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603], - [16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691], - [16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811], - [16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903], - [16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993], - [17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093], - [17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191], - [17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317], - [17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389], - [17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477], - [17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573], - [17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669], - [17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783], - [17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891], - [17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971], - [17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059], - [18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143], - [18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233], - [18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313], - [18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427], - [18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517], - [18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637], - [18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749], - [18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 
18869, 18899], - [18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009], - [19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121], - [19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219], - [19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319], - [19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423], - [19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477], - [19483, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577], - [19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709], - [19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, 19801], - [19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, 19913], - [19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, 19993], - [19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, 20089], - [20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, 20161], - [20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269], - [20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359], - [20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477], - [20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563], - [20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707], - [20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, 20773], - [20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, 20899], - [20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, 21001], - [21011, 21013, 21017, 21019, 21023, 21031, 21059, 21061, 21067, 21089], - [21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, 21179], - [21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, 21283], - [21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, 21391], - [21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, 21493], - [21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 
21563, 21569], - [21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649], - [21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757], - [21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, 21851], - [21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, 21961], - [21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, 22051], - [22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, 22129], - [22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, 22247], - [22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, 22343], - [22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, 22447], - [22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, 22549], - [22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, 22651], - [22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739], - [22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, 22853], - [22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, 22961], - [22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, 23039], - [23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, 23117], - [23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, 23209], - [23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, 23327], - [23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, 23459], - [23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, 23563], - [23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, 23633], - [23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, 23747], - [23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831], - [23833, 23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, 23911], - [23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019], - [24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, 24097], - [24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 
24169, 24179], - [24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, 24317], - [24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, 24419], - [24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, 24527], - [24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, 24671], - [24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, 24781], - [24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889], - [24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979], - [24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, 25111], - [25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189], - [25219, 25229, 25237, 25243, 25247, 25253, 25261, 25301, 25303, 25307], - [25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, 25409], - [25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, 25523], - [25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, 25609], - [25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, 25703], - [25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, 25801], - [25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, 25919], - [25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003], - [26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, 26113], - [26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, 26209], - [26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309], - [26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, 26407], - [26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, 26501], - [26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, 26641], - [26647, 26669, 26681, 26683, 26687, 26693, 26699, 26701, 26711, 26713], - [26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, 26813], - [26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, 26893], - [26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 
26987, 26993], - [27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091], - [27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, 27239], - [27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337], - [27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457], - [27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, 27581], - [27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, 27697], - [27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, 27773], - [27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, 27847], - [27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, 27953], - [27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, 28051, 28057], - [28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, 28163], - [28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289], - [28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, 28409], - [28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513], - [28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, 28591], - [28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, 28657], - [28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, 28751], - [28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, 28843], - [28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, 28949], - [28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, 29063], - [29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, 29173], - [29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, 29269], - [29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, 29383], - [29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, 29453], - [29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581], - [29587, 29599, 29611, 29629, 29633, 29641, 29663, 29669, 29671, 29683], - [29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 
29819, 29833], - [29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, 29927], - [29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, 30071], - [30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, 30139], - [30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, 30253], - [30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, 30347], - [30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, 30491], - [30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, 30577], - [30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697], - [30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809], - [30817, 30829, 30839, 30841, 30851, 30853, 30859, 30869, 30871, 30881], - [30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, 31013], - [31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, 31121], - [31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, 31189], - [31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, 31267], - [31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, 31379], - [31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, 31513], - [31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, 31607], - [31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, 31723], - [31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847], - [31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, 31981], - [31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, 32069], - [32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, 32173], - [32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, 32261], - [32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, 32359], - [32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, 32429], - [32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 32531, 32533], - [32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 
32609, 32611], - [32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, 32719], - [32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, 32833], - [32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941], - [32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, 33029], - [33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119], - [33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, 33223], - [33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, 33347], - [33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, 33457], - [33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, 33547], - [33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613, 33617], - [33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, 33721], - [33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, 33809], - [33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911], - [33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033], - [34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, 34159], - [34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267], - [34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, 34351], - [34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, 34469], - [34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, 34543], - [34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, 34651], - [34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, 34747], - [34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, 34849], - [34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, 34963], - [34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089], - [35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, 35171], - [35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, 35311], - [35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 
35401, 35407], - [35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, 35521], - [35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, 35597], - [35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, 35771], - [35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, 35869], - [35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, 35977], - [35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, 36067], - [36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, 36187], - [36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293], - [36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, 36389], - [36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, 36523], - [36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599], - [36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, 36697], - [36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, 36787], - [36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, 36887], - [36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, 36973], - [36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, 37061], - [37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, 37199], - [37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, 37313], - [37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409], - [37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, 37511], - [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579], - [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691], - [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831], - [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957], - [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039]]}}]} + "id": "doc1", + "title": "Document 1", + "sections": [ + { + "id": "sec1", + "content_type": "heading", + "elements": [ + { + "type": 
"heading", + "content": { + "text": "Introduction"}}]}]}]} ============================================================================== ================================================================================ MERGE RESULT: ✅ SUCCESS ================================================================================ -Final result length: 31743 chars +Final result length: 414 chars Final result (COMPLETE): ================================================================================ { - "elements": [ + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + }, + "documents": [ { - "type": "table", - "content": { - "headers": ["Spalte1", "Spalte2", "Spalte3", "Spalte4", "Spalte5", "Spalte6", "Spalte7", "Spalte8", "Spalte9", "Spalte10"], - "rows": [ - [2, 3, 5, 7, 11, 13, 17, 19, 23, 29], - [31, 37, 41, 43, 47, 53, 59, 61, 67, 71], - [73, 79, 83, 89, 97, 101, 103, 107, 109, 113], - [127, 131, 137, 139, 149, 151, 157, 163, 167, 173], - [179, 181, 191, 193, 197, 199, 211, 223, 227, 229], - [233, 239, 241, 251, 257, 263, 269, 271, 277, 281], - [283, 293, 307, 311, 313, 317, 331, 337, 347, 349], - [353, 359, 367, 373, 379, 383, 389, 397, 401, 409], - [419, 421, 431, 433, 439, 443, 449, 457, 461, 463], - [467, 479, 487, 491, 499, 503, 509, 521, 523, 541], - [547, 557, 563, 569, 571, 577, 587, 593, 599, 601], - [607, 613, 617, 619, 631, 641, 643, 647, 653, 659], - [661, 673, 677, 683, 691, 701, 709, 719, 727, 733], - [739, 743, 751, 757, 761, 769, 773, 787, 797, 809], - [811, 821, 823, 827, 829, 839, 853, 857, 859, 863], - [877, 881, 883, 887, 907, 911, 919, 929, 937, 941], - [947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013], - [1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069], - [1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151], - [1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223], - [1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291], - [1297, 1301, 1303, 1307, 1319, 1321, 1327, 
1361, 1367, 1373], - [1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451], - [1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511], - [1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583], - [1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657], - [1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733], - [1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811], - [1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889], - [1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987], - [1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053], - [2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129], - [2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213], - [2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287], - [2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357], - [2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423], - [2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531], - [2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617], - [2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687], - [2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741], - [2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819], - [2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903], - [2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999], - [3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079], - [3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181], - [3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257], - [3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331], - [3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413], - [3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511], - [3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571], - [3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643], - [3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727], - [3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 
3821], - [3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907], - [3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989], - [4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057], - [4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139], - [4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231], - [4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297], - [4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409], - [4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493], - [4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583], - [4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657], - [4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751], - [4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831], - [4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937], - [4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003], - [5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087], - [5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179], - [5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279], - [5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387], - [5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443], - [5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521], - [5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639], - [5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693], - [5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791], - [5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857], - [5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939], - [5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053], - [6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133], - [6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221], - [6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301], - [6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367], - [6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473], - [6481, 
6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571], - [6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673], - [6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761], - [6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833], - [6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917], - [6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997], - [7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103], - [7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207], - [7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297], - [7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411], - [7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499], - [7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561], - [7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643], - [7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723], - [7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829], - [7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919], - [7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017], - [8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111], - [8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 8209, 8219], - [8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291], - [8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387], - [8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 8467, 8501], - [8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597], - [8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677], - [8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741], - [8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831], - [8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929], - [8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011], - [9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109], - [9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 9187, 9199], - [9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283], - [9293, 9311, 9319, 
9323, 9337, 9341, 9343, 9349, 9371, 9377], - [9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 9437, 9439], - [9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533], - [9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, 9631], - [9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733], - [9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811], - [9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887], - [9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007], - [10009, 10037, 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099], - [10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 10169, 10177], - [10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271], - [10273, 10289, 10301, 10303, 10313, 10321, 10331, 10333, 10337, 10343], - [10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459], - [10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567], - [10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, 10657], - [10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739], - [10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859], - [10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949], - [10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059], - [11069, 11071, 11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149], - [11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251], - [11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329], - [11351, 11353, 11369, 11383, 11393, 11399, 11411, 11423, 11437, 11443], - [11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527], - [11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657], - [11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, 11777], - [11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833], - [11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933], - [11939, 11941, 11953, 
11959, 11969, 11971, 11981, 11987, 12007, 12011], - [12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109], - [12113, 12119, 12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211], - [12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289], - [12301, 12323, 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401], - [12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 12479, 12487], - [12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553], - [12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, 12637, 12641], - [12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739], - [12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829], - [12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923], - [12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007], - [13009, 13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109], - [13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187], - [13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309], - [13313, 13327, 13331, 13337, 13339, 13367, 13381, 13397, 13399, 13411], - [13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499], - [13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619], - [13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, 13693, 13697], - [13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781], - [13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879], - [13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967], - [13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081], - [14083, 14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197], - [14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323], - [14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419], - [14423, 14431, 14437, 14447, 14449, 14461, 14479, 14489, 14503, 14519], - [14533, 14537, 14543, 
14549, 14551, 14557, 14561, 14563, 14591, 14593], - [14621, 14627, 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699], - [14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 14759, 14767], - [14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851], - [14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947], - [14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073], - [15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149], - [15161, 15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259], - [15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319], - [15329, 15331, 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401], - [15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 15493, 15497], - [15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607], - [15619, 15629, 15641, 15643, 15647, 15649, 15661, 15667, 15671, 15679], - [15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773], - [15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881], - [15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971], - [15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069], - [16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183], - [16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267], - [16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381], - [16411, 16417, 16421, 16427, 16433, 16447, 16451, 16453, 16477, 16481], - [16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603], - [16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691], - [16693, 16699, 16703, 16729, 16741, 16747, 16759, 16763, 16787, 16811], - [16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903], - [16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993], - [17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093], - [17099, 17107, 17117, 
17123, 17137, 17159, 17167, 17183, 17189, 17191], - [17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317], - [17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389], - [17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477], - [17483, 17489, 17491, 17497, 17509, 17519, 17539, 17551, 17569, 17573], - [17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669], - [17681, 17683, 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783], - [17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 17881, 17891], - [17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971], - [17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, 18059], - [18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143], - [18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233], - [18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313], - [18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427], - [18433, 18439, 18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517], - [18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593, 18617, 18637], - [18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749], - [18757, 18773, 18787, 18793, 18797, 18803, 18839, 18859, 18869, 18899], - [18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009], - [19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121], - [19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, 19219], - [19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319], - [19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423], - [19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477], - [19483, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577], - [19583, 19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709], - [19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793, 19801], - [19813, 19819, 19841, 
19843, 19853, 19861, 19867, 19889, 19891, 19913], - [19919, 19927, 19937, 19949, 19961, 19963, 19973, 19979, 19991, 19993], - [19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, 20089], - [20101, 20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, 20161], - [20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269], - [20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359], - [20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477], - [20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563], - [20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707], - [20717, 20719, 20731, 20743, 20747, 20749, 20753, 20759, 20771, 20773], - [20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, 20899], - [20903, 20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, 21001], - [21011, 21013, 21017, 21019, 21023, 21031, 21059, 21061, 21067, 21089], - [21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, 21179], - [21187, 21191, 21193, 21211, 21221, 21227, 21247, 21269, 21277, 21283], - [21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383, 21391], - [21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, 21493], - [21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, 21569], - [21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649], - [21661, 21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757], - [21767, 21773, 21787, 21799, 21803, 21817, 21821, 21839, 21841, 21851], - [21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, 21961], - [21977, 21991, 21997, 22003, 22013, 22027, 22031, 22037, 22039, 22051], - [22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123, 22129], - [22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, 22247], - [22259, 22271, 22273, 22277, 22279, 22283, 22291, 22303, 22307, 22343], - [22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, 22447], - [22453, 22469, 22481, 
22483, 22501, 22511, 22531, 22541, 22543, 22549], - [22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, 22651], - [22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739], - [22741, 22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, 22853], - [22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943, 22961], - [22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, 23039], - [23041, 23053, 23057, 23059, 23063, 23071, 23081, 23087, 23099, 23117], - [23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, 23209], - [23227, 23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, 23327], - [23333, 23339, 23357, 23369, 23371, 23399, 23417, 23431, 23447, 23459], - [23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, 23563], - [23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, 23633], - [23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, 23747], - [23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831], - [23833, 23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, 23911], - [23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019], - [24023, 24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, 24097], - [24103, 24107, 24109, 24113, 24121, 24133, 24137, 24151, 24169, 24179], - [24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, 24317], - [24329, 24337, 24359, 24371, 24373, 24379, 24391, 24407, 24413, 24419], - [24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517, 24527], - [24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, 24671], - [24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, 24781], - [24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889], - [24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979], - [24989, 25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, 25111], - [25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189], - [25219, 25229, 25237, 
25243, 25247, 25253, 25261, 25301, 25303, 25307], - [25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391, 25409], - [25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, 25523], - [25537, 25541, 25561, 25577, 25579, 25583, 25589, 25601, 25603, 25609], - [25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, 25703], - [25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, 25801], - [25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, 25919], - [25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003], - [26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, 26113], - [26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, 26209], - [26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309], - [26317, 26321, 26339, 26347, 26357, 26371, 26387, 26393, 26399, 26407], - [26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, 26501], - [26513, 26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, 26641], - [26647, 26669, 26681, 26683, 26687, 26693, 26699, 26701, 26711, 26713], - [26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, 26813], - [26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, 26891, 26893], - [26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, 26993], - [27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091], - [27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, 27239], - [27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337], - [27361, 27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457], - [27479, 27481, 27487, 27509, 27527, 27529, 27539, 27541, 27551, 27581], - [27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, 27697], - [27701, 27733, 27737, 27739, 27743, 27749, 27751, 27763, 27767, 27773], - [27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827, 27847], - [27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, 27953], - [27961, 27967, 27983, 
27997, 28001, 28019, 28027, 28031, 28051, 28057], - [28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, 28163], - [28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289], - [28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, 28409], - [28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513], - [28517, 28537, 28541, 28547, 28549, 28559, 28571, 28573, 28579, 28591], - [28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649, 28657], - [28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, 28751], - [28753, 28759, 28771, 28789, 28793, 28807, 28813, 28817, 28837, 28843], - [28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, 28949], - [28961, 28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, 29063], - [29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, 29167, 29173], - [29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, 29269], - [29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, 29383], - [29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, 29453], - [29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581], - [29587, 29599, 29611, 29629, 29633, 29641, 29663, 29669, 29671, 29683], - [29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, 29833], - [29837, 29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, 29927], - [29947, 29959, 29983, 29989, 30011, 30013, 30029, 30047, 30059, 30071], - [30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, 30139], - [30161, 30169, 30181, 30187, 30197, 30203, 30211, 30223, 30241, 30253], - [30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341, 30347], - [30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, 30491], - [30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, 30577], - [30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697], - [30703, 30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809], - [30817, 30829, 30839, 
30841, 30851, 30853, 30859, 30869, 30871, 30881], - [30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, 31013], - [31019, 31033, 31039, 31051, 31063, 31069, 31079, 31081, 31091, 31121], - [31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183, 31189], - [31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, 31267], - [31271, 31277, 31307, 31319, 31321, 31327, 31333, 31337, 31357, 31379], - [31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, 31513], - [31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, 31607], - [31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, 31723], - [31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847], - [31849, 31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, 31981], - [31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063, 32069], - [32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, 32173], - [32183, 32189, 32191, 32203, 32213, 32233, 32237, 32251, 32257, 32261], - [32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, 32359], - [32363, 32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, 32429], - [32441, 32443, 32467, 32479, 32491, 32497, 32503, 32507, 32531, 32533], - [32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 32609, 32611], - [32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, 32719], - [32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, 32833], - [32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941], - [32957, 32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, 33029], - [33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119], - [33149, 33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, 33223], - [33247, 33287, 33289, 33301, 33311, 33317, 33329, 33331, 33343, 33347], - [33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, 33457], - [33461, 33469, 33479, 33487, 33493, 33503, 33521, 33529, 33533, 33547], - [33563, 33569, 33577, 
33581, 33587, 33589, 33599, 33601, 33613, 33617], - [33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, 33721], - [33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, 33809], - [33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911], - [33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033], - [34039, 34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, 34159], - [34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267], - [34273, 34283, 34297, 34301, 34303, 34313, 34319, 34327, 34337, 34351], - [34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457, 34469], - [34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, 34543], - [34549, 34583, 34589, 34591, 34603, 34607, 34613, 34631, 34649, 34651], - [34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, 34747], - [34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, 34849], - [34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, 34963], - [34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089], - [35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, 35171], - [35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, 35311], - [35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, 35407], - [35419, 35423, 35437, 35447, 35449, 35461, 35491, 35507, 35509, 35521], - [35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, 35597], - [35603, 35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, 35771], - [35797, 35801, 35803, 35809, 35831, 35837, 35839, 35851, 35863, 35869], - [35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, 35977], - [35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, 36061, 36067], - [36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, 36187], - [36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293], - [36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, 36389], - [36433, 36451, 36457, 
36467, 36469, 36473, 36479, 36493, 36497, 36523], - [36527, 36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599], - [36607, 36629, 36637, 36643, 36653, 36671, 36677, 36683, 36691, 36697], - [36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, 36787], - [36791, 36793, 36809, 36821, 36833, 36847, 36857, 36871, 36877, 36887], - [36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947, 36973], - [36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, 37061], - [37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, 37189, 37199], - [37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, 37313], - [37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409], - [37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, 37511], - [37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579], - [37589, 37591, 37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691], - [37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813, 37831], - [37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957], - [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039]]}}]} + "id": "doc1", + "title": "Document 1", + "sections": [ + { + "id": "sec1", + "content_type": "heading", + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction"}}]}]}]} ================================================================================ diff --git a/modules/services/serviceAi/merge_2.txt b/modules/services/serviceAi/merge_2.txt new file mode 100644 index 00000000..83374f93 --- /dev/null +++ b/modules/services/serviceAi/merge_2.txt @@ -0,0 +1,121 @@ +================================================================================ +JSON MERGE OPERATION #2 +================================================================================ +Timestamp: 2026-01-05T08:30:55.472639 + +INPUT: + Accumulated length: 414 chars + New Fragment length: 245 chars + Accumulated: 19 lines (showing first 5 and last 5) + 
{ + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + ... (9 lines omitted) ... + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction"}}]}]}]} + New Fragment: 14 lines (showing first 5 and last 5) + t_type": "paragraph", + "elements": [ + { + "type": "paragraph", + "content": { + ... (4 lines omitted) ... + } + ] + } + ] + } + + + Normalized Accumulated (414 chars) + (showing first 5 and last 5 of 19 lines) + { + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + ... (9 lines omitted) ... + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction"}}]}]}]} + + Normalized New Fragment (245 chars) + (showing first 5 and last 5 of 14 lines) + t_type": "paragraph", + "elements": [ + { + "type": "paragraph", + "content": { + ... (4 lines omitted) ... + } + ] + } + ] + } +STEP: PHASE 1 + Description: Finding overlap between JSON strings + ⏳ In progress... + + Overlap Detection (string): + Overlap length: 0 + ⚠️ No overlap detected - appending all + + ⚠️ NO OVERLAP FOUND - This indicates iterations should stop + Closing JSON and returning final result + + Closed JSON (414 chars): + ============================================================================== + { + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + }, + "documents": [ + { + "id": "doc1", + "title": "Document 1", + "sections": [ + { + "id": "sec1", + "content_type": "heading", + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction"}}]}]}]} + ============================================================================== + +================================================================================ +MERGE RESULT: ✅ SUCCESS +================================================================================ +Final result length: 414 chars +Final result (COMPLETE): 
+================================================================================ +{ + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + }, + "documents": [ + { + "id": "doc1", + "title": "Document 1", + "sections": [ + { + "id": "sec1", + "content_type": "heading", + "elements": [ + { + "type": "heading", + "content": { + "text": "Introduction"}}]}]}]} +================================================================================ diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py index 021b1f95..63051d8b 100644 --- a/modules/services/serviceAi/subAiCallLooping.py +++ b/modules/services/serviceAi/subAiCallLooping.py @@ -324,6 +324,8 @@ class AiCallLooper: # JSON is already closed by mergeJsonStringsWithOverlap when no overlap # Use the merged (closed) JSON string directly result = mergedJsonString + # CRITICAL: Update lastRawResponse with merged result for next iteration + lastRawResponse = mergedJsonString # Try to parse it to get parsedJsonForUseCase try: extracted = extractJsonString(mergedJsonString) @@ -333,6 +335,8 @@ class AiCallLooper: normalized = self._normalizeJsonStructure(parsed, useCase) parsedJsonForUseCase = normalized result = json.dumps(normalized, indent=2, ensure_ascii=False) + # CRITICAL: Update lastRawResponse with final result + lastRawResponse = result else: # Parsing failed - try to repair JSON from modules.shared.jsonUtils import repairBrokenJson @@ -346,6 +350,8 @@ class AiCallLooper: normalized = self._normalizeJsonStructure(repairedJson, useCase) parsedJsonForUseCase = normalized result = json.dumps(normalized, indent=2, ensure_ascii=False) + # CRITICAL: Update lastRawResponse with final result + lastRawResponse = result logger.info(f"Iteration {iteration}: Successfully repaired JSON after no-overlap merge") except Exception as e: # Last resort: try repair on the original merged string @@ -379,6 +385,8 @@ class AiCallLooper: 
normalized = self._normalizeJsonStructure(parsed, useCase) parsedJsonForUseCase = normalized result = json.dumps(normalized, indent=2, ensure_ascii=False) + # CRITICAL: Update lastRawResponse with merged result + lastRawResponse = result else: # Parsing failed - try to extract partial data using Deep-Structure-Merging # This fallback works for all use cases: parse what we can from each part @@ -404,9 +412,13 @@ class AiCallLooper: parsedJsonForUseCase = mergedJsonObj result = json.dumps(mergedJsonObj, indent=2, ensure_ascii=False) + # CRITICAL: Update lastRawResponse with merged result + lastRawResponse = result else: # All parsing failed - use string merge result result = mergedJsonString + # CRITICAL: Update lastRawResponse with merged result + lastRawResponse = mergedJsonString except Exception as e: logger.warning(f"Failed data-based merge, falling back to string merging: {e}") # Fallback to string merging @@ -424,6 +436,8 @@ class AiCallLooper: hasOverlap = False logger.info(f"Iteration {iteration}: No overlap found in final fallback merge - stopping iterations") result = mergedJsonString + # CRITICAL: Update lastRawResponse with merged result + lastRawResponse = mergedJsonString # If no overlap was found, mark as complete and use closed JSON if not hasOverlap: diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index f6f3032c..5918d641 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -2198,75 +2198,15 @@ Output requirements: incompletePart = continuationContext.incomplete_part lastRawJson = continuationContext.last_raw_json - # Build overlap context: extract cut part and full part before (same level) for overlap + # Generate both overlap context and hierarchy context using jsonContinuation overlapContext = "" - if lastRawJson: - # Find break position in raw JSON - lastCompletePart = continuationContext.last_complete_part - 
breakPos = len(lastRawJson.rstrip()) - - if lastCompletePart: - from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText - normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip() - normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip() - - # Find where normalizedComplete ends in normalizedRaw - pos = normalizedRaw.find(normalizedComplete) - if pos >= 0: - breakPos = pos + len(normalizedComplete) - else: - pos = lastRawJson.find(lastCompletePart) - if pos >= 0: - breakPos = pos + len(lastCompletePart) - elif incompletePart: - pos = lastRawJson.find(incompletePart) - if pos >= 0: - breakPos = pos - - # Extract cut part and full part before (same level) - overlapContext = self._extractOverlapContext(lastRawJson, breakPos) - - # Build unified context showing structure hierarchy with cut point unifiedContext = "" if lastRawJson: - # Find break position in raw JSON - # Use last_complete_part length to find where complete part ends - lastCompletePart = continuationContext.last_complete_part - if lastCompletePart: - # Break position is where the complete part ends - # Normalize lastRawJson to match the normalized lastCompletePart - from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText - normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip() - normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip() - - # Find where normalizedComplete ends in normalizedRaw - breakPos = normalizedRaw.find(normalizedComplete) - if breakPos >= 0: - breakPos = breakPos + len(normalizedComplete) - else: - # Fallback: use length of lastCompletePart in original string - breakPos = lastRawJson.find(lastCompletePart) - if breakPos >= 0: - breakPos = breakPos + len(lastCompletePart) - else: - # Last resort: use incompletePart position - if incompletePart: - breakPos = lastRawJson.find(incompletePart) - if breakPos == -1: - breakPos = len(lastRawJson.rstrip()) - else: - breakPos = 
len(lastRawJson.rstrip()) - elif incompletePart: - # If no complete part, find where incomplete part starts - breakPos = lastRawJson.find(incompletePart) - if breakPos == -1: - breakPos = len(lastRawJson.rstrip()) - else: - breakPos = len(lastRawJson.rstrip()) - - # Build intelligent context showing hierarchy - from modules.shared.jsonUtils import buildIncompleteContext - unifiedContext = buildIncompleteContext(lastRawJson, breakPos) + # Get contexts directly from jsonContinuation + from modules.shared.jsonContinuation import getContexts + contexts = getContexts(lastRawJson) + overlapContext = contexts.overlapContext + unifiedContext = contexts.hierarchyContext elif incompletePart: unifiedContext = incompletePart else: @@ -2308,14 +2248,6 @@ CRITICAL: - Complete the incomplete element and continue with remaining elements""" return continuationPrompt - def _extractOverlapContext(self, jsonContent: str, breakPosition: int) -> str: - """ - Extract overlap context: cut part and full part before (same level). - Delegates to shared function in jsonUtils for consistency. - """ - from modules.shared.jsonUtils import extractOverlapContext - return extractOverlapContext(jsonContent, breakPosition) - def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]: """ Extract multiple JSON blocks from response and merge them appropriately. 
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index 44b4a76d..fca65197 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -128,33 +128,15 @@ class StructureGenerator: incompletePart = continuationContext.incomplete_part lastRawJson = continuationContext.last_raw_json - # Build overlap context: extract cut part and full part before (same level) for overlap + # Generate both overlap context and hierarchy context using jsonContinuation overlapContext = "" - if lastRawJson: - # Find break position - breakPos = len(lastRawJson.rstrip()) - if incompletePart: - pos = lastRawJson.find(incompletePart) - if pos >= 0: - breakPos = pos - - # Extract cut part and full part before (same level) - overlapContext = StructureGenerator._extractOverlapContext(lastRawJson, breakPos) - - # Build unified context showing structure hierarchy with cut point unifiedContext = "" if lastRawJson: - # Find break position in raw JSON - if incompletePart: - breakPos = lastRawJson.find(incompletePart) - if breakPos == -1: - breakPos = len(lastRawJson.rstrip()) - else: - breakPos = len(lastRawJson.rstrip()) - - # Build intelligent context showing hierarchy - from modules.shared.jsonUtils import buildIncompleteContext - unifiedContext = buildIncompleteContext(lastRawJson, breakPos) + # Get contexts directly from jsonContinuation + from modules.shared.jsonContinuation import getContexts + contexts = getContexts(lastRawJson) + overlapContext = contexts.overlapContext + unifiedContext = contexts.hierarchyContext elif incompletePart: unifiedContext = incompletePart else: @@ -195,144 +177,6 @@ CRITICAL: - Start with overlap context (cut part and full part before at same level) then continue seamlessly - Complete the incomplete element and continue with remaining elements""" return continuationPrompt - """ - Extract overlap context: cut part and full part 
before (same level). - - Returns a string showing: - 1. The last complete element at the same level before the cut point - 2. The cut part (incomplete element at the cut point) - """ - if not jsonContent or breakPosition <= 0: - return jsonContent[-200:].strip() if jsonContent else "" - - from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece - - # Find structure hierarchy - hierarchy = findStructureHierarchy(jsonContent, breakPosition) - if not hierarchy: - # Fallback: show last 200 chars before break - start = max(0, breakPosition - 200) - return jsonContent[start:breakPosition + 100].strip() - - # Get cut level (the array/object containing the cut piece) - cutLevel = hierarchy[-1] - cutLevelStart = cutLevel['start_pos'] - cutLevelType = cutLevel['type'] - - # Extract cut piece (incomplete element) - cutPiece = extractCutPiece(jsonContent, breakPosition) - - # Find the last complete element at the same level before the cut point - overlapParts = [] - - if cutLevelType == 'array': - # Find the last complete array element before breakPosition - i = breakPosition - 1 - depth = 0 - inString = False - escapeNext = False - elementStart = breakPosition - - # Find the start of the incomplete element (or last complete element) - while i >= cutLevelStart: - char = jsonContent[i] - - if escapeNext: - escapeNext = False - i -= 1 - continue - - if char == '\\': - escapeNext = True - i -= 1 - continue - - if char == '"': - inString = not inString - i -= 1 - continue - - if not inString: - if char == ']': - depth += 1 - elif char == '[': - depth -= 1 - if depth < 0: - elementStart = i + 1 - break - elif char == ',' and depth == 0: - elementStart = i + 1 - break - - i -= 1 - - # Extract the last complete element (if exists) and the cut part - if elementStart < breakPosition: - contentBeforeBreak = jsonContent[max(cutLevelStart, elementStart - 500):breakPosition].strip() - - # Find the last complete element by looking for balanced brackets/braces - 
lastCompleteEnd = breakPosition - braceCount = 0 - bracketCount = 0 - inString = False - escapeNext = False - - # Go backwards from breakPosition to find where last complete element ends - for j in range(breakPosition - 1, max(cutLevelStart, breakPosition - 1000), -1): - char = jsonContent[j] - - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '}': - braceCount += 1 - elif char == '{': - braceCount -= 1 - if braceCount == 0 and bracketCount == 0: - lastCompleteEnd = j - break - elif char == ']': - bracketCount += 1 - elif char == '[': - bracketCount -= 1 - if bracketCount == 0 and braceCount == 0: - lastCompleteEnd = j + 1 - break - elif char == ',' and braceCount == 0 and bracketCount == 0: - lastCompleteEnd = j + 1 - break - - # Extract last complete element and cut part - if lastCompleteEnd < breakPosition: - lastCompleteElement = jsonContent[max(cutLevelStart, lastCompleteEnd - 300):lastCompleteEnd].strip() - cutPart = jsonContent[lastCompleteEnd:breakPosition + len(cutPiece)].strip() - - if lastCompleteElement: - overlapParts.append(f"Last complete element at same level:\n{lastCompleteElement}") - if cutPart: - overlapParts.append(f"Cut part (incomplete):\n{cutPart}") - else: - contextStart = max(cutLevelStart, breakPosition - 300) - overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip()) - else: - contextStart = max(cutLevelStart, breakPosition - 300) - overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip()) - else: - # For objects or other types, show context around break point - contextStart = max(cutLevelStart, breakPosition - 300) - overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip()) - - return "\n\n".join(overlapParts) if overlapParts else jsonContent[max(0, breakPosition - 200):breakPosition + 100].strip() # Call AI with 
looping support # NOTE: Do NOT pass contentParts here - we only need metadata for structure generation @@ -457,14 +301,6 @@ CRITICAL: raise @staticmethod - def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str: - """ - Extract overlap context: cut part and full part before (same level). - Delegates to shared function in jsonUtils for consistency. - """ - from modules.shared.jsonUtils import extractOverlapContext - return extractOverlapContext(jsonContent, breakPosition) - def _buildChapterStructurePrompt( self, userPrompt: str, diff --git a/modules/services/serviceAi/test_json_merger.py b/modules/services/serviceAi/test_json_merger.py deleted file mode 100644 index 13fa780c..00000000 --- a/modules/services/serviceAi/test_json_merger.py +++ /dev/null @@ -1,594 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Test cases for JSON merger with different use cases and random cuts. - -Tests the robustness of the JSON merger by: -1. Creating test JSON for different use cases -2. Cutting it randomly at various points -3. Running the merger for each piece -4. 
Checking completeness against original -""" - -import json -import random -import logging -import sys -import os -from typing import Dict, Any, List, Tuple - -# Add project root to Python path -# Find project root by looking for gateway/modules structure -currentFile = os.path.abspath(__file__) -currentDir = os.path.dirname(currentFile) - -# Navigate up from: gateway/modules/services/serviceAi/test_json_merger.py -# To project root: D:\Athi\Local\Web\poweron -# Try different levels up -candidates = [ - os.path.abspath(os.path.join(currentDir, '../../../../')), # From gateway/modules/services/serviceAi - os.path.abspath(os.path.join(currentDir, '../../..')), # Alternative - os.path.abspath(os.path.join(currentDir, '../..')), # Another alternative -] - -projectRoot = None -for candidate in candidates: - gatewayModulesPath = os.path.join(candidate, 'gateway', 'modules') - if os.path.exists(gatewayModulesPath): - projectRoot = candidate - break - -# If still not found, try to find by looking for gateway directory -if projectRoot is None: - searchDir = currentDir - for _ in range(10): # Max 10 levels up - gatewayPath = os.path.join(searchDir, 'gateway') - if os.path.exists(gatewayPath) and os.path.exists(os.path.join(gatewayPath, 'modules')): - projectRoot = searchDir - break - parent = os.path.dirname(searchDir) - if parent == searchDir: # Reached root - break - searchDir = parent - -if projectRoot is None: - raise RuntimeError(f"Could not find project root. Current file: {currentFile}") - -# Add gateway directory to Python path (not project root) -gatewayPath = os.path.join(projectRoot, 'gateway') -if gatewayPath not in sys.path: - sys.path.insert(0, gatewayPath) - -# Verify the path is correct -modulesPath = os.path.join(projectRoot, 'gateway', 'modules') -if not os.path.exists(modulesPath): - raise RuntimeError(f"Project root verification failed. 
Expected gateway/modules at: {modulesPath}") - -try: - from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler - from modules.services.serviceAi.subJsonMerger import JsonMergeLogger - from modules.shared.jsonUtils import ( - normalizeJsonText, stripCodeFences, closeJsonStructures, tryParseJson, - extractJsonStructureContext - ) -except ImportError as e: - # Try to help debug - print(f"Import error: {e}") - print(f"Project root: {projectRoot}") - print(f"Gateway path: {gatewayPath}") - print(f"Python path (first 3): {sys.path[:3]}") - print(f"Looking for modules at: {modulesPath}") - print(f"Exists: {os.path.exists(modulesPath)}") - if os.path.exists(modulesPath): - print(f"Contents: {os.listdir(modulesPath)[:5]}") - raise - -logger = logging.getLogger(__name__) - - -def createTestJsonForUseCase(useCaseId: str, size: int = 100) -> Dict[str, Any]: - """ - Create test JSON for a specific use case. - - Args: - useCaseId: Use case ID (section_content, chapter_structure, etc.) 
- size: Size of test data (number of elements/rows/items) - - Returns: - Test JSON dictionary - """ - if useCaseId == "section_content": - # Create table with rows - elements = [{ - "type": "table", - "content": { - "headers": ["Year", "Value"], - "rows": [[str(1947 + i), str(10000 + i * 100)] for i in range(size)] - } - }] - return {"elements": elements} - - elif useCaseId == "chapter_structure": - chapters = [{ - "id": f"chapter_{i}", - "title": f"Chapter {i}", - "level": 1 - } for i in range(size)] - return {"documents": [{"chapters": chapters}]} - - elif useCaseId == "code_structure": - files = [{ - "id": f"file_{i}", - "filename": f"file_{i}.py", - "fileType": "python", - "functions": [f"function_{i}_{j}" for j in range(5)] - } for i in range(size)] - return {"files": files} - - elif useCaseId == "code_content": - files = [{ - "id": f"file_{i}", - "content": f"# File {i}\ndef function_{i}():\n pass\n" * 10, - "functions": [{"name": f"function_{i}_{j}", "line": j * 3} for j in range(5)] - } for i in range(size)] - return {"files": files} - - else: - raise ValueError(f"Unknown use case: {useCaseId}") - - -def cutJsonRandomly(jsonString: str, numCuts: int = 5, overlapSize: int = 100) -> List[str]: - """ - Cut JSON string RANDOMLY at different points WITH OVERLAP between fragments. - Each fragment overlaps with the previous one to help merging. 
- - Args: - jsonString: JSON string to cut - numCuts: Number of cuts to make - overlapSize: Size of overlap between fragments (in characters) - - Returns: - List of JSON fragments with overlap - """ - fragments = [] - currentPos = 0 - totalLength = len(jsonString) - - if totalLength == 0: - return [] - - # First fragment: from start to first cut point - if numCuts > 0: - # First cut point (between 20% and 40% of total) - firstCutPoint = random.randint(int(totalLength * 0.2), int(totalLength * 0.4)) - fragment = jsonString[:firstCutPoint] - fragments.append(fragment) - currentPos = firstCutPoint - else: - # No cuts - return whole string - return [jsonString] - - # Subsequent fragments: each starts with overlap from previous, then continues - for i in range(numCuts - 1): - if currentPos >= totalLength: - break - - # Calculate overlap start (go back overlapSize from current position) - overlapStart = max(0, currentPos - overlapSize) - - # Calculate next cut point (between 20% and 40% of remaining) - remaining = totalLength - currentPos - if remaining < overlapSize * 2: - # Not enough remaining - add rest as last fragment - fragment = jsonString[overlapStart:] - fragments.append(fragment) - break - - # Next cut point from current position - nextCutPoint = currentPos + random.randint(int(remaining * 0.2), int(remaining * 0.4)) - nextCutPoint = min(nextCutPoint, totalLength) - - # Fragment: from overlap start to next cut point - fragment = jsonString[overlapStart:nextCutPoint] - fragments.append(fragment) - - currentPos = nextCutPoint - - # Add remaining as last fragment (with overlap) - if currentPos < totalLength: - overlapStart = max(0, currentPos - overlapSize) - fragment = jsonString[overlapStart:] - fragments.append(fragment) - - return fragments - - -def testMergerWithFragments( - originalJson: Dict[str, Any], - fragments: List[str], - useCaseId: str -) -> Tuple[bool, Dict[str, Any], str]: - """ - Test merger by merging fragments sequentially. 
- - Args: - originalJson: Original complete JSON - fragments: List of JSON fragments to merge - useCaseId: Use case ID - - Returns: - Tuple of (success, merged_json, error_message) - """ - if not fragments: - return False, {}, "No fragments provided" - - # Log structure context for each fragment (especially incomplete ones) - print(f"\n{'='*60}") - print(f"FRAGMENT ANALYSIS (use case: {useCaseId})") - print(f"{'='*60}") - - for fragIdx, fragment in enumerate(fragments): - print(f"\nFragment {fragIdx + 1}/{len(fragments)}:") - print(f" Length: {len(fragment)} chars") - - # Extract structure context for this fragment - try: - structureContext = extractJsonStructureContext(fragment, useCaseId) - - templateStructure = structureContext.get("template_structure", "") - lastCompletePart = structureContext.get("last_complete_part", "") - incompletePart = structureContext.get("incomplete_part", "") - structureContextJson = structureContext.get("structure_context", "") - - # Check if fragment is incomplete - normalized = stripCodeFences(normalizeJsonText(fragment)).strip() - parsed, parseErr, _ = tryParseJson(normalized) - isIncomplete = parseErr is not None or (parsed is None) - - if isIncomplete: - print(f" Status: INCOMPLETE (cut off)") - print(f" Template Structure:") - if templateStructure: - # Show first few lines of template - templateLines = templateStructure.split('\n') - templateLinesToShow = templateLines[:5] - for line in templateLinesToShow: - print(f" {line}") - if len(templateLines) > 5: - remainingLines = len(templateLines) - 5 - print(f" ... ({remainingLines} more lines)") - else: - print(f" (not available)") - - print(f" Structure Context:") - if structureContextJson: - # Show structure context - contextLines = structureContextJson.split('\n') - contextLinesToShow = contextLines[:5] - for line in contextLinesToShow: - print(f" {line}") - if len(contextLines) > 5: - remainingContextLines = len(contextLines) - 5 - print(f" ... 
({remainingContextLines} more lines)") - else: - print(f" (not available)") - - print(f" Last Complete Part:") - if lastCompletePart: - # Show last complete part (truncated if too long) - if len(lastCompletePart) > 200: - print(f" {lastCompletePart[:200]}... ({len(lastCompletePart)} chars total)") - else: - print(f" {lastCompletePart}") - else: - print(f" (not available)") - - print(f" Incomplete Part:") - if incompletePart: - # Show incomplete part (truncated if too long) - if len(incompletePart) > 200: - print(f" {incompletePart[:200]}... ({len(incompletePart)} chars total)") - else: - print(f" {incompletePart}") - else: - print(f" (not available)") - else: - print(f" Status: COMPLETE") - if structureContextJson: - print(f" Structure Context:") - contextLines = structureContextJson.split('\n') - contextLinesToShow = contextLines[:3] - for line in contextLinesToShow: - print(f" {line}") - if len(contextLines) > 3: - remainingContextLines = len(contextLines) - 3 - print(f" ... ({remainingContextLines} more lines)") - except Exception as e: - print(f" Error extracting structure context: {e}") - - print(f"\n{'='*60}\n") - - # Start with first fragment - accumulated = fragments[0] - - # Merge each subsequent fragment - for i, fragment in enumerate(fragments[1:], 1): - try: - accumulated, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap( - accumulated, fragment - ) - # Log if no overlap was found (iterations would stop in real scenario) - if not hasOverlap: - print(f" ⚠️ Fragment {i}: No overlap found - iterations would stop here") - - # Check if result is empty (should never happen) - if not accumulated or accumulated.strip() in ['{"elements": []}', '{}', '']: - return False, {}, f"Merge {i} returned empty JSON" - - except Exception as e: - return False, {}, f"Merge {i} failed with error: {str(e)}" - - # Parse merged result - try: - # Normalize and try to parse - normalized = stripCodeFences(normalizeJsonText(accumulated)).strip() - - # Try to parse 
directly - parsed, parseErr, _ = tryParseJson(normalized) - - if parseErr is not None: - # Try closing structures if incomplete - try: - closed = closeJsonStructures(normalized) - parsed, parseErr2, _ = tryParseJson(closed) - if parseErr2 is not None: - # Try to extract valid JSON prefix - # JsonResponseHandler is already imported at module level - validPrefix = JsonResponseHandler._extractValidJsonPrefix(normalized) - if validPrefix: - parsed, parseErr3, _ = tryParseJson(validPrefix) - if parseErr3 is not None: - return False, {}, f"Final parse error: {str(parseErr3)}" - else: - return False, {}, f"Final parse error: {str(parseErr2)}" - except Exception as parseErr: - return False, {}, f"Final parse error: {str(parseErr)}" - - if not parsed: - return False, {}, "Final parse returned None" - - # CRITICAL: Ensure parsed is a dict, not a list - # If it's a list, wrap it in the expected structure based on use case - if isinstance(parsed, list): - # Try to normalize list to expected structure - if useCaseId == "section_content": - # List of elements - wrap in elements structure - parsed = {"elements": parsed} - elif useCaseId == "chapter_structure": - # List of chapters - wrap in documents structure - parsed = {"documents": [{"chapters": parsed}]} - elif useCaseId == "code_structure": - # List of files - wrap in files structure - parsed = {"files": parsed} - elif useCaseId == "code_content": - # List of files - wrap in files structure - parsed = {"files": parsed} - else: - # Unknown use case - try to wrap as elements - parsed = {"elements": parsed} - - # Ensure it's a dict now - if not isinstance(parsed, dict): - return False, {}, f"Final parse returned unexpected type: {type(parsed).__name__}" - - return True, parsed, "" - - except Exception as e: - return False, {}, f"Final parse failed: {str(e)}" - - -def compareJsonCompleteness( - original: Dict[str, Any], - merged: Dict[str, Any], - useCaseId: str -) -> Tuple[bool, str]: - """ - Compare merged JSON with original 
to check completeness. - - Args: - original: Original JSON - merged: Merged JSON (must be a dict) - useCaseId: Use case ID - - Returns: - Tuple of (is_complete, message) - """ - # CRITICAL: Ensure merged is a dict - if not isinstance(merged, dict): - return False, f"Merged JSON is not a dict, got {type(merged).__name__}" - - if useCaseId == "section_content": - origElements = original.get("elements", []) - mergedElements = merged.get("elements", []) - - if not isinstance(origElements, list): - return False, f"Original elements is not a list: {type(origElements).__name__}" - if not isinstance(mergedElements, list): - return False, f"Merged elements is not a list: {type(mergedElements).__name__}" - - if len(mergedElements) < len(origElements): - return False, f"Missing elements: {len(origElements)} expected, {len(mergedElements)} found" - - # Check table rows - if origElements and mergedElements: - origTable = origElements[0] if isinstance(origElements[0], dict) else {} - mergedTable = mergedElements[0] if isinstance(mergedElements[0], dict) else {} - - if not origTable or not mergedTable: - return False, f"Table structure missing: origTable={bool(origTable)}, mergedTable={bool(mergedTable)}" - - origRows = origTable.get("content", {}).get("rows", []) if isinstance(origTable.get("content"), dict) else origTable.get("rows", []) - mergedRows = mergedTable.get("content", {}).get("rows", []) if isinstance(mergedTable.get("content"), dict) else mergedTable.get("rows", []) - - if not isinstance(origRows, list): - return False, f"Original rows is not a list: {type(origRows).__name__}" - if not isinstance(mergedRows, list): - return False, f"Merged rows is not a list: {type(mergedRows).__name__}" - - if len(mergedRows) < len(origRows): - return False, f"Missing rows: {len(origRows)} expected, {len(mergedRows)} found" - - return True, "Complete" - - elif useCaseId == "chapter_structure": - origChapters = original.get("documents", [{}])[0].get("chapters", []) - mergedChapters 
= merged.get("documents", [{}])[0].get("chapters", []) - - if len(mergedChapters) < len(origChapters): - return False, f"Missing chapters: {len(origChapters)} expected, {len(mergedChapters)} found" - - return True, "Complete" - - elif useCaseId == "code_structure": - origFiles = original.get("files", []) - mergedFiles = merged.get("files", []) - - if len(mergedFiles) < len(origFiles): - return False, f"Missing files: {len(origFiles)} expected, {len(mergedFiles)} found" - - return True, "Complete" - - elif useCaseId == "code_content": - origFiles = original.get("files", []) - mergedFiles = merged.get("files", []) - - if len(mergedFiles) < len(origFiles): - return False, f"Missing files: {len(origFiles)} expected, {len(mergedFiles)} found" - - return True, "Complete" - - else: - return False, f"Unknown use case: {useCaseId}" - - -def runTestForUseCase(useCaseId: str, size: int = 50, numTests: int = 10) -> Dict[str, Any]: - """ - Run multiple tests for a use case with random cuts. - - Args: - useCaseId: Use case ID - size: Size of test data - numTests: Number of test runs - - Returns: - Test results dictionary - """ - results = { - "useCaseId": useCaseId, - "size": size, - "numTests": numTests, - "passed": 0, - "failed": 0, - "errors": [] - } - - for testNum in range(numTests): - try: - # Create test JSON - originalJson = createTestJsonForUseCase(useCaseId, size) - originalString = json.dumps(originalJson, indent=2, ensure_ascii=False) - - # Cut randomly - fragments = cutJsonRandomly(originalString, numCuts=random.randint(3, 7)) - - # Test merger - success, mergedJson, errorMsg = testMergerWithFragments( - originalJson, fragments, useCaseId - ) - - if not success: - results["failed"] += 1 - results["errors"].append(f"Test {testNum + 1}: {errorMsg}") - continue - - # Check completeness - isComplete, completenessMsg = compareJsonCompleteness( - originalJson, mergedJson, useCaseId - ) - - if isComplete: - results["passed"] += 1 - else: - results["failed"] += 1 - 
results["errors"].append(f"Test {testNum + 1}: {completenessMsg}") - - except Exception as e: - results["failed"] += 1 - results["errors"].append(f"Test {testNum + 1}: Exception - {str(e)}") - - return results - - -def runAllTests(): - """Run tests for all use cases.""" - useCases = [ - "section_content", - "chapter_structure", - "code_structure", - "code_content" - ] - - allResults = [] - - for useCaseId in useCases: - print(f"\n{'='*60}") - print(f"Testing use case: {useCaseId}") - print(f"{'='*60}") - - # Initialize log file for this use case - # Initialize log file (overwrite on each test run) - logFileName = f"json_merger_{useCaseId}.txt" - JsonMergeLogger.initializeLogFile(logFileName) - print(f"Log file: {logFileName}") - - results = runTestForUseCase(useCaseId, size=50, numTests=10) - allResults.append(results) - - print(f"Passed: {results['passed']}/{results['numTests']}") - print(f"Failed: {results['failed']}/{results['numTests']}") - - if results["errors"]: - print("\nErrors:") - for error in results["errors"][:5]: # Show first 5 errors - print(f" - {error}") - - # Summary - print(f"\n{'='*60}") - print("SUMMARY") - print(f"{'='*60}") - - totalPassed = sum(r["passed"] for r in allResults) - totalFailed = sum(r["failed"] for r in allResults) - totalTests = sum(r["numTests"] for r in allResults) - - print(f"Total tests: {totalTests}") - print(f"Passed: {totalPassed}") - print(f"Failed: {totalFailed}") - print(f"Success rate: {totalPassed / totalTests * 100:.1f}%") - - return allResults - - -if __name__ == "__main__": - # Set up logging - use WARNING level to reduce noise from jsonUtils - logging.basicConfig(level=logging.WARNING) - - # Run tests - results = runAllTests() - - # Save results to file (in project root) - resultsFile = os.path.join(projectRoot, "test_json_merger_results.json") - with open(resultsFile, "w", encoding="utf-8") as f: - json.dump(results, f, indent=2, ensure_ascii=False) - - print(f"\nResults saved to {resultsFile}") diff --git 
a/modules/services/serviceGeneration/paths/codePath.py b/modules/services/serviceGeneration/paths/codePath.py index b385c192..273d6229 100644 --- a/modules/services/serviceGeneration/paths/codePath.py +++ b/modules/services/serviceGeneration/paths/codePath.py @@ -26,14 +26,6 @@ class CodeGenerationPath: def __init__(self, services): self.services = services - @staticmethod - def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str: - """ - Extract overlap context: cut part and full part before (same level). - Delegates to shared function in jsonUtils for consistency. - """ - from modules.shared.jsonUtils import extractOverlapContext - return extractOverlapContext(jsonContent, breakPosition) async def generateCode( self, @@ -346,25 +338,15 @@ Return ONLY valid JSON matching the request above. incompletePart = continuationContext.incomplete_part lastRawJson = continuationContext.last_raw_json - # Build overlap context: extract last ~100 characters from the response for overlap + # Generate both overlap context and hierarchy context using jsonContinuation overlapContext = "" - if lastRawJson: - overlapContext = lastRawJson[-100:].strip() - - # Build unified context showing structure hierarchy with cut point unifiedContext = "" if lastRawJson: - # Find break position in raw JSON - if incompletePart: - breakPos = lastRawJson.find(incompletePart) - if breakPos == -1: - breakPos = len(lastRawJson.rstrip()) - else: - breakPos = len(lastRawJson.rstrip()) - - # Build intelligent context showing hierarchy - from modules.shared.jsonUtils import buildIncompleteContext - unifiedContext = buildIncompleteContext(lastRawJson, breakPos) + # Get contexts directly from jsonContinuation + from modules.shared.jsonContinuation import getContexts + contexts = getContexts(lastRawJson) + overlapContext = contexts.overlapContext + unifiedContext = contexts.hierarchyContext elif incompletePart: unifiedContext = incompletePart else: @@ -808,33 +790,15 @@ Return ONLY valid JSON 
in this format: incompletePart = continuationContext.incomplete_part lastRawJson = continuationContext.last_raw_json - # Build overlap context: extract cut part and full part before (same level) for overlap + # Generate both overlap context and hierarchy context using jsonContinuation overlapContext = "" - if lastRawJson: - # Find break position - breakPos = len(lastRawJson.rstrip()) - if incompletePart: - pos = lastRawJson.find(incompletePart) - if pos >= 0: - breakPos = pos - - # Extract cut part and full part before (same level) - overlapContext = CodeGenerationPath._extractOverlapContext(lastRawJson, breakPos) - - # Build unified context showing structure hierarchy with cut point unifiedContext = "" if lastRawJson: - # Find break position in raw JSON - if incompletePart: - breakPos = lastRawJson.find(incompletePart) - if breakPos == -1: - breakPos = len(lastRawJson.rstrip()) - else: - breakPos = len(lastRawJson.rstrip()) - - # Build intelligent context showing hierarchy - from modules.shared.jsonUtils import buildIncompleteContext - unifiedContext = buildIncompleteContext(lastRawJson, breakPos) + # Get contexts directly from jsonContinuation + from modules.shared.jsonContinuation import getContexts + contexts = getContexts(lastRawJson) + overlapContext = contexts.overlapContext + unifiedContext = contexts.hierarchyContext elif incompletePart: unifiedContext = incompletePart else: diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index 0ee6fa5e..f0222dce 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -64,25 +64,27 @@ async def buildGenerationPrompt( ) if hasContinuation: - # CONTINUATION PROMPT - use new summary format from buildContinuationContext + # CONTINUATION PROMPT - use centralized jsonContinuation system delivered_summary = 
continuationContext.get("delivered_summary", "") - element_before_cutoff = continuationContext.get("element_before_cutoff") - cut_off_element = continuationContext.get("cut_off_element") + + # Use centralized system: overlap_context and hierarchy_context from jsonContinuation.getContexts() + overlap_context = continuationContext.get("overlap_context") + hierarchy_context = continuationContext.get("hierarchy_context") # Build continuation text with delivered summary and cut-off information # CRITICAL: Always include cut-off information if available (per loop_plan.md) continuationText = f"{delivered_summary}\n\n" continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n" - # Add cut-off point information (per loop_plan.md: always add if available) + # Add cut-off point information using centralized jsonContinuation contexts # These are shown ONLY as REFERENCE to know where generation stopped - if element_before_cutoff: - continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n" - continuationText += f"{element_before_cutoff}\n\n" + if hierarchy_context: + continuationText += "# REFERENCE: Structure context (already delivered - DO NOT repeat):\n" + continuationText += f"{hierarchy_context}\n\n" - if cut_off_element: - continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n" - continuationText += f"{cut_off_element}\n\n" + if overlap_context: + continuationText += "# REFERENCE: Overlap context - incomplete element at cut point (DO NOT repeat):\n" + continuationText += f"{overlap_context}\n\n" continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n" continuationText += "Generate ONLY what comes AFTER these elements. 
DO NOT regenerate the entire JSON structure.\n" diff --git a/modules/shared/jsonContinuation.md b/modules/shared/jsonContinuation.md new file mode 100644 index 00000000..b7e93cb4 --- /dev/null +++ b/modules/shared/jsonContinuation.md @@ -0,0 +1,164 @@ +# JSON Continuation Context Module + +Ein Python-Modul zur Generierung von Kontextinformationen für abgeschnittene JSON-Strings, um AI-Modellen die Fortsetzung zu ermöglichen. + +## Problem + +Wenn eine AI-Antwort als JSON abgeschnitten wird (z.B. Token-Limit erreicht), muss die nächste Iteration wissen: +- **Wo** der JSON abgeschnitten wurde +- **Was** bereits generiert wurde +- **Was** als nächstes geliefert werden soll + +## Lösung: Drei Kontexte + +### 1. Overlap Context +- Zeigt das **innerste Objekt/Array-Element**, das den Cut-Punkt enthält +- Wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu **mergen** +- Exakt so wie im Original-String (für String-Matching beim Merge) + +### 2. Hierarchy Context +- Zeigt die **hierarchische Struktur** vom Root bis zum Cut-Punkt +- Mit **Budget-Logik**: Näher am Cut = vollständige Werte, weiter weg = `"..."` Platzhalter +- Gibt der AI den Kontext der gesamten JSON-Struktur + +### 3. 
Complete Part (NEU)
+- Der **vollständige, valide JSON** bis zum Cut-Punkt
+- Alle offenen Strukturen werden geschlossen (`}`, `]`, `"`)
+- Unvollständige Keys werden entfernt
+- Kann direkt als valides JSON geparst werden
+
+## Installation
+
+```bash
+# Benötigt pydantic (Rückgabemodell JsonContinuationContexts aus modules/datamodels)
+# Das Modul liegt unter modules/shared/jsonContinuation.py
+```
+
+## Modulkonstanten
+
+```python
+# Diese Konstanten können vor dem Import angepasst werden
+BUDGET_LIMIT: int = 500  # Zeichen-Budget für Datenwerte
+OVERLAP_MAX_CHARS: int = 1000  # Max Zeichen für Overlap Context
+```
+
+## Verwendung
+
+### Grundlegende Verwendung
+
+```python
+from modules.shared.jsonContinuation import extractContinuationContexts
+
+truncated_json = '''{"customers": [
+    {"id": 1, "name": "John"},
+    {"id": 2, "name": "Jane", "email": "jane@exa'''
+
+overlap, hierarchy, complete = extractContinuationContexts(truncated_json)
+
+print("Overlap Context:")
+print(overlap)
+# {"id": 2, "name": "Jane", "email": "jane@exa
+
+print("Hierarchy Context:")
+print(hierarchy)
+# {"customers": [...structure with budget logic...]
+
+print("Complete Part (valid JSON):")
+print(complete)
+# {"customers": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane", "email": "jane@exa"}]}
+
+import json
+parsed = json.loads(complete)  # ✓ Funktioniert!
+```
+
+### Mit Pydantic-Modell-Interface
+
+```python
+from modules.shared.jsonContinuation import getContexts
+
+contexts = getContexts(truncated_json)
+
+print(contexts.overlapContext)
+print(contexts.hierarchyContext)
+print(contexts.completePart)
+```
+
+### Konstanten anpassen
+
+```python
+from modules.shared import jsonContinuation
+
+# Budget anpassen bevor Funktionen aufgerufen werden
+jsonContinuation.BUDGET_LIMIT = 200
+jsonContinuation.OVERLAP_MAX_CHARS = 500
+
+overlap, hierarchy, complete = jsonContinuation.extractContinuationContexts(truncated_json)
+```
+
+## Rückgabewerte
+
+| Rückgabe | Typ | Beschreibung |
+|----------|-----|--------------|
+| `overlap` | str | Innerstes Element mit Cut-Punkt (für Merge) |
+| `hierarchy` | str | Volle Struktur mit Budget-Logik |
+| `complete` | str | Valides JSON mit geschlossenen Strukturen |
+
+## Beispiele
+
+### Verschachtelte Objekte
+
+```python
+json_str = '{"user": {"profile": {"bio": "Hello Wor'
+
+overlap, hierarchy, complete = extractContinuationContexts(json_str)
+
+# Overlap: {"bio": "Hello Wor
+# Hierarchy: {"user": {"profile": {"bio": "Hello Wor
+# Complete: {"user": {"profile": {"bio": "Hello Wor"}}} ← Valides JSON!
+```
+
+### Array von Objekten mit unvollständigem Key
+
+```python
+json_str = '''{
+  "items": [
+    {"id": 1, "name": "First"},
+    {"id": 2, "name": "Second"},
+    {"id": 3, "name": "Third", "add'''
+
+overlap, hierarchy, complete = extractContinuationContexts(json_str)
+
+# Complete entfernt den unvollständigen Key "add":
+# {"items": [{"id": 1, ...}, {"id": 2, ...}, {"id": 3, "name": "Third"}]}
+```
+
+## Budget-Logik
+
+Die Budget-Logik funktioniert wie folgt:
+
+1. **Sammeln**: Alle String-Werte werden mit ihrer Position gesammelt
+2. **Sortieren**: Nach Entfernung zum Cut-Punkt (näher = höhere Priorität)
+3. **Zuweisen**: Budget wird von hinten nach vorne aufgebraucht
+4.
**Ersetzen**: Werte außerhalb des Budgets werden durch `"..."` ersetzt
+
+## Tests ausführen
+
+```bash
+python -m unittest test_json_continuation -v
+```
+
+## API Referenz
+
+### `extractContinuationContexts(truncatedJson: str) -> Tuple[str, str, str]`
+
+Hauptfunktion. Gibt `(overlap, hierarchy, completePart)` zurück.
+
+### `getContexts(truncatedJson: str) -> JsonContinuationContexts`
+
+Convenience-Funktion. Gibt ein Pydantic-Modell mit den Feldern `overlapContext`, `hierarchyContext`, `completePart` zurück.
+
+### Modulkonstanten
+
+- `BUDGET_LIMIT`: int (default: 500) - Zeichen-Budget für Hierarchy-Context
+- `OVERLAP_MAX_CHARS`: int (default: 1000) - Max Zeichen für Overlap-Context
+
diff --git a/modules/shared/jsonContinuation.py b/modules/shared/jsonContinuation.py
new file mode 100644
index 00000000..2fabd103
--- /dev/null
+++ b/modules/shared/jsonContinuation.py
@@ -0,0 +1,1232 @@
+"""
+JSON Continuation Context Module
+
+Generiert drei Kontexte für abgeschnittene JSON-Strings:
+1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält
+2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut mit Budget-Logik
+3. Complete Part: Der vollständige Teil des JSONs mit allen Strukturen geschlossen
+
+Hauptfunktionen:
+- extractContinuationContexts(truncatedJson: str) -> Tuple[str, str, str]
+  Extrahiert alle drei Kontexte aus einem abgeschnittenen JSON-String.
+
+- getContexts(truncatedJson: str) -> JsonContinuationContexts
+  Gibt alle Kontexte als Pydantic-Modell zurück mit benannten Feldern.
+ +Modulkonstanten: +- BUDGET_LIMIT: int = 500 + Zeichen-Budget für vollständige Datenwerte im Hierarchy Context + +- OVERLAP_MAX_CHARS: int = 1000 + Maximale Zeichen für den Overlap Context + +Verwendung: + >>> from modules.shared.jsonContinuation import getContexts + >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor' + >>> contexts = getContexts(jsonStr) + >>> print(contexts.overlapContext) + >>> print(contexts.hierarchyContext) + >>> print(contexts.completePart) + +Autor: Claude +Version: 2.0 +""" + +from typing import Tuple, List, Optional, Any +from dataclasses import dataclass +from enum import Enum +from modules.datamodels.datamodelAi import JsonContinuationContexts + + +# ============================================================================= +# MODULE CONSTANTS +# ============================================================================= + +BUDGET_LIMIT: int = 500 +"""Zeichen-Budget für vollständige Datenwerte im Hierarchy Context""" + +OVERLAP_MAX_CHARS: int = 1000 +"""Maximale Zeichen für den Overlap Context""" + + +# ============================================================================= +# TOKEN TYPES AND DATA CLASSES +# ============================================================================= + + +class TokenType(Enum): + """JSON Token Types""" + OBJECT_START = "{" + OBJECT_END = "}" + ARRAY_START = "[" + ARRAY_END = "]" + STRING = "string" + NUMBER = "number" + BOOLEAN = "boolean" + NULL = "null" + COLON = ":" + COMMA = "," + KEY = "key" + EOF = "eof" + TRUNCATED = "truncated" + + +@dataclass +class Token: + """Represents a JSON token with position info""" + type: TokenType + value: Any + start_pos: int + end_pos: int + raw: str # Original string representation + + +@dataclass +class StackFrame: + """Represents a level in the JSON hierarchy""" + type: str # "object" or "array" + start_pos: int + key: Optional[str] = None # Current key for objects + index: int = 0 # Current index for arrays + content: str = "" # 
Accumulated content for this frame + keys_seen: List[str] = None # Keys seen in this object + + def __post_init__(self): + if self.keys_seen is None: + self.keys_seen = [] + + +class JsonTokenizer: + """Tokenizer for potentially truncated JSON strings""" + + def __init__(self, jsonStr: str): + self.jsonStr = jsonStr + self.pos = 0 + self.length = len(jsonStr) + + def skipWhitespace(self): + """Skip whitespace characters""" + while self.pos < self.length and self.jsonStr[self.pos] in ' \t\n\r': + self.pos += 1 + + def peek(self) -> Optional[str]: + """Peek at current character without consuming""" + if self.pos < self.length: + return self.jsonStr[self.pos] + return None + + def readString(self) -> Token: + """Read a JSON string token""" + start_pos = self.pos + self.pos += 1 # Skip opening quote + + escaped = False + while self.pos < self.length: + char = self.jsonStr[self.pos] + if escaped: + escaped = False + self.pos += 1 + elif char == '\\': + escaped = True + self.pos += 1 + elif char == '"': + self.pos += 1 + raw = self.jsonStr[start_pos:self.pos] + try: + # Try to decode the string value + value = raw[1:-1] # Remove quotes for value + except: + value = raw + return Token(TokenType.STRING, value, start_pos, self.pos, raw) + else: + self.pos += 1 + + # String was truncated + raw = self.jsonStr[start_pos:self.pos] + return Token(TokenType.TRUNCATED, raw[1:] if len(raw) > 1 else "", start_pos, self.pos, raw) + + def readNumber(self) -> Token: + """Read a JSON number token""" + start_pos = self.pos + + # Handle negative + if self.pos < self.length and self.jsonStr[self.pos] == '-': + self.pos += 1 + + # Read digits + while self.pos < self.length and self.jsonStr[self.pos].isdigit(): + self.pos += 1 + + # Decimal part + if self.pos < self.length and self.jsonStr[self.pos] == '.': + self.pos += 1 + while self.pos < self.length and self.jsonStr[self.pos].isdigit(): + self.pos += 1 + + # Exponent + if self.pos < self.length and self.jsonStr[self.pos] in 'eE': + 
self.pos += 1 + if self.pos < self.length and self.jsonStr[self.pos] in '+-': + self.pos += 1 + while self.pos < self.length and self.jsonStr[self.pos].isdigit(): + self.pos += 1 + + raw = self.jsonStr[start_pos:self.pos] + try: + value = float(raw) if '.' in raw or 'e' in raw.lower() else int(raw) + except ValueError: + value = raw + + return Token(TokenType.NUMBER, value, start_pos, self.pos, raw) + + def readKeyword(self) -> Token: + """Read true, false, or null""" + start_pos = self.pos + + for keyword, token_type in [('true', TokenType.BOOLEAN), + ('false', TokenType.BOOLEAN), + ('null', TokenType.NULL)]: + if self.jsonStr[self.pos:].startswith(keyword): + self.pos += len(keyword) + value = True if keyword == 'true' else (False if keyword == 'false' else None) + return Token(token_type, value, start_pos, self.pos, keyword) + + # Partial keyword (truncated) + while self.pos < self.length and self.jsonStr[self.pos].isalpha(): + self.pos += 1 + raw = self.jsonStr[start_pos:self.pos] + return Token(TokenType.TRUNCATED, raw, start_pos, self.pos, raw) + + def nextToken(self) -> Token: + """Get the next token""" + self.skipWhitespace() + + if self.pos >= self.length: + return Token(TokenType.EOF, None, self.pos, self.pos, "") + + char = self.jsonStr[self.pos] + startPos = self.pos + + if char == '{': + self.pos += 1 + return Token(TokenType.OBJECT_START, '{', startPos, self.pos, '{') + elif char == '}': + self.pos += 1 + return Token(TokenType.OBJECT_END, '}', startPos, self.pos, '}') + elif char == '[': + self.pos += 1 + return Token(TokenType.ARRAY_START, '[', startPos, self.pos, '[') + elif char == ']': + self.pos += 1 + return Token(TokenType.ARRAY_END, ']', startPos, self.pos, ']') + elif char == ':': + self.pos += 1 + return Token(TokenType.COLON, ':', startPos, self.pos, ':') + elif char == ',': + self.pos += 1 + return Token(TokenType.COMMA, ',', startPos, self.pos, ',') + elif char == '"': + return self.readString() + elif char == '-' or char.isdigit(): + 
return self.readNumber() + elif char.isalpha(): + return self.readKeyword() + else: + # Unknown character, treat as truncated + self.pos += 1 + return Token(TokenType.TRUNCATED, char, startPos, self.pos, char) + + +@dataclass +class HierarchyLevel: + """Represents one level in the parsed hierarchy""" + type: str # "object" or "array" + start_pos: int + end_pos: int # -1 if not closed + key: Optional[str] # Key if this is a value in an object + index: Optional[int] # Index if this is in an array + content: dict # Parsed content at this level + raw_start: str # Raw string from start to children + children_content: List[Any] # For arrays: list of parsed elements + + +def getJsonContinuationContext( + truncatedJson: str, + budgetLimit: Optional[int] = None, + overlapMaxChars: Optional[int] = None +) -> Tuple[str, str, str]: + """ + Generate continuation contexts for a truncated JSON string. + + Generiert drei Kontexte für abgeschnittene JSON-Strings: + 1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält + 2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut mit Budget-Logik + 3. 
Complete Part: Der vollständige Teil des JSONs mit allen Strukturen geschlossen + + Args: + truncatedJson: The truncated JSON string + budgetLimit: Character budget for data values in hierarchy context (uses BUDGET_LIMIT if None) + overlapMaxChars: Maximum characters for overlap context (uses OVERLAP_MAX_CHARS if None) + + Returns: + Tuple of (overlapContext, hierarchyContext, completePart): + - overlapContext: The innermost object/element containing the cut (for merging) + - hierarchyContext: Full structure from root to cut with budget-limited values + - completePart: Valid JSON with all structures properly closed + """ + if budgetLimit is None: + budgetLimit = BUDGET_LIMIT + if overlapMaxChars is None: + overlapMaxChars = OVERLAP_MAX_CHARS + + analyzer = JsonAnalyzer(truncatedJson, budgetLimit, overlapMaxChars) + return analyzer.analyze() + + +class JsonAnalyzer: + """ + Analyzes truncated JSON and generates continuation contexts. + + Generates three contexts for truncated JSON strings: + 1. Overlap Context: The innermost object/array element containing the cut point + 2. Hierarchy Context: The hierarchical structure from root to cut with budget logic + 3. Complete Part: The complete part of the JSON with all structures properly closed + """ + + def __init__(self, jsonStr: str, budgetLimit: Optional[int] = None, overlapMaxChars: Optional[int] = None): + self.jsonStr = jsonStr + self.budgetLimit = budgetLimit if budgetLimit is not None else BUDGET_LIMIT + self.overlapMaxChars = overlapMaxChars if overlapMaxChars is not None else OVERLAP_MAX_CHARS + self.stack: List[StackFrame] = [] + self.hierarchy: List[dict] = [] # Parsed hierarchy info + + def analyze(self) -> Tuple[str, str, str]: + """ + Analyze the truncated JSON and return all three contexts. 
+ + Returns: + Tuple of (overlapContext, hierarchyContext, completePart) + """ + # Parse and track the structure + self._parseStructure() + + # Generate overlap context + overlapContext = self._generateOverlapContext() + + # Generate hierarchy context (use improved version) + hierarchyContext = self._renderWithBudgetV2() + + # Generate complete part (JSON with all structures closed) + completePart = self._generateCompletePart() + + return overlapContext, hierarchyContext, completePart + + def _generateCompletePart(self) -> str: + """ + Generate the complete part of the JSON with all structures properly closed. + + This creates valid JSON by closing all open strings, brackets/braces. + Unvollständige Keys werden entfernt, damit das Ergebnis valides JSON ist. + + Strategy: + 1. Take the full truncated JSON + 2. If we're in the middle of a string, close it + 3. Remove incomplete key-value pairs (keys without values) + 4. Close all open brackets/braces + """ + result = self.jsonStr.rstrip() + + # Remove trailing comma if present (after stripping) + if result.endswith(','): + result = result[:-1] + + # Check if we need to close an open string + stringClosing = self._getStringClosing(result) + result += stringClosing + + # Check if we're in the middle of a key (after colon) + # If string was just closed and we're after a colon with no value, remove the key + result = self._cleanIncompleteKeyValue(result) + + # Close all open structures + closingBrackets = self._getClosingBrackets(result) + + return result + closingBrackets + + def _getStringClosing(self, jsonStr: str) -> str: + """Check if there's an unclosed string and return closing quote if needed.""" + in_string = False + escaped = False + + for char in jsonStr: + if escaped: + escaped = False + continue + + if char == '\\' and in_string: + escaped = True + continue + + if char == '"': + in_string = not in_string + + return '"' if in_string else "" + + def _cleanIncompleteKeyValue(self, jsonStr: str) -> str: + """ + 
Clean up incomplete key-value pairs. + Handles cases like: + - {"key": "incompl -> keep (valid truncated value) + - {"key": -> remove key + - {"a": 1, "key -> remove incomplete key (was in middle of key name) + """ + stripped = jsonStr.rstrip() + + # Pattern: ends with colon (possibly with whitespace) - incomplete value + if stripped.endswith(':'): + # Find the start of this key and remove the whole key-value + return self._removeLastKey(stripped) + + # Check if we just closed a string that was an incomplete key + # Pattern: ..., "something" or { "something" where something has no colon after + # This happens when we close a truncated key name like "add" -> "add" + if stripped.endswith('"'): + # Look for the pattern: comma/bracket + whitespace + "string" + # and check if this was supposed to be a key + if self._isIncompleteKey(stripped): + return self._removeLastKey(stripped) + + return jsonStr + + def _isIncompleteKey(self, jsonStr: str) -> bool: + """ + Check if the last string in the JSON is an incomplete key in an object. + This happens when truncation occurred in the middle of a key name. + Only applies to objects, not arrays. 
+ """ + # Find the last complete string + pos = len(jsonStr) - 1 + if jsonStr[pos] != '"': + return False + + # Find the opening quote of this string + stringStart = pos - 1 + while stringStart >= 0: + if jsonStr[stringStart] == '"': + # Check it's not escaped + numBackslashes = 0 + checkPos = stringStart - 1 + while checkPos >= 0 and jsonStr[checkPos] == '\\': + numBackslashes += 1 + checkPos -= 1 + if numBackslashes % 2 == 0: + break + stringStart -= 1 + + if stringStart < 0: + return False + + # Now stringStart points to opening quote + # Check what's before it (skip whitespace) + beforePos = stringStart - 1 + while beforePos >= 0 and jsonStr[beforePos] in ' \t\n\r': + beforePos -= 1 + + if beforePos < 0: + return False + + # For this to be an incomplete key, it must be preceded by { or , + # AND we must be inside an object (not an array) + if jsonStr[beforePos] not in ',{': + return False + + # Now check if we're in an object context (not array) + # Count open braces/brackets to determine context + braceCount = 0 + bracketCount = 0 + inString = False + + for i in range(beforePos + 1): + char = jsonStr[i] + if char == '"' and (i == 0 or jsonStr[i-1] != '\\'): + inString = not inString + elif not inString: + if char == '{': + braceCount += 1 + elif char == '}': + braceCount -= 1 + elif char == '[': + bracketCount += 1 + elif char == ']': + bracketCount -= 1 + + # If we have more open braces than brackets at this point, + # we're in an object context + # Actually, we need to check the innermost container + # Let's track the stack properly + stack = [] + inString = False + + for i in range(beforePos + 1): + char = jsonStr[i] + if char == '"' and (i == 0 or jsonStr[i-1] != '\\'): + inString = not inString + elif not inString: + if char == '{': + stack.append('object') + elif char == '[': + stack.append('array') + elif char == '}': + if stack and stack[-1] == 'object': + stack.pop() + elif char == ']': + if stack and stack[-1] == 'array': + stack.pop() + + # If 
innermost container is an object, this is an incomplete key + return len(stack) > 0 and stack[-1] == 'object' + + def _removeLastKey(self, jsonStr: str) -> str: + """Remove the last incomplete key-value pair from the JSON string.""" + stripped = jsonStr.rstrip() + + # Find the last comma or opening bracket before the incomplete key + pos = len(stripped) - 1 + + # Skip past the current string/key + in_string = False + while pos >= 0: + char = stripped[pos] + if char == '"' and (pos == 0 or stripped[pos-1] != '\\'): + in_string = not in_string + if not in_string and char in ',{': + break + pos -= 1 + + if pos < 0: + return stripped + + if stripped[pos] == ',': + # Remove from comma onwards + return stripped[:pos] + elif stripped[pos] == '{': + # Keep the opening brace + return stripped[:pos+1] + + return stripped + + def _findLastCompletePosition(self) -> int: + """Find the position of the last complete value in the JSON.""" + tokenizer = JsonTokenizer(self.jsonStr) + last_complete_pos = 0 + stack_depth = 0 + last_value_end = 0 + in_value = False + + while True: + token = tokenizer.nextToken() + + if token.type == TokenType.EOF: + break + + if token.type == TokenType.TRUNCATED: + # Return position before the truncated part + break + + if token.type in (TokenType.OBJECT_START, TokenType.ARRAY_START): + stack_depth += 1 + in_value = True + + elif token.type in (TokenType.OBJECT_END, TokenType.ARRAY_END): + stack_depth -= 1 + last_value_end = token.end_pos + in_value = False + + elif token.type == TokenType.STRING: + # Check if this is a key or a value + saved_pos = tokenizer.pos + tokenizer.skipWhitespace() + next_char = tokenizer.peek() + tokenizer.pos = saved_pos + + if next_char != ':': + # It's a value + last_value_end = token.end_pos + in_value = False + + elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL): + last_value_end = token.end_pos + in_value = False + + elif token.type == TokenType.COMMA: + # After a comma, we've completed a value + 
last_complete_pos = last_value_end + + # Return the last complete position + return last_value_end if last_value_end > 0 else len(self.jsonStr) + + def _getClosingBrackets(self, jsonStr: str) -> str: + """Determine what closing brackets are needed.""" + stack = [] + in_string = False + escaped = False + + for char in jsonStr: + if escaped: + escaped = False + continue + + if char == '\\' and in_string: + escaped = True + continue + + if char == '"': + in_string = not in_string + continue + + if in_string: + continue + + if char == '{': + stack.append('}') + elif char == '[': + stack.append(']') + elif char == '}': + if stack and stack[-1] == '}': + stack.pop() + elif char == ']': + if stack and stack[-1] == ']': + stack.pop() + + # Return closing brackets in reverse order + return ''.join(reversed(stack)) + + def _parseStructure(self): + """Parse the JSON structure and track hierarchy""" + tokenizer = JsonTokenizer(self.jsonStr) + + while True: + token = tokenizer.nextToken() + + if token.type == TokenType.EOF or token.type == TokenType.TRUNCATED: + break + + if token.type == TokenType.OBJECT_START: + frame = StackFrame( + type="object", + start_pos=token.start_pos, + keys_seen=[] + ) + self.stack.append(frame) + + elif token.type == TokenType.ARRAY_START: + frame = StackFrame( + type="array", + start_pos=token.start_pos, + index=0 + ) + self.stack.append(frame) + + elif token.type == TokenType.OBJECT_END: + if self.stack and self.stack[-1].type == "object": + self.stack.pop() + + elif token.type == TokenType.ARRAY_END: + if self.stack and self.stack[-1].type == "array": + self.stack.pop() + + elif token.type == TokenType.STRING: + # Could be a key or a value + self._handleStringToken(token, tokenizer) + + elif token.type == TokenType.COMMA: + # Increment array index + if self.stack and self.stack[-1].type == "array": + self.stack[-1].index += 1 + + def _handleStringToken(self, token: Token, tokenizer: JsonTokenizer): + """Handle a string token (could be key or 
value)""" + if self.stack and self.stack[-1].type == "object": + # Check if this is a key (followed by colon) + saved_pos = tokenizer.pos + tokenizer.skipWhitespace() + next_char = tokenizer.peek() + + if next_char == ':': + # This is a key + self.stack[-1].key = token.value + self.stack[-1].keys_seen.append(token.value) + + tokenizer.pos = saved_pos + + def _generateOverlapContext(self) -> str: + """ + Generate the overlap context - the innermost object/array element containing the cut. + + Returns the raw string from the start of that element to the end of the truncated JSON. + Dieser Kontext wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu mergen. + Exakt so wie im Original-String (für String-Matching beim Merge). + """ + if not self.stack: + # No structure, return last overlap_max_chars characters + return self.jsonStr[-self.overlapMaxChars:] + + # Find the innermost container that should be the overlap + # For arrays: the current array element + # For objects: the current object + + innermost_start = self._findInnermostElementStart() + + overlap = self.jsonStr[innermost_start:] + + # Apply max chars limit + if len(overlap) > self.overlapMaxChars: + overlap = self.jsonStr[-self.overlapMaxChars:] + + return overlap + + def _findInnermostElementStart(self) -> int: + """Find the start position of the innermost element for overlap""" + if not self.stack: + return max(0, len(self.jsonStr) - self.overlapMaxChars) + + # Walk through stack to find the innermost array element or object + # We want the innermost "atomic" unit that contains the cut + + # Strategy: + # - If innermost is an object: return its start + # - If innermost is an array: + # - If current element is an object/array: return start of that element + # - If current element is a primitive: return start of array or last N chars + + innermost = self.stack[-1] + + if innermost.type == "object": + return innermost.start_pos + else: + # It's an array - find the start of the current element + 
element_start = self._findArrayElementStart(innermost) + + # Check if the element is a primitive or complex type + element_content = self.jsonStr[element_start:].strip() + + # If it starts with { or [ it's complex, return the element start + if element_content and element_content[0] in '{[': + return element_start + else: + # Primitive in array - check if there's a parent object + # or return overlap_max_chars from end + for i in range(len(self.stack) - 2, -1, -1): + if self.stack[i].type == "object": + return self.stack[i].start_pos + + # No parent object, return max chars from end + return max(0, len(self.jsonStr) - self.overlapMaxChars) + + def _findArrayElementStart(self, arrayFrame: StackFrame) -> int: + """Find the start position of the current array element""" + # We need to find the start of the current element in the array + # Parse from array start to find element boundaries + + arrayContent = self.jsonStr[arrayFrame.start_pos:] + + # Skip the opening bracket and whitespace + pos = 1 + while pos < len(arrayContent) and arrayContent[pos] in ' \t\n\r': + pos += 1 + + elementStarts = [arrayFrame.start_pos + pos] + depth = 0 + inString = False + escaped = False + + i = pos + while i < len(arrayContent): + char = arrayContent[i] + + if escaped: + escaped = False + i += 1 + continue + + if char == '\\' and inString: + escaped = True + i += 1 + continue + + if char == '"': + inString = not inString + i += 1 + continue + + if inString: + i += 1 + continue + + if char in '{[': + depth += 1 + elif char in '}]': + depth -= 1 + elif char == ',' and depth == 0: + # Found element boundary + i += 1 + # Skip whitespace + while i < len(arrayContent) and arrayContent[i] in ' \t\n\r': + i += 1 + elementStarts.append(arrayFrame.start_pos + i) + + i += 1 + + # Return the start of the current element + if arrayFrame.index < len(elementStarts): + return elementStarts[arrayFrame.index] + elif elementStarts: + return elementStarts[-1] + else: + return arrayFrame.start_pos + + def 
_generateHierarchyContext(self) -> str: + """ + Generate the hierarchy context with budget logic. + Shows structure from root to cut point with data values limited by budget. + """ + if not self.stack: + # No structure + return self.jsonStr[-self.overlapMaxChars:] + + # We need to rebuild the JSON with budget logic + # Priority: elements closer to cut get full values, distant ones get "..." + + return self._rebuildWithBudget() + + def _rebuildWithBudget(self) -> str: + """Rebuild JSON from root to cut with budget constraints""" + + # Strategy: + # 1. Parse the JSON structure tracking all values + # 2. Calculate total value size + # 3. Apply budget from cut backwards + # 4. Render with "..." for values outside budget + + # First, get a structured representation + structure = self._parseForHierarchy() + + # Now render with budget + return self._renderWithBudget(structure) + + def _parseForHierarchy(self) -> dict: + """Parse JSON into a structure suitable for hierarchy rendering""" + + result = { + 'type': 'root', + 'children': [], + 'raw_positions': [] + } + + tokenizer = JsonTokenizer(self.jsonStr) + stack = [result] + current_key = None + + while True: + token = tokenizer.nextToken() + + if token.type == TokenType.EOF: + break + + if token.type == TokenType.TRUNCATED: + # Mark the truncation point + if stack: + current = stack[-1] + if current.get('type') == 'object': + if current_key: + current['children'].append({ + 'type': 'truncated_value', + 'key': current_key, + 'raw': self.jsonStr[token.start_pos:], + 'start_pos': token.start_pos + }) + elif current.get('type') == 'array': + current['children'].append({ + 'type': 'truncated_value', + 'raw': self.jsonStr[token.start_pos:], + 'start_pos': token.start_pos + }) + break + + if token.type == TokenType.OBJECT_START: + obj = { + 'type': 'object', + 'key': current_key, + 'children': [], + 'start_pos': token.start_pos + } + if stack: + stack[-1]['children'].append(obj) + stack.append(obj) + current_key = None + + elif 
token.type == TokenType.ARRAY_START: + arr = { + 'type': 'array', + 'key': current_key, + 'children': [], + 'start_pos': token.start_pos + } + if stack: + stack[-1]['children'].append(arr) + stack.append(arr) + current_key = None + + elif token.type == TokenType.OBJECT_END: + if len(stack) > 1 and stack[-1].get('type') == 'object': + stack[-1]['end_pos'] = token.end_pos + stack[-1]['complete'] = True + stack.pop() + + elif token.type == TokenType.ARRAY_END: + if len(stack) > 1 and stack[-1].get('type') == 'array': + stack[-1]['end_pos'] = token.end_pos + stack[-1]['complete'] = True + stack.pop() + + elif token.type == TokenType.STRING: + # Check if it's a key + saved_pos = tokenizer.pos + tokenizer.skipWhitespace() + next_char = tokenizer.peek() + + if next_char == ':' and stack and stack[-1].get('type') == 'object': + current_key = token.value + else: + # It's a value + value_node = { + 'type': 'value', + 'key': current_key, + 'value': token.value, + 'raw': token.raw, + 'start_pos': token.start_pos, + 'end_pos': token.end_pos, + 'value_type': 'string' + } + if stack: + stack[-1]['children'].append(value_node) + current_key = None + + tokenizer.pos = saved_pos + + elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL): + value_node = { + 'type': 'value', + 'key': current_key, + 'value': token.value, + 'raw': token.raw, + 'start_pos': token.start_pos, + 'end_pos': token.end_pos, + 'value_type': str(token.type.value) + } + if stack: + stack[-1]['children'].append(value_node) + current_key = None + + return result + + def _renderWithBudget(self, structure: dict) -> str: + """Render the structure with budget constraints""" + + # First, collect all value nodes with their distances from cut + cutPos = len(self.jsonStr) + allValues = self._collectValuesWithDistance(structure, cutPos) + + # Sort by distance (closest to cut first) + allValues.sort(key=lambda x: x['distance']) + + # Determine which values get full rendering + budgetRemaining = 
self.budgetLimit + valuesWithBudget = set() + + for valInfo in allValues: + valSize = len(str(valInfo['raw'])) + if budgetRemaining >= valSize: + valuesWithBudget.add(valInfo['id']) + budgetRemaining -= valSize + + # Now render the structure + return self._renderNode(structure, valuesWithBudget, indent=0) + + def _collectValuesWithDistance(self, node: dict, cutPos: int, depth: int = 0) -> list: + """Collect all value nodes with their distance from cut point""" + values = [] + + if node.get('type') == 'value': + endPos = node.get('end_pos', cutPos) + distance = cutPos - endPos + values.append({ + 'id': id(node), + 'node': node, + 'distance': distance, + 'raw': node.get('raw', ''), + 'depth': depth + }) + elif node.get('type') == 'truncated_value': + values.append({ + 'id': id(node), + 'node': node, + 'distance': 0, # Truncated values are at the cut + 'raw': node.get('raw', ''), + 'depth': depth + }) + + for child in node.get('children', []): + values.extend(self._collectValuesWithDistance(child, cutPos, depth + 1)) + + return values + + def _renderNode(self, node: dict, valuesWithBudget: set, indent: int = 0) -> str: + """Render a node with budget constraints""" + indent_str = " " * indent + + node_type = node.get('type') + + if node_type == 'root': + parts = [] + for child in node.get('children', []): + parts.append(self._renderNode(child, valuesWithBudget, indent)) + return '\n'.join(parts) + + elif node_type == 'object': + return self._renderObject(node, valuesWithBudget, indent) + + elif node_type == 'array': + return self._renderArray(node, valuesWithBudget, indent) + + elif node_type == 'value': + return self._renderValue(node, valuesWithBudget, indent) + + elif node_type == 'truncated_value': + return node.get('raw', '') + + return '' + + def _renderObject(self, node: dict, valuesWithBudget: set, indent: int) -> str: + """Render an object node""" + indent_str = " " * indent + inner_indent = " " * (indent + 1) + + key_prefix = "" + if node.get('key'): + 
key_prefix = f'"{node["key"]}": ' + + if not node.get('children'): + if node.get('complete'): + return f"{key_prefix}{{}}" + else: + return f"{key_prefix}{{" + + parts = [f"{key_prefix}{{"] + + children = node.get('children', []) + for i, child in enumerate(children): + child_rendered = self._renderNode(child, valuesWithBudget, indent + 1) + + # Add comma if not last and next sibling exists + if i < len(children) - 1: + if child.get('type') != 'truncated_value': + parts.append(f"{inner_indent}{child_rendered},") + else: + parts.append(f"{inner_indent}{child_rendered}") + else: + parts.append(f"{inner_indent}{child_rendered}") + + if node.get('complete'): + parts.append(f"{indent_str}}}") + + return '\n'.join(parts) + + def _renderArray(self, node: dict, valuesWithBudget: set, indent: int) -> str: + """Render an array node""" + indent_str = " " * indent + inner_indent = " " * (indent + 1) + + key_prefix = "" + if node.get('key'): + key_prefix = f'"{node["key"]}": ' + + if not node.get('children'): + if node.get('complete'): + return f"{key_prefix}[]" + else: + return f"{key_prefix}[" + + parts = [f"{key_prefix}["] + + children = node.get('children', []) + for i, child in enumerate(children): + child_rendered = self._renderNode(child, valuesWithBudget, indent + 1) + + if i < len(children) - 1: + if child.get('type') != 'truncated_value': + parts.append(f"{inner_indent}{child_rendered},") + else: + parts.append(f"{inner_indent}{child_rendered}") + else: + parts.append(f"{inner_indent}{child_rendered}") + + if node.get('complete'): + parts.append(f"{indent_str}]") + + return '\n'.join(parts) + + def _renderValue(self, node: dict, valuesWithBudget: set, indent: int) -> str: + """Render a value node""" + key_prefix = "" + if node.get('key'): + key_prefix = f'"{node["key"]}": ' + + if id(node) in valuesWithBudget: + # Full value + default_raw = '"...\"' + raw_value = node.get('raw', default_raw) + return f"{key_prefix}{raw_value}" + else: + # Placeholder + return 
f'{key_prefix}"..."' + + def _renderWithBudgetV2(self) -> str: + """ + Generate hierarchy context with budget logic. + + Alternative rendering that stays closer to the original truncated string. + Shows full context near the cut, replaces distant values with "...". + + Budget-Logik: + 1. Sammeln: Alle String-Werte werden mit ihrer Position gesammelt + 2. Sortieren: Nach Entfernung zum Cut-Punkt (näher = höhere Priorität) + 3. Zuweisen: Budget wird von hinten nach vorne aufgebraucht + 4. Ersetzen: Werte außerhalb des Budgets werden durch "..." ersetzt + """ + # Parse to understand structure, but render from original string with modifications + structure = self._parseForHierarchy() + + # Collect all complete value nodes with positions + allValues = self._collectCompleteValues(structure) + + # Sort by end position (furthest from cut = first to be truncated) + allValues.sort(key=lambda x: x['end_pos']) + + # Apply budget: replace values from the start until budget exhausted + budgetUsed = 0 + totalAvailable = sum(len(v['raw']) for v in allValues) + + valuesToReplace = [] + + for val in allValues: + valSize = len(val['raw']) + if totalAvailable - budgetUsed > self.budgetLimit: + # This value should be replaced with "..." 
+ valuesToReplace.append(val) + budgetUsed += valSize + else: + break + + # Build the modified string + result = self.jsonStr + + # Replace from end to start to preserve positions + valuesToReplace.sort(key=lambda x: x['start_pos'], reverse=True) + + for val in valuesToReplace: + start = val['start_pos'] + end = val['end_pos'] + result = result[:start] + '"..."' + result[end:] + + return result + + def _collectCompleteValues(self, node: dict) -> list: + """Collect all complete (non-truncated) value nodes""" + values = [] + + if node.get('type') == 'value' and node.get('value_type') == 'string': + values.append({ + 'start_pos': node['start_pos'], + 'end_pos': node['end_pos'], + 'raw': node['raw'], + 'key': node.get('key') + }) + + for child in node.get('children', []): + values.extend(self._collectCompleteValues(child)) + + return values + + +def extractContinuationContexts( + truncatedJson: str +) -> Tuple[str, str, str]: + """ + Main entry point: Extract all three continuation contexts from a truncated JSON. + + Generiert drei Kontexte für abgeschnittene JSON-Strings: + 1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält + - Wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu mergen + - Exakt so wie im Original-String (für String-Matching beim Merge) + + 2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut-Punkt + - Mit Budget-Logik: Näher am Cut = vollständige Werte, weiter weg = "..." Platzhalter + - Gibt der AI den Kontext der gesamten JSON-Struktur + + 3. Complete Part: Der vollständige, valide JSON bis zum Cut-Punkt + - Alle offenen Strukturen werden geschlossen (}, ], ") + - Unvollständige Keys werden entfernt + - Kann direkt als valides JSON geparst werden + + Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS. 
+ + Args: + truncatedJson: The truncated JSON string + + Returns: + Tuple of (overlapContext, hierarchyContext, completePart): + - overlapContext: The innermost object/element containing the cut (for merging) + - hierarchyContext: Full structure from root to cut with budget-limited values + - completePart: Valid JSON with all structures properly closed + + Example: + >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor' + >>> overlap, hierarchy, complete = extractContinuationContexts(jsonStr) + >>> import json + >>> parsed = json.loads(complete) # ✓ Funktioniert! + """ + return getJsonContinuationContext(truncatedJson) + + +# Convenience function with named results +def getContexts( + truncatedJson: str +) -> JsonContinuationContexts: + """ + Get all contexts as a Pydantic model with named fields. + + Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS. + + Args: + truncatedJson: The truncated JSON string + + Returns: + JsonContinuationContexts Pydantic model with: + - overlapContext: The innermost object/element containing the cut + - hierarchyContext: Full structure with budget-limited values + - completePart: Valid JSON with all structures properly closed + + Example: + >>> json_str = '{"users": [{"name": "John", "bio": "Hello Wor' + >>> contexts = getContexts(json_str) + >>> print(contexts.overlapContext) + >>> print(contexts.hierarchyContext) + >>> print(contexts.completePart) + """ + overlap, hierarchy, completePart = extractContinuationContexts(truncatedJson) + return JsonContinuationContexts( + overlapContext=overlap, + hierarchyContext=hierarchy, + completePart=completePart + ) diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index d89b2f76..2e60ce69 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -703,144 +703,6 @@ def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str, return [] -def _extractOverlapFromElement(elem: Dict[str, Any], elemType: str) -> 
Optional[Dict[str, Any]]: - """ - GENERIC function to extract overlap portion from an element. - - Handles elements of any size, including very long strings: - - Paragraphs: Extract last N characters/words - - Code blocks: Extract last N lines - - Tables: Extract last N rows - - Lists: Extract last N items - - Other elements: Extract representative portion - - Args: - elem: Element dictionary - elemType: Element type (table, paragraph, code_block, etc.) - - Returns: - Overlap element dictionary with size-limited content, or None - """ - if not isinstance(elem, dict): - return None - - # Get content (handle both flat and nested structures) - content = elem.get("content", {}) if isinstance(elem.get("content"), dict) else {} - - if elemType == "table": - rows = elem.get("rows", []) or content.get("rows", []) - headers = elem.get("headers", []) or content.get("headers", []) - - if rows: - # Extract last 3-5 rows as overlap (enough for context, not too large) - overlapRowCount = min(5, len(rows)) - overlapRows = rows[-overlapRowCount:] - - overlapElem = { - "type": "table", - "content": { - "headers": headers, - "rows": overlapRows - } - } - return overlapElem - - elif elemType in ["bullet_list", "numbered_list"]: - items = elem.get("items", []) or content.get("items", []) - - if items: - # Extract last 5-10 items as overlap - overlapItemCount = min(10, len(items)) - overlapItems = items[-overlapItemCount:] - - overlapElem = { - "type": elemType, - "content": { - "items": overlapItems - } - } - return overlapElem - - elif elemType == "paragraph": - text = elem.get("text", "") or content.get("text", "") - - if text: - # Extract last portion of text - # For very long text, use last 300-500 characters - # For shorter text, use all of it - maxOverlapChars = 500 - minOverlapChars = 100 - - if len(text) > maxOverlapChars: - # Very long text - extract last portion - # Try to break at word boundary for readability - textSnippet = text[-maxOverlapChars:] - # Find first 
space/newline to start from word boundary - firstSpace = textSnippet.find(' ') - if firstSpace > 0 and firstSpace < 50: - textSnippet = textSnippet[firstSpace + 1:] - overlapText = textSnippet - elif len(text) > minOverlapChars: - # Medium text - use last portion - overlapText = text[-minOverlapChars:] - else: - # Short text - use all - overlapText = text - - overlapElem = { - "type": "paragraph", - "content": { - "text": overlapText - } - } - return overlapElem - - elif elemType == "code_block": - code = elem.get("code", "") or content.get("code", "") - - if code: - # Extract last N lines of code - codeLines = code.split('\n') - # Use last 10-20 lines as overlap (enough context for continuation) - overlapLineCount = min(20, len(codeLines)) - overlapLines = codeLines[-overlapLineCount:] - overlapCode = '\n'.join(overlapLines) - - overlapElem = { - "type": "code_block", - "content": { - "code": overlapCode - } - } - return overlapElem - - elif elemType == "heading": - # Headings are usually short - return as-is - return elem - - elif elemType == "image": - # Images are usually small - return as-is - return elem - - else: - # Generic element - try to extract a representative portion - # Convert to JSON and limit size - elemJson = json.dumps(elem, ensure_ascii=False) - - # If element is very large, try to extract key fields only - if len(elemJson) > 1000: - # Extract only essential fields - overlapElem = { - "type": elemType, - "id": elem.get("id"), - "content": "..." # Indicate truncated content - } - return overlapElem - - # Small element - return as-is - return elem - - def buildContinuationContext( allSections: List[Dict[str, Any]], lastRawResponse: Optional[str] = None, @@ -980,1989 +842,52 @@ def buildContinuationContext( delivered_summary = "\n".join(summary_lines) - # Extract cut-off point using new algorithm - # 1. Loop over all sections until finding incomplete section - # 2. 
In incomplete section, loop through elements until finding cut-off element - # CRITICAL: There is always only ONE section incomplete (JSON cut-off point) - cut_off_element = None - element_before_cutoff = None - - if lastRawResponse: - try: - # CRITICAL: Always try to find incomplete section from raw JSON - # Even if JSON can be parsed, it might be incomplete (cut off mid-element) - raw_stripped = stripCodeFences(lastRawResponse.strip()).strip() - - # Check if response is just a fragment (not full JSON structure) - # Fragments are continuation content that should be appended to the last incomplete element - is_fragment = not (raw_stripped.strip().startswith('{') or raw_stripped.strip().startswith('[')) - - if is_fragment: - # Response is a fragment - it continues the last incomplete element - # Find the last incomplete element from allSections - if allSections: - last_section = allSections[-1] - elements = last_section.get("elements", []) - if isinstance(elements, list) and elements: - # Get the last element (which should be incomplete) - last_elem = elements[-1] - if isinstance(last_elem, dict): - # The fragment continues this element - # Show the fragment as cut_off_element - cut_off_element = raw_stripped - # Show the element before (if there is one) - if len(elements) > 1: - element_before_cutoff = json.dumps(elements[-2]) - else: - element_before_cutoff = json.dumps(last_elem) - else: - # Response is full JSON - use standard extraction - # Strategy 1: Try to find incomplete section using structured parsing - incomplete_section = _findIncompleteSectionInRaw(raw_stripped) - if incomplete_section: - cut_off_element, element_before_cutoff = _extractCutOffElements(incomplete_section, raw_stripped) - - # Strategy 2: If no incomplete section found, extract directly from raw JSON - # This handles cases where JSON is cut off mid-element within a complete section - if not cut_off_element: - cut_off_element, element_before_cutoff = 
_extractCutOffElementsFromRaw(raw_stripped, allSections) - except Exception as e: - logger.debug(f"Error extracting cut-off point: {e}") - - # Extract overlap information for continuation prompt - # GENERIC overlap extraction: handles elements of any size, including long strings - # Strategy: Extract last N elements, but if an element is very large, extract only a portion - overlapElements = [] - overlapString = "" - - if allSections: - # Get last section - lastSection = allSections[-1] - elements = lastSection.get("elements", []) - - if isinstance(elements, list) and len(elements) > 0: - # Extract last 2-3 complete elements as overlap context - # This helps the AI understand what was already delivered - overlapCount = min(3, len(elements)) - overlapElements = elements[-overlapCount:] - - # Build overlap string showing these elements (with size limits for large elements) - overlapStrings = [] - for elem in overlapElements: - if isinstance(elem, dict): - elemType = elem.get("type", "unknown") - overlapElem = _extractOverlapFromElement(elem, elemType) - if overlapElem: - overlapStrings.append(json.dumps(overlapElem, ensure_ascii=False)) - else: - # Non-dict element - show as-is (but limit size) - elemStr = json.dumps(elem, ensure_ascii=False) - if len(elemStr) > 500: - elemStr = elemStr[:500] + "..." 
- overlapStrings.append(elemStr) - - if overlapStrings: - overlapString = ",\n".join(overlapStrings) - - # Store raw JSON response and extract structure context + # Extract continuation contexts using centralized jsonContinuation module + # This is the single source of truth for handling cut-off JSON strings last_raw_json = lastRawResponse or "" last_complete_part = "" incomplete_part = "" - structure_context = "" + overlap_context = "" + hierarchy_context = "" if lastRawResponse: - # Extract JSON structure context for continuation prompt - # This provides: last complete part, incomplete part, structure context - # NOTE: template_structure is now passed as parameter, not extracted try: - structureContext = extractJsonStructureContext(lastRawResponse, useCaseId) - last_complete_part = structureContext.get("last_complete_part", "") - incomplete_part = structureContext.get("incomplete_part", "") - structure_context = structureContext.get("structure_context", "") + from modules.shared.jsonContinuation import getContexts + + # Normalize JSON string + normalized = stripCodeFences(normalizeJsonText(lastRawResponse)).strip() + if normalized: + # Find first '{' or '[' to start + startIdx = -1 + for i, char in enumerate(normalized): + if char in '{[': + startIdx = i + break + + if startIdx >= 0: + jsonContent = normalized[startIdx:] + contexts = getContexts(jsonContent) + + # Store all contexts from centralized module + last_complete_part = contexts.completePart + incomplete_part = jsonContent[len(contexts.completePart):].strip() + overlap_context = contexts.overlapContext + hierarchy_context = contexts.hierarchyContext except Exception as e: - logger.warning(f"Error extracting JSON structure context: {e}", exc_info=True) + logger.warning(f"Error extracting JSON continuation contexts: {e}", exc_info=True) # Return ContinuationContext Pydantic model return ContinuationContext( section_count=section_count, delivered_summary=delivered_summary, - cut_off_element=cut_off_element, 
- element_before_cutoff=element_before_cutoff, - template_structure=templateStructure, # Use passed parameter, not extracted + template_structure=templateStructure, last_complete_part=last_complete_part, incomplete_part=incomplete_part, - structure_context=structure_context, - last_raw_json=last_raw_json + last_raw_json=last_raw_json, + overlap_context=overlap_context, + hierarchy_context=hierarchy_context ) - -def extractJsonStructureContext( - incompleteJson: str, - useCaseId: Optional[str] = None -) -> Dict[str, Any]: - """ - Extract JSON structure context from incomplete JSON for continuation prompts. - - Extracts: - 1. Template JSON structure of the complete object (structure only, no content) - 2. Last complete part (last complete element/object) - 3. Incomplete part (the cut-off portion) - 4. Structure context (parent structure metadata only, no content) - - Args: - incompleteJson: Incomplete JSON string (may be cut off mid-element) - useCaseId: Optional use case ID to determine expected structure - - Returns: - Dict with: - - template_structure: Template JSON structure (structure only) - - last_complete_part: Last complete element/object as JSON string - - incomplete_part: Incomplete/cut-off portion as JSON string - - structure_context: Parent structure metadata (keys only, no content) - """ - from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText - - result = { - "template_structure": "", - "last_complete_part": "", - "incomplete_part": "", - "structure_context": "" - } - - if not incompleteJson or not incompleteJson.strip(): - return result - - # Normalize JSON string - normalized = stripCodeFences(normalizeJsonText(incompleteJson)).strip() - if not normalized: - return result - - # Find first '{' or '[' to start - startIdx = -1 - for i, char in enumerate(normalized): - if char in '{[': - startIdx = i - break - - if startIdx == -1: - return result - - jsonContent = normalized[startIdx:] - - # Step 1: Extract template structure (structure 
only, no content) - templateStructure = _extractTemplateStructure(jsonContent, useCaseId) - result["template_structure"] = templateStructure - - # Step 2: Find last complete part and incomplete part - lastComplete, incompletePart = _extractLastCompleteAndIncomplete(jsonContent) - result["last_complete_part"] = lastComplete - result["incomplete_part"] = incompletePart - - # Step 3: Extract structure context (parent structure metadata only) - # Pass both incomplete part and last complete part to show positions - structureContext = _extractStructureContext(jsonContent, incompletePart, lastComplete) - result["structure_context"] = structureContext - - return result - - -def _extractTemplateStructure(jsonContent: str, useCaseId: Optional[str] = None) -> str: - """ - Extract template JSON structure (structure only, no content). - - Examples: - - {"documents": [{"chapters": [{"sections": [...]}]}]} - - {"elements": [{"type": "...", "content": {...}}]} - """ - import json - import re - - # Try to parse JSON to understand structure - try: - # Try to close and parse - closed = closeJsonStructures(jsonContent) - parsed = json.loads(closed) - - # Build template structure (keys only, no content) - template = _buildStructureTemplate(parsed) - return json.dumps(template, indent=2, ensure_ascii=False) - except Exception: - # If parsing fails, try to extract structure from string - # Look for top-level keys - topLevelKeys = [] - - # Pattern: "key": { or "key": [ - keyPattern = r'"([^"]+)"\s*:\s*[{\[]' - matches = re.findall(keyPattern, jsonContent) - if matches: - topLevelKeys = matches[:3] # Take first 3 keys - - # Build template based on use case or detected keys - if useCaseId == "chapter_structure": - return json.dumps({"documents": [{"chapters": [{"id": "", "title": "", "level": 0}]}]}, indent=2, ensure_ascii=False) - elif useCaseId == "section_content": - return json.dumps({"elements": [{"type": "", "content": {}}]}, indent=2, ensure_ascii=False) - elif useCaseId == 
"code_structure": - return json.dumps({"files": [{"id": "", "filename": "", "fileType": ""}]}, indent=2, ensure_ascii=False) - elif topLevelKeys: - # Build generic template - template = {} - for key in topLevelKeys: - template[key] = [] - return json.dumps(template, indent=2, ensure_ascii=False) - else: - return json.dumps({}, indent=2, ensure_ascii=False) - - -def _buildStructureTemplate(obj: Any, maxDepth: int = 3) -> Any: - """ - Build structure template from parsed JSON (keys only, no content). - """ - if isinstance(obj, dict): - template = {} - for key, value in obj.items(): - if isinstance(value, (dict, list)): - template[key] = _buildStructureTemplate(value, maxDepth - 1) if maxDepth > 0 else None - else: - # Keep key but use empty value of same type - if isinstance(value, str): - template[key] = "" - elif isinstance(value, (int, float)): - template[key] = 0 - elif isinstance(value, bool): - template[key] = False - else: - template[key] = None - return template - elif isinstance(obj, list) and obj: - # Use first element as template - return [_buildStructureTemplate(obj[0], maxDepth - 1) if maxDepth > 0 else None] - else: - return None - - -def _extractLastCompleteAndIncomplete(jsonContent: str) -> Tuple[str, str]: - """ - Extract last complete part and incomplete part from JSON. 
- - Returns: - Tuple of (last_complete_part, incomplete_part) as JSON strings - """ - import json - - # Try to find the last complete element/object - # Strategy: Parse backwards, find where structures are balanced - - # Count braces and brackets to find where JSON becomes incomplete - braceCount = 0 - bracketCount = 0 - lastCompleteEnd = -1 - - inString = False - escapeNext = False - - for i, char in enumerate(jsonContent): - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '{': - braceCount += 1 - elif char == '}': - braceCount -= 1 - if braceCount == 0 and bracketCount == 0: - # Found end of complete structure - lastCompleteEnd = i + 1 - elif char == '[': - bracketCount += 1 - elif char == ']': - bracketCount -= 1 - if braceCount == 0 and bracketCount == 0: - # Found end of complete structure - lastCompleteEnd = i + 1 - - # Extract parts - if lastCompleteEnd > 0: - lastCompletePart = jsonContent[:lastCompleteEnd] - incompletePart = jsonContent[lastCompleteEnd:].strip() - - # Try to find last complete element within the structure - # Look for last complete object/array element - lastCompleteElement = _findLastCompleteElement(lastCompletePart) - if lastCompleteElement: - # Build context for incomplete part - show structure around the break - incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd) - return lastCompleteElement, incompleteWithContext - else: - # Build context for incomplete part - incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd) - return lastCompletePart, incompleteWithContext - else: - # No complete structure found - everything is incomplete - # Still try to show context - incompleteWithContext = buildIncompleteContext(jsonContent, 0) - return "", incompleteWithContext - - -def _findLastCompleteElement(jsonStr: str) -> str: - """ - Find the last complete element 
in JSON string. - """ - import json - - # Try to parse and extract last element - try: - closed = closeJsonStructures(jsonStr) - parsed = json.loads(closed) - - # If it's a dict with arrays, get last element from first array - if isinstance(parsed, dict): - for key, value in parsed.items(): - if isinstance(value, list) and value: - lastElem = value[-1] - return json.dumps(lastElem, indent=2, ensure_ascii=False) - - # If it's a list, get last element - if isinstance(parsed, list) and parsed: - lastElem = parsed[-1] - return json.dumps(lastElem, indent=2, ensure_ascii=False) - except Exception: - pass - - # Fallback: try to find last complete object using brace matching - braceCount = 0 - startPos = -1 - lastCompleteEnd = -1 - - for i, char in enumerate(jsonStr): - if char == '{': - if braceCount == 0: - startPos = i - braceCount += 1 - elif char == '}': - braceCount -= 1 - if braceCount == 0 and startPos >= 0: - lastCompleteEnd = i + 1 - - if lastCompleteEnd > 0: - return jsonStr[startPos:lastCompleteEnd] - - return "" - - -def buildIncompleteContext(jsonContent: str, breakPosition: int) -> str: - """ - Build hierarchical context showing incomplete JSON structure. 
- - Shows: - - Full hierarchy structure (always shown) - - Complete elements before cut (within 200 char DATA budget) - - Cut piece marked with <-- CUT POINT (incomplete) - - Does NOT close open structures - """ - if breakPosition <= 0 or breakPosition > len(jsonContent): - return jsonContent - - hierarchy = findStructureHierarchy(jsonContent, breakPosition) - if not hierarchy: - return jsonContent[:breakPosition] - - cutPiece = extractCutPiece(jsonContent, breakPosition) - resultLines = [] - DATA_BUDGET = 500 - - # Build hierarchy level by level - show actual JSON structure - for levelIndex, level in enumerate(hierarchy): - levelType = level['type'] - levelStart = level['start_pos'] - levelDepth = level['depth'] - indent = " " * levelDepth - isCutLevel = (levelIndex == len(hierarchy) - 1) - isParentOfCutLevel = (levelIndex == len(hierarchy) - 2) - - # Get next level info - if levelIndex < len(hierarchy) - 1: - nextLevel = hierarchy[levelIndex + 1] - nextLevelStart = nextLevel['start_pos'] - else: - nextLevelStart = breakPosition - - # Show opening structure for this level - # For cut level, check if cut piece already starts with bracket/brace - if so, don't duplicate - if isCutLevel and cutPiece: - cutPieceStripped = cutPiece.strip() - if (levelType == 'array' and cutPieceStripped.startswith('[')) or \ - (levelType == 'object' and cutPieceStripped.startswith('{')): - # Cut piece already includes opening bracket, don't add it separately - # Use parent level's child indent (cut element is a child of parent, not a separate level) - if levelIndex > 0: - parentLevel = hierarchy[levelIndex - 1] - parentIndent = " " * parentLevel['depth'] - childIndent = parentIndent + " " - else: - childIndent = indent + " " - for line in cutPiece.split('\n'): - stripped = line.strip() - if stripped: - resultLines.append(f'{childIndent}{stripped}') - resultLines[-1] += ' <-- CUT POINT (incomplete)' - else: - # Cut piece doesn't start with matching bracket, add opening structure - 
resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[') - childIndent = indent + " " - for line in cutPiece.split('\n'): - stripped = line.strip() - if stripped: - resultLines.append(f'{childIndent}{stripped}') - resultLines[-1] += ' <-- CUT POINT (incomplete)' - elif isCutLevel: - # Cut level but no cut piece - add opening structure - resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[') - childIndent = indent + " " - resultLines.append(f'{childIndent}... <-- CUT POINT (incomplete)') - elif isParentOfCutLevel: - # Parent of cut level: add opening structure, then show complete elements with budget - # Works for both arrays and objects - resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[') - childIndent = indent + " " - completeElements = _findCompleteElementsAtLevel( - jsonContent, levelStart, nextLevelStart, levelDepth - ) - - dataBudget = DATA_BUDGET - for elementStart, elementEnd in reversed(completeElements): - elementData = jsonContent[elementStart:elementEnd].strip() - elementSize = len(elementData) - - if elementSize == 0: - continue - - if elementSize > dataBudget: - break - - for line in elementData.split('\n'): - stripped = line.strip() - if stripped: - resultLines.append(f'{childIndent}{stripped}') - if elementEnd < nextLevelStart: - resultLines[-1] += ',' - - dataBudget -= elementSize - - if dataBudget <= 0: - break - - else: - # Other parent levels: add opening structure, then show path content - resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[') - childIndent = indent + " " - pathContent = jsonContent[levelStart + 1:nextLevelStart].strip() - if pathContent: - # Show all path content (structure is always shown, not truncated) - for line in pathContent.split('\n'): - stripped = line.strip() - if stripped: - resultLines.append(f'{childIndent}{stripped}') - - return "\n".join(resultLines) - - -def _buildNestedHierarchy( - resultLines: List[str], - jsonContent: 
str, - hierarchy: List[Dict[str, Any]], - levelIndex: int, - breakPosition: int, - cutPiece: str, - cutLevel: Dict[str, Any] -) -> None: - """ - Recursively build nested hierarchy from root to cut level. - This ensures proper nesting where each level contains the next level. - """ - if levelIndex >= len(hierarchy): - return - - level = hierarchy[levelIndex] - levelType = level['type'] - levelStart = level['start_pos'] - levelKey = level.get('key') - levelDepth = level['depth'] - indent = " " * levelDepth - - isCutLevel = (levelIndex == len(hierarchy) - 1) - - # Show opening structure for this level - if levelKey: - resultLines.append(f'{indent}"{levelKey}": {{' if levelType == 'object' else f'{indent}"{levelKey}": [') - else: - resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[') - - childIndent = indent + " " - - if isCutLevel: - # Cut level - show content (complete elements + cut piece) - if levelType == 'array': - charBudget = 1000 - completeElements = _findCompleteElementsAtLevel( - jsonContent, levelStart, breakPosition, levelDepth - ) - - # Show complete elements (working backwards from the cut) - for elementStart, elementEnd in reversed(completeElements): - elementSize = elementEnd - elementStart - if charBudget >= elementSize: - element = jsonContent[elementStart:elementEnd].strip() - if element: - elementLines = element.split('\n') - for line in elementLines: - if line.strip(): - resultLines.append(f'{childIndent}{line}') - if elementEnd < breakPosition: - resultLines[-1] += ',' - charBudget -= elementSize - else: - break - - # Show cut piece - if cutPiece: - cutPieceLines = cutPiece.split('\n') - for line in cutPieceLines: - if line.strip(): - resultLines.append(f'{childIndent}{line}') - resultLines[-1] += ' <-- CUT POINT (incomplete)' - else: - cutPart = jsonContent[max(0, breakPosition-50):breakPosition] - resultLines.append(f'{childIndent}{cutPart} <-- CUT POINT (incomplete)') - - else: - # Object at cut level - previewSize = 
breakPosition - levelStart - maxPreviewSize = 500 - if previewSize > maxPreviewSize: - previewStart = breakPosition - maxPreviewSize - preview = jsonContent[previewStart:breakPosition] - else: - preview = jsonContent[levelStart:breakPosition] - - previewLines = preview.split('\n') - for line in previewLines: - if line.strip(): - resultLines.append(f'{childIndent}{line}') - - cutPart = jsonContent[breakPosition:min(breakPosition + 50, len(jsonContent))] - resultLines.append(f'{childIndent}... {cutPart} <-- CUT POINT (incomplete)') - - else: - # Parent level - show path to next level, then recursively build next level - nextLevel = hierarchy[levelIndex + 1] - nextLevelKey = nextLevel.get('key') - nextLevelStart = nextLevel['start_pos'] - nextLevelType = nextLevel['type'] - - # Extract content between this level's opening and next level's start - # This shows any keys/values that come before the next level - pathContent = jsonContent[levelStart + 1:nextLevelStart].strip() - - # Show the path content (keys/values before next level) - if len(pathContent) > 0 and len(pathContent) <= 500: - pathLines = pathContent.split('\n') - nonEmptyLines = [line for line in pathLines if line.strip()] - if nonEmptyLines: - for line in nonEmptyLines[:20]: # Show more lines - if line.strip(): - resultLines.append(f'{childIndent}{line}') - if len(nonEmptyLines) > 20: - resultLines.append(f'{childIndent}... ({len(nonEmptyLines) - 20} more lines) ...') - elif len(pathContent) > 500: - # Content too large - show placeholder - resultLines.append(f'{childIndent}... 
(content too large, {len(pathContent)} chars) ...') - - # Always show the key leading to next level if it exists - # The recursive call will show the opening bracket/brace, so we just show the key here - if nextLevelKey: - # Show the key (the recursive call will add the opening bracket/brace) - # Actually, the recursive call already shows the full opening with key, - # so we don't need to show it here - just let the recursive call handle it - pass - - # Recursively build next level (this will show its opening structure and content) - _buildNestedHierarchy(resultLines, jsonContent, hierarchy, levelIndex + 1, breakPosition, cutPiece, cutLevel) - - # Close this level - resultLines.append(f'{indent}}}' if levelType == 'object' else f'{indent}]') - - -def _findCompleteElementsAtLevel( - jsonContent: str, - levelStart: int, - breakPosition: int, - targetDepth: int -) -> List[Tuple[int, int]]: - """ - Find all complete elements at a specific depth level. - - Elements inside the structure at targetDepth are at targetDepth + 1. - We track depth relative to the start of the structure. - - Returns list of (start, end) tuples for complete elements. 
- """ - completeElements = [] - - # Track depth relative to the level start - # When we're at levelStart, we're at the opening bracket/brace (depth = targetDepth) - # Elements inside are at depth = targetDepth + 1 - relativeDepth = 0 # Depth relative to level start (0 = at opening bracket/brace) - inString = False - escapeNext = False - currentElementStart = None - - # Find the first non-whitespace character after the opening bracket/brace - for i in range(levelStart + 1, min(breakPosition, len(jsonContent))): - if jsonContent[i] not in [' ', '\n', '\r', '\t']: - currentElementStart = i - break - - if currentElementStart is None: - return completeElements - - for i in range(currentElementStart, min(breakPosition, len(jsonContent))): - char = jsonContent[i] - - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '{': - relativeDepth += 1 - elif char == '}': - relativeDepth -= 1 - # Element is complete when we return to the level's depth (relativeDepth == 0) - if relativeDepth == 0: - # Found end of complete element - if currentElementStart is not None: - completeElements.append((currentElementStart, i + 1)) - # Find start of next element - j = i + 1 - while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']: - j += 1 - if j < breakPosition: - currentElementStart = j - else: - currentElementStart = None - elif char == '[': - relativeDepth += 1 - elif char == ']': - relativeDepth -= 1 - # Element is complete when we return to the level's depth (relativeDepth == 0) - if relativeDepth == 0: - # Found end of complete element - if currentElementStart is not None: - completeElements.append((currentElementStart, i + 1)) - # Find start of next element - j = i + 1 - while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']: - j += 1 - if j < breakPosition: 
- currentElementStart = j - else: - currentElementStart = None - elif char == ',': - # Comma at relativeDepth == 0 means we're between elements at the cut level - if relativeDepth == 0: - # Element boundary - check if we have a complete element - if currentElementStart is not None and currentElementStart < i: - # Simple value (string, number, boolean, null) - complete at comma - completeElements.append((currentElementStart, i)) - # Find start of next element - j = i + 1 - while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']: - j += 1 - if j < breakPosition: - currentElementStart = j - else: - currentElementStart = None - - return completeElements - - -def extractCutPiece(jsonContent: str, breakPosition: int) -> str: - """ - Extract the incomplete piece at the cut point. - Generic function that works with all JSON types: arrays, objects, strings, numbers, booleans, null. - - CRITICAL: Uses findStructureHierarchy to identify the cut level, then parses from the cut level start - to correctly identify which element contains the break position. - This approach handles all JSON structures generically, including: - - Nested objects and arrays - - Strings containing brackets, braces, commas - - Complex nested structures - - Returns the incomplete element from its start to the break position. 
- """ - if breakPosition <= 0 or breakPosition > len(jsonContent): - return "" - - # First, find the structure hierarchy to identify the cut level - hierarchy = findStructureHierarchy(jsonContent, breakPosition) - if not hierarchy: - # Fallback: return content before break - return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip() - - # Get the cut level (the structure containing the break position) - cutLevel = hierarchy[-1] - cutLevelStart = cutLevel['start_pos'] - cutLevelDepth = cutLevel.get('depth', 0) - - # Parse from cutLevelStart to breakPosition to find element boundaries - braceDepth = 0 # Absolute brace depth - bracketDepth = 0 # Absolute bracket depth - inString = False - escapeNext = False - - # Track element start at the cut level - currentElementStart = cutLevelStart # Start of current element - - # Parse from cut level start to break position - for i in range(cutLevelStart, min(breakPosition, len(jsonContent))): - char = jsonContent[i] - - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '{': - braceDepth += 1 - elif char == '}': - braceDepth -= 1 - elif char == '[': - bracketDepth += 1 - elif char == ']': - bracketDepth -= 1 - elif char == ',': - # Comma at cut level separates elements - currentDepth = braceDepth + bracketDepth - if currentDepth == cutLevelDepth: - # This comma is at the cut level - next element starts after it - j = i + 1 - while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']: - j += 1 - if j < breakPosition: - currentElementStart = j - elif char == ':': - # Colon at cut level separates key from value - currentDepth = braceDepth + bracketDepth - if currentDepth == cutLevelDepth: - # This colon is at the cut level - value starts after it - j = i + 1 - while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', 
'\t']: - j += 1 - if j < breakPosition: - currentElementStart = j - - # The element containing breakPosition starts at currentElementStart - # Find the actual start by skipping leading whitespace - actualStart = currentElementStart - for i in range(currentElementStart, min(breakPosition, len(jsonContent))): - char = jsonContent[i] - if char not in [' ', '\n', '\r', '\t']: - actualStart = i - break - - # Extract the incomplete piece from actualStart to breakPosition - # Preserve trailing whitespace - it's needed for merging - cutPiece = jsonContent[actualStart:breakPosition] - - # Remove leading whitespace but preserve trailing whitespace - cutPiece = cutPiece.lstrip() - - return cutPiece if cutPiece else jsonContent[actualStart:breakPosition] - - -def findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]: - """ - Find the structure hierarchy backwards from break point to root. - - Returns list of level info dicts, from root to cut level. - Each level has: type, start_pos, end_pos, depth, key - - CRITICAL: Returns the path from root to cut point. 
- - For closed structures: uses actual end position - - For open structures: uses breakPosition - """ - hierarchy = [] - - # Track depth and positions - braceDepth = 0 - bracketDepth = 0 - inString = False - escapeNext = False - - # Track ALL structures (both closed and open) to get correct end positions - # Stack of (type, start_pos, depth, end_pos) - # end_pos is None until structure is closed - structureStack = [] # Stack of (type, start_pos, depth, end_pos) - closedStructures = [] # List of closed structures with their end positions - - for i in range(breakPosition): - if i >= len(jsonContent): - break - - char = jsonContent[i] - - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '{': - # Store depth BEFORE incrementing (this is the level of the structure being opened) - currentDepth = braceDepth + bracketDepth - structureStack.append(('object', i, currentDepth, None)) - braceDepth += 1 - elif char == '}': - # When closing, record the end position and move to closed structures - if structureStack and structureStack[-1][0] == 'object': - structType, start, depth, _ = structureStack.pop() - closedStructures.append({ - 'type': structType, - 'start_pos': start, - 'end_pos': i + 1, # Actual end position - 'depth': depth, - 'key': findKeyBefore(jsonContent, start) - }) - braceDepth -= 1 - elif char == '[': - # Store depth BEFORE incrementing - currentDepth = braceDepth + bracketDepth - structureStack.append(('array', i, currentDepth, None)) - bracketDepth += 1 - elif char == ']': - # When closing, record the end position - if structureStack and structureStack[-1][0] == 'array': - structType, start, depth, _ = structureStack.pop() - closedStructures.append({ - 'type': structType, - 'start_pos': start, - 'end_pos': i + 1, # Actual end position - 'depth': depth, - 'key': findKeyBefore(jsonContent, start) - }) - bracketDepth -= 1 - 
- # Build hierarchy: we need the actual path from root to cut level - # CRITICAL: Only include structures that are actually on the path - # A structure is on the path if it contains the next level's start position - - if not structureStack: - # No open structures - all were closed before break - # Return path to deepest closed structure - if closedStructures: - maxDepth = max(s['depth'] for s in closedStructures) - # Build path: each level must contain the next level - path = [] - for depth in range(maxDepth + 1): - candidates = [s for s in closedStructures if s['depth'] == depth] - if candidates: - # If multiple at same depth, use the one that contains structures at deeper depths - if depth < maxDepth: - # Find the one that contains a structure at depth + 1 - nextDepthCandidates = [s for s in closedStructures if s['depth'] == depth + 1] - if nextDepthCandidates: - nextStart = min(s['start_pos'] for s in nextDepthCandidates) - # Find candidate that contains nextStart - for candidate in candidates: - if candidate['start_pos'] < nextStart < candidate['end_pos']: - path.append(candidate) - break - else: - # Fallback: use first candidate - path.append(candidates[0]) - else: - path.append(candidates[0]) - else: - path.append(candidates[0]) - return path - return [] - - # We have open structures - build path from root to deepest open structure - # Strategy: Start from deepest open structure and work backwards to root, - # ensuring each level contains the next level - - openByDepth = {} - for structType, start, depth, _ in structureStack: - openByDepth[depth] = { - 'type': structType, - 'start_pos': start, - 'end_pos': breakPosition, - 'depth': depth, - 'key': findKeyBefore(jsonContent, start) - } - - maxOpenDepth = max(openByDepth.keys()) - - # Build path backwards from deepest to root - path = [] - currentDepth = maxOpenDepth - currentStart = openByDepth[maxOpenDepth]['start_pos'] - - while currentDepth >= 0: - # Look for structure at currentDepth that contains 
currentStart - # First check open structures - if currentDepth in openByDepth: - struct = openByDepth[currentDepth] - if struct['start_pos'] <= currentStart: - path.insert(0, struct) - currentStart = struct['start_pos'] - currentDepth -= 1 - continue - - # Check closed structures - candidates = [s for s in closedStructures if s['depth'] == currentDepth and s['start_pos'] <= currentStart < s['end_pos']] - if candidates: - # Use the one that ends latest (most recent) - struct = max(candidates, key=lambda x: x['end_pos']) - path.insert(0, struct) - currentStart = struct['start_pos'] - currentDepth -= 1 - else: - # No structure found at this depth - break - break - - return path - - # Return the hierarchy (path from root to cut level) - if hierarchy: - return hierarchy - - # Fallback: if JSON starts with { or [, create a root level - if jsonContent and jsonContent.strip(): - firstChar = jsonContent.strip()[0] - if firstChar == '{': - return [{ - 'type': 'object', - 'start_pos': 0, - 'end_pos': breakPosition, - 'depth': 0, - 'key': None - }] - elif firstChar == '[': - return [{ - 'type': 'array', - 'start_pos': 0, - 'end_pos': breakPosition, - 'depth': 0, - 'key': None - }] - - return [] - - -def extractOverlapContext(jsonContent: str, breakPosition: int) -> str: - """ - Extract overlap context: the object containing the cut element. - - Returns ONLY the object containing the cut element (the incomplete element itself). - This is what the continuation should start with for proper merging. - - CRITICAL: Preserves trailing whitespace for proper merging. 
- - Args: - jsonContent: The incomplete JSON string - breakPosition: Position where JSON was cut - - Returns: - String with the object containing the cut element - """ - if not jsonContent or breakPosition <= 0: - return jsonContent[-200:].strip() if jsonContent else "" - - # Extract cut piece (incomplete element) - this is the object containing the cut element - cutPiece = extractCutPiece(jsonContent, breakPosition) - - # Return only the cut piece - the object containing the cut element - if cutPiece: - return cutPiece - - # Fallback: show content before break - return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip() - - -def findKeyBefore(jsonContent: str, pos: int) -> Optional[str]: - """Find the key name before a structure start position.""" - # Look backwards for "key": pattern - before = jsonContent[max(0, pos - 100):pos] - match = re.search(r'"([^"]+)"\s*:\s*[{\[]\s*$', before) - if match: - return match.group(1) - return None - - -def _formatLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str: - """Format a level in the hierarchy for display.""" - levelType = level['type'] - start = level['start_pos'] - end = level['end_pos'] - key = level.get('key') - - # Get content for this level - levelContent = jsonContent[start:end] - - # If content is too large, show only metadata - if len(levelContent) > maxContentChars: - # Show opening and key if available - if key: - return f' "{key}": {levelType} (content too large, {len(levelContent)} chars)' - else: - return f' {levelType} (content too large, {len(levelContent)} chars)' - else: - # Show full content (formatted) - indent = " " * level['depth'] - if key: - return f'{indent}"{key}": {levelContent[:maxContentChars]}' - else: - return f'{indent}{levelContent[:maxContentChars]}' - - -def _formatCutLevelContextDetailed(level: Dict[str, Any], cutPiece: str, jsonContent: str, breakPosition: int) -> str: - """ - Format the cut level showing detailed hierarchy as per 
user instruction: - 1. Cut piece level: element of a list (the incomplete element) - 2. Parent of the cut element: the list containing the cut piece (with cut point shown) - 3. Last complete object on the same level like the cut object (if exists) PLUS further - previous content from the json string (maximum 1000 characters) - """ - levelType = level['type'] - start = level['start_pos'] - key = level.get('key') - - # Get content before break point in this level - contentBeforeBreak = jsonContent[start:breakPosition] - - result = [] - - if levelType == 'array': - # Step 3: Show last complete elements on same level + previous content (max 1000 chars) - # Extract last complete array elements with context (up to 1000 chars) - lastCompleteElements = _extractLastCompleteArrayElementsWithContext( - contentBeforeBreak, jsonContent, start, maxChars=1000 - ) - if lastCompleteElements: - result.append("3. Last complete elements on same level (plus previous content, max 1000 chars):") - result.append(lastCompleteElements) - result.append("") - - # Step 2: Show parent container (the list) with cut piece - # Find the array element that contains the cut piece - cutArrayElement = _findCutArrayElement(jsonContent, breakPosition, start) - if cutArrayElement: - result.append("2. Parent container (list containing cut piece):") - result.append(f" {cutArrayElement}") - else: - # Fallback: show cut piece directly - cutPart = jsonContent[breakPosition:breakPosition + 200].strip() - result.append("2. Parent container (list containing cut piece):") - result.append(f" {cutPart}") - result.append("") - - # Step 1: Show cut piece (incomplete element at cut point) - result.append("1. 
Cut piece level (incomplete element at cut point):") - if cutPiece: - result.append(f" {cutPiece}") - else: - cutPart = jsonContent[breakPosition:breakPosition + 50].strip() - result.append(f" {cutPart}") - else: - # Object - show structure with cut point - result.append("Cut point in object:") - cutPart = jsonContent[breakPosition:breakPosition + 200].strip() - preview = contentBeforeBreak[-500:] if len(contentBeforeBreak) > 500 else contentBeforeBreak - result.append(f" {preview}... {cutPart} <-- CUT POINT") - - return "\n".join(result) - - -def _formatParentLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str: - """ - Format a parent level showing content (if small enough) or metadata only. - Used for levels above the cut level, showing path to root. - """ - levelType = level['type'] - start = level['start_pos'] - end = level['end_pos'] - key = level.get('key') - - # Get content for this level - levelContent = jsonContent[start:end] - - # If content is too large, show only metadata - if len(levelContent) > maxContentChars: - # Show opening structure with key if available - opening = jsonContent[start:start + 200].strip() - if key: - return f' "{key}": {levelType} (content too large, {len(levelContent)} chars)\n {opening}...' - else: - return f' {levelType} (content too large, {len(levelContent)} chars)\n {opening}...' - else: - # Show full content (formatted, but limit to maxContentChars) - content = levelContent[:maxContentChars] - if key: - return f' "{key}": {content}' - else: - return f' {content}' - - -def _extractLastCompleteArrayElementsWithContext( - arrayContent: str, fullJsonContent: str, arrayStart: int, maxChars: int = 1000 -) -> str: - """ - Extract last complete array elements PLUS further previous content from json string (max 1000 chars). 
- - This shows: - - Last complete elements on the same level as the cut element - - Additional previous content from the JSON string (up to maxChars total) - """ - # First, extract last complete elements from arrayContent - completeElements = [] - currentElement = "" - braceDepth = 0 - bracketDepth = 0 - inString = False - escapeNext = False - totalChars = 0 - - # Parse backwards to find complete elements - for i in range(len(arrayContent) - 1, -1, -1): - char = arrayContent[i] - - if escapeNext: - escapeNext = False - currentElement = char + currentElement - continue - - if char == '\\': - escapeNext = True - currentElement = char + currentElement - continue - - if char == '"': - inString = not inString - currentElement = char + currentElement - continue - - if not inString: - if char == '}': - braceDepth += 1 - currentElement = char + currentElement - elif char == '{': - braceDepth -= 1 - currentElement = char + currentElement - if braceDepth == 0 and bracketDepth == 0: - # Found complete element - element = currentElement.strip() - if element and element[0] in ['{', '[']: - completeElements.insert(0, element) - totalChars += len(element) - if totalChars >= maxChars: - break - currentElement = "" - elif char == ']': - bracketDepth += 1 - currentElement = char + currentElement - elif char == '[': - bracketDepth -= 1 - currentElement = char + currentElement - if braceDepth == 0 and bracketDepth == 0: - # Found complete element - element = currentElement.strip() - if element and element[0] == '[': - completeElements.insert(0, element) - totalChars += len(element) - if totalChars >= maxChars: - break - currentElement = "" - elif char == ',' and braceDepth == 0 and bracketDepth == 0: - # Element boundary - if currentElement.strip(): - element = currentElement.strip() - if element and element[0] in ['{', '[', '"']: - completeElements.insert(0, element) - totalChars += len(element) - if totalChars >= maxChars: - break - currentElement = "" - else: - currentElement = 
char + currentElement - - # Format the elements - if completeElements: - # Show last few complete elements (up to maxChars) - formattedElements = [] - charsUsed = 0 - for elem in reversed(completeElements): # Show from newest to oldest - if charsUsed + len(elem) <= maxChars: - formattedElements.insert(0, elem) - charsUsed += len(elem) - else: - break - - if formattedElements: - # Format as JSON array rows (without hardcoded indentation - caller will add it) - result = [] - for elem in formattedElements: - # Remove leading comma if present (from mid-element extraction) - cleanElem = elem.lstrip(',').strip() - if cleanElem: - result.append(f"{cleanElem},") - return "\n".join(result) - - return "" - - -def _findCutArrayElement(jsonContent: str, breakPosition: int, arrayStart: int) -> Optional[str]: - """Find the array element that contains the cut piece.""" - # Look backwards from break position to find the start of the current array element - braceDepth = 0 - bracketDepth = 0 - inString = False - escapeNext = False - elementStart = -1 - - # Search backwards from break position - for i in range(breakPosition - 1, arrayStart - 1, -1): - if i < 0: - break - - char = jsonContent[i] - - if escapeNext: - escapeNext = False - continue - - if char == '\\': - escapeNext = True - continue - - if char == '"': - inString = not inString - continue - - if not inString: - if char == '}': - braceDepth += 1 - elif char == '{': - braceDepth -= 1 - if braceDepth == 0 and bracketDepth == 0: - elementStart = i - break - elif char == ']': - bracketDepth += 1 - elif char == '[': - bracketDepth -= 1 - if braceDepth == 0 and bracketDepth == 0: - elementStart = i - break - elif char == ',' and braceDepth == 0 and bracketDepth == 0: - # Found element boundary - elementStart = i + 1 - break - - if elementStart >= 0: - # Extract the element (including incomplete part) - elementContent = jsonContent[elementStart:breakPosition + 100].strip() - # Clean up - remove leading comma if present - if 
elementContent.startswith(','): - elementContent = elementContent[1:].strip() - return elementContent[:300] # Limit length - - return None - - -def _extractLastCompleteArrayElements(arrayContent: str, maxChars: int = 1000) -> str: - """Extract last complete array elements, up to maxChars.""" - # Count complete elements from the end - elements = [] - currentElement = "" - braceDepth = 0 - bracketDepth = 0 - inString = False - escapeNext = False - totalChars = 0 - - # Parse backwards to find complete elements - for i in range(len(arrayContent) - 1, -1, -1): - char = arrayContent[i] - - if escapeNext: - escapeNext = False - currentElement = char + currentElement - continue - - if char == '\\': - escapeNext = True - currentElement = char + currentElement - continue - - if char == '"': - inString = not inString - currentElement = char + currentElement - continue - - if not inString: - if char == '}': - braceDepth += 1 - currentElement = char + currentElement - elif char == '{': - braceDepth -= 1 - currentElement = char + currentElement - if braceDepth == 0 and bracketDepth == 0: - # Found complete element - element = currentElement.strip() - if element and element[0] in ['{', '[']: - elements.insert(0, element) - totalChars += len(element) - if totalChars >= maxChars: - break - currentElement = "" - elif char == ']': - bracketDepth += 1 - currentElement = char + currentElement - elif char == '[': - bracketDepth -= 1 - currentElement = char + currentElement - if braceDepth == 0 and bracketDepth == 0: - # Found complete element - element = currentElement.strip() - if element and element[0] == '[': - elements.insert(0, element) - totalChars += len(element) - if totalChars >= maxChars: - break - currentElement = "" - elif char == ',' and braceDepth == 0 and bracketDepth == 0: - # Element boundary - if currentElement.strip(): - element = currentElement.strip() - if element and element[0] in ['{', '[', '"']: - elements.insert(0, element) - totalChars += len(element) - if 
totalChars >= maxChars: - break - currentElement = "" - else: - currentElement = char + currentElement - - if elements: - indent = " " - formatted = ",\n".join([f"{indent}{elem}" for elem in elements[-5:]]) # Show last 5 elements - if len(elements) > 5: - formatted = f"... ({len(elements) - 5} more elements) ...\n{formatted}" - return formatted - - return "" - - -def _extractStructureContext(jsonContent: str, incompletePart: str, lastCompletePart: str = "") -> str: - """ - Extract structure context showing WHERE in the structure the last complete and incomplete elements are. - - Returns a clear description of the structure context for the broken element. - """ - import json - import re - - if not incompletePart: - # No incomplete part extracted - try to show context from raw JSON - try: - # Show last part of JSON to indicate where it broke - lastPart = jsonContent[-300:] if len(jsonContent) > 300 else jsonContent - return f"Structure context unavailable. Last part of response:\n{lastPart}" - except Exception: - return "Structure context unavailable - response was completely broken" - - # Find where incomplete part starts - incompleteStart = jsonContent.find(incompletePart) - if incompleteStart == -1: - incompleteStart = len(jsonContent) - - # Try to extract the structure context showing the broken element - try: - # Get the part before incomplete to understand structure - beforeIncomplete = jsonContent[:incompleteStart] - - # Try to find the array/object context where the break occurred - # Look for the last complete structure before the break - structureContext = "" - - # Try to parse what we have before the incomplete part - try: - closed = closeJsonStructures(beforeIncomplete) - parsed = json.loads(closed) - - # Build structure showing where we are - if isinstance(parsed, dict) and "elements" in parsed: - elements = parsed.get("elements", []) - if isinstance(elements, list): - structureContext = f"Structure: elements array with {len(elements)} complete 
elements\n" - structureContext += f"Break occurred in element at index {len(elements)}" - else: - structureContext = "Structure: elements (not an array)" - else: - structureContext = "Structure: " + json.dumps(_buildStructureContext(parsed), indent=2, ensure_ascii=False) - except Exception: - # Can't parse - show raw context - structureContext = f"Structure parsing failed. Context before break:\n{beforeIncomplete[-200:]}" - - return structureContext - - except Exception: - # Fallback: show minimal context - return f"Structure context unavailable. Break occurred at position {incompleteStart} in JSON string" - - -def _findElementPath(parsed: Any, elementStr: str, originalJson: str, isIncomplete: bool = False) -> str: - """ - Find the path to an element in the parsed JSON structure. - - Returns a path like "elements[2]" or "documents[0].chapters[1].sections[3]" - """ - import json - - if not elementStr or not elementStr.strip(): - return "" - - # Strategy: Find position in original JSON string, then determine path from structure - elementStart = originalJson.find(elementStr.strip()) - if elementStart == -1: - return "" - - # Find the array context by looking backwards from element position - beforeElement = originalJson[:elementStart] - - # Find the nearest array declaration before this position - # Look for patterns like "elements": [ or "chapters": [ - arrayPattern = r'"(\w+)"\s*:\s*\[' - matches = list(re.finditer(arrayPattern, beforeElement)) - if not matches: - return "" - - # Get the most recent array (closest to element) - lastMatch = matches[-1] - arrayName = lastMatch.group(1) - arrayStartPos = lastMatch.end() - - # Count complete array elements before this position - arrayContent = beforeElement[arrayStartPos:] - - # Count complete objects (balanced braces) - each complete object is an array element - braceCount = 0 - elementIndex = 0 - inString = False - escapeNext = False - lastCompleteObjectEnd = -1 - - for i, char in enumerate(arrayContent): - if 
escapeNext: - escapeNext = False - continue - if char == '\\': - escapeNext = True - continue - if char == '"': - inString = not inString - continue - if not inString: - if char == '{': - if braceCount == 0: - # Start of new object - elementIndex += 1 - braceCount += 1 - elif char == '}': - braceCount -= 1 - if braceCount == 0: - # End of complete object - lastCompleteObjectEnd = i - - # Determine the index - # If we're looking for incomplete element, it's at the current elementIndex - # If we're looking for last complete element, it's at elementIndex - 1 - if isIncomplete: - index = elementIndex - else: - index = elementIndex - 1 if elementIndex > 0 else 0 - - # Build the full path by traversing the parsed structure - def _buildPathToArray(obj: Any, targetArrayName: str, targetIndex: int, currentPath: str = "") -> Optional[str]: - """Recursively find path to array element.""" - if isinstance(obj, dict): - for key, value in obj.items(): - newPath = f"{currentPath}.{key}" if currentPath else key - if key == targetArrayName and isinstance(value, list): - # Found the target array - if 0 <= targetIndex < len(value): - return f"{newPath}[{targetIndex}]" - elif targetIndex >= len(value): - # Index beyond array - return array path with index - return f"{newPath}[{targetIndex}]" - result = _buildPathToArray(value, targetArrayName, targetIndex, newPath) - if result: - return result - elif isinstance(obj, list): - for i, item in enumerate(obj): - result = _buildPathToArray(item, targetArrayName, targetIndex, currentPath) - if result: - return result - return None - - # Try to find full path in parsed structure - fullPath = _buildPathToArray(parsed, arrayName, index) - if fullPath: - return fullPath - - # Fallback: return simple array path - return f"{arrayName}[{index}]" - - -def _buildStructureContext(obj: Any, maxDepth: int = 5) -> Any: - """ - Build structure context (metadata only, no content). - Similar to _buildStructureTemplate but focuses on parent structure. 
- """ - if isinstance(obj, dict): - structure = {} - for key, value in obj.items(): - if isinstance(value, (dict, list)): - structure[key] = _buildStructureContext(value, maxDepth - 1) if maxDepth > 0 else [] - else: - # Skip content values - only keep structure - pass - return structure - elif isinstance(obj, list) and obj: - # Return empty list structure (no content) - return [] - else: - return None - - -def _findIncompleteSectionInRaw(raw_json: str) -> Optional[Dict[str, Any]]: - """ - Find the incomplete section in raw JSON. - - CRITICAL: JSON can be cut off mid-element (e.g., {"text": "20327,20) - We need to find the last section and check if it's incomplete. - """ - try: - # Try to parse documents structure - if '"documents"' in raw_json: - # Find last document - doc_start = raw_json.rfind('"documents"') - if doc_start >= 0: - doc_section = raw_json[doc_start:] - # Try to find sections array - sections_start = doc_section.find('"sections"') - if sections_start >= 0: - sections_section = doc_section[sections_start:] - # Find sections array start - array_start = sections_section.find('[') - if array_start >= 0: - # Find all complete sections - section_objects = [] - depth = 0 - section_start = None - - for i in range(array_start, len(sections_section)): - if sections_section[i] == '{': - if depth == 0: - section_start = i - depth += 1 - elif sections_section[i] == '}': - depth -= 1 - if depth == 0 and section_start is not None: - # Found complete section - section_str = sections_section[section_start:i+1] - try: - section_obj = json.loads('{' + section_str + '}') - section_objects.append(section_obj) - except: - pass - section_start = None - - # CRITICAL: Check if there's content after the last complete section - # If JSON ends mid-element, the last section is incomplete - if section_objects: - # Find position after last complete section - last_section_end = sections_section.rfind('}') - if last_section_end >= 0: - # Check if there's more content after the 
last } - remaining_after_last_section = sections_section[last_section_end+1:].strip() - # Remove closing brackets/braces that might be there - remaining_after_last_section = remaining_after_last_section.lstrip('],}') - - # If there's still content (like incomplete element), section is incomplete - if remaining_after_last_section and not remaining_after_last_section.startswith(']'): - # Last section is incomplete - return it - return section_objects[-1] - - # Also check: if we can't parse the full sections array, last section is incomplete - try: - # Try to parse the sections array - sections_array_str = sections_section[array_start:] - json.loads(sections_array_str) - # Parsed successfully - all sections complete - return None - except: - # Cannot parse - last section is incomplete - return section_objects[-1] if section_objects else None - except Exception as e: - logger.debug(f"Error finding incomplete section: {e}") - - return None - - -def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> Tuple[Optional[str], Optional[str]]: - """Extract cut-off element and element before from incomplete section.""" - cut_off_element = None - element_before_cutoff = None - - elements = incomplete_section.get("elements", []) - if not elements: - return None, None - - # CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - # Deliver the cut-off part AS-IS (don't try to "complete" it) - - if isinstance(elements, list): - # Find last element (might be incomplete) - if elements: - # Edge case: If cut-off is in first element, just show cut-off element - if len(elements) == 1: - # Only one element - might be cut-off - last_elem = elements[0] - if isinstance(last_elem, dict): - # Check if element contains nested content (e.g., code_block with JSON string) - cut_off_element = _extractCutOffFromElement(last_elem, raw_json) - if not cut_off_element: - cut_off_element = json.dumps(last_elem) - else: - cut_off_element = str(last_elem) - else: - # 
Multiple elements - last one might be cut-off, get element before - element_before_cutoff = json.dumps(elements[-2]) if isinstance(elements[-2], dict) else str(elements[-2]) - last_elem = elements[-1] - if isinstance(last_elem, dict): - # Check if element contains nested content - cut_off_element = _extractCutOffFromElement(last_elem, raw_json) - if not cut_off_element: - cut_off_element = json.dumps(last_elem) - else: - cut_off_element = str(last_elem) - elif isinstance(elements, dict): - # Single element - might be cut-off - cut_off_element = _extractCutOffFromElement(elements, raw_json) - if not cut_off_element: - cut_off_element = json.dumps(elements) - - # If we couldn't extract from parsed structure, extract from raw JSON - if not cut_off_element: - # Extract the last incomplete part from raw JSON - # Find the last incomplete string/number/array - # re is already imported at module level - # Look for incomplete string at the end - incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL) - if incomplete_match: - cut_off_element = incomplete_match.group(1) - else: - # Look for incomplete number - number_match = re.search(r'(\d+\.?\d*)(?:\s*[,}\]]|$)', raw_json[-200:]) - if number_match: - cut_off_element = number_match.group(1) - - return cut_off_element, element_before_cutoff - - -def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optional[str]: - """ - Extract cut-off point from within an element (e.g., code_block with JSON string, table with incomplete rows). - - This helps identify where exactly to continue within nested structures. 
- """ - # re is already imported at module level - - # Check for code_block with nested JSON - if "code" in element: - code_content = element.get("code", "") - if isinstance(code_content, str) and code_content.strip().startswith("{"): - # This is JSON inside a code string - find where it was cut off - # Look for the last complete value in the raw JSON - # Find the code string in raw JSON - code_match = re.search(r'"code"\s*:\s*"([^"]*?)(?:"|$)', raw_json[-2000:], re.DOTALL) - if code_match: - code_str = code_match.group(1) - # Try to find the last complete value in the JSON string - # Look for patterns like: [2, 3, 5, ... 17929, (cut off here) - array_match = re.search(r'\[([^\]]*?)(?:\]|$)', code_str, re.DOTALL) - if array_match: - array_content = array_match.group(1) - # Find last complete number/item - # Match: number followed by comma or end - last_complete = re.findall(r'(\d+)\s*[,]', array_content) - if last_complete: - last_num = last_complete[-1] - # Return context showing where to continue - return f'{{"code": "{{\\"primes\\": [... up to {last_num}, ]"}}' - - # Check for table with incomplete rows - if "rows" in element: - rows = element.get("rows", []) - if isinstance(rows, list) and rows: - # Find last complete row in raw JSON - rows_str = str(rows) - # Try to find where rows were cut off - last_row_match = re.search(r'\[([^\]]*?)(?:\]|$)', raw_json[-1000:], re.DOTALL) - if last_row_match: - return f'{{"rows": [... last complete row shown above, ]}}' - - # Check for list items - if "items" in element: - items = element.get("items", []) - if isinstance(items, list) and items: - # Find last complete item - last_item_match = re.search(r'"([^"]*?)"\s*(?:,|\])', raw_json[-1000:], re.DOTALL) - if last_item_match: - return f'{{"items": [... 
last item shown above, ]}}' - - return None - - -def _extractCutOffElementsFromRaw(raw_json: str, allSections: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]: - """ - Extract cut-off element directly from raw JSON when section parsing fails. - - This handles ALL cases where JSON is cut off: - - Mid-element (incomplete element object) - - Mid-string/number within an element - - Mid-array within an element (e.g., rows in table, items in list) - - Mid-nested structure - - CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - deliver as-is. - """ - cut_off_element = None - element_before_cutoff = None - - try: - # Find the last "elements" array in raw JSON - if '"elements"' in raw_json: - # Find the last occurrence of "elements" - last_elements_pos = raw_json.rfind('"elements"') - if last_elements_pos >= 0: - elements_section = raw_json[last_elements_pos:] - - # Find the array start '[' - array_start = elements_section.find('[') - if array_start >= 0: - # Use a simpler approach: find all element objects by tracking braces - # This works even if elements contain nested arrays/objects - element_strings = [] - depth = 0 - in_string = False - escape_next = False - elem_start = None - - for i in range(array_start, len(elements_section)): - char = elements_section[i] - - # Track string state (ignore brackets/braces inside strings) - if escape_next: - escape_next = False - continue - if char == '\\': - escape_next = True - continue - if char == '"' and not escape_next: - in_string = not in_string - continue - - if not in_string: - if char == '{': - if depth == 0: - elem_start = i - depth += 1 - elif char == '}': - depth -= 1 - if depth == 0 and elem_start is not None: - # Found complete element (all braces closed, even if nested arrays are incomplete) - elem_str = elements_section[elem_start:i+1] - element_strings.append(elem_str) - elem_start = None - - # Now analyze what we found - if element_strings: - last_elem = element_strings[-1] - 
last_complete_pos = elements_section.rfind('}') - - # Check if there's content after the last complete element - if last_complete_pos >= 0: - remaining = elements_section[last_complete_pos+1:].strip() - remaining_clean = remaining.lstrip(',').strip().lstrip(']').strip() - - # Case 1: Incomplete element after last complete one - if remaining_clean and not remaining_clean.startswith(']'): - incomplete_start = last_complete_pos + 1 - while incomplete_start < len(elements_section) and elements_section[incomplete_start] in ' \n\t\r,': - incomplete_start += 1 - - if incomplete_start < len(elements_section): - incomplete_elem_str = elements_section[incomplete_start:].strip() - incomplete_elem_str = incomplete_elem_str.rstrip(']').rstrip('}').rstrip() - cut_off_element = incomplete_elem_str - element_before_cutoff = element_strings[-1] - - # Case 2: Last element itself is incomplete (cut off in nested structure like rows, items, etc.) - else: - # Check if JSON is incomplete by analyzing structure - # Count unclosed brackets/braces in elements section (ignoring strings) - elements_section_braces = 0 - elements_section_brackets = 0 - in_str = False - esc = False - - for char in elements_section: - if esc: - esc = False - continue - if char == '\\': - esc = True - continue - if char == '"': - in_str = not in_str - continue - if not in_str: - if char == '{': - elements_section_braces += 1 - elif char == '}': - elements_section_braces -= 1 - elif char == '[': - elements_section_brackets += 1 - elif char == ']': - elements_section_brackets -= 1 - - # Also check raw JSON for unclosed structures - raw_braces = 0 - raw_brackets = 0 - in_str = False - esc = False - - for char in raw_json: - if esc: - esc = False - continue - if char == '\\': - esc = True - continue - if char == '"': - in_str = not in_str - continue - if not in_str: - if char == '{': - raw_braces += 1 - elif char == '}': - raw_braces -= 1 - elif char == '[': - raw_brackets += 1 - elif char == ']': - raw_brackets -= 1 
- - # Check if last element can be parsed - last_elem_parsable = False - try: - json.loads(last_elem) - last_elem_parsable = True - except: - pass - - # Determine if last element is incomplete - is_incomplete = False - - # If there are unclosed structures, element is incomplete - if elements_section_brackets > 0 or elements_section_braces > 0 or raw_brackets > 0 or raw_braces > 0: - is_incomplete = True - - # If element cannot be parsed, it's incomplete - elif not last_elem_parsable: - is_incomplete = True - - # Check if JSON ends mid-element by finding where element ends in raw JSON - elif last_elem_parsable: - # Find where this element ends in the raw JSON - elem_end_marker = last_elem[-100:] if len(last_elem) > 100 else last_elem - elem_end_in_raw = raw_json.rfind(elem_end_marker) - - if elem_end_in_raw >= 0: - actual_elem_end = elem_end_in_raw + len(last_elem) - - if actual_elem_end < len(raw_json): - remaining_after_elem = raw_json[actual_elem_end:].strip() - remaining_clean = remaining_after_elem.lstrip(',').strip() - - # If there's unexpected content, element is incomplete - if remaining_clean and not remaining_clean.startswith(']'): - is_incomplete = True - - if is_incomplete: - cut_off_element = last_elem - if len(element_strings) >= 2: - element_before_cutoff = element_strings[-2] - elif len(element_strings) == 1: - element_before_cutoff = last_elem - - # Case 3: No complete elements found, but there's an incomplete one - elif elem_start is not None: - # There's an incomplete element that hasn't been closed - incomplete_elem_str = elements_section[elem_start:].strip() - cut_off_element = incomplete_elem_str - # No element before (this is the first/only element) - element_before_cutoff = None - except Exception as e: - logger.debug(f"Error extracting cut-off elements from raw JSON: {e}") - - return cut_off_element, element_before_cutoff - - def parseJsonWithModel(jsonString: str, modelClass: Type[T]) -> T: """ Parse JSON string using Pydantic model with 
error handling. diff --git a/tests/functional/test12_json_split_merge.py b/tests/functional/test12_json_split_merge.py new file mode 100644 index 00000000..b36b93f2 --- /dev/null +++ b/tests/functional/test12_json_split_merge.py @@ -0,0 +1,694 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +JSON Split and Merge Test 12 - Tests JSON splitting and merging using workflow tools +Tests random splitting of JSON files into 3 parts and merging them back using ModularJsonMerger. +""" + +import asyncio +import json +import sys +import os +import time +import random +from typing import Dict, Any, List, Optional, Tuple + +# Add the gateway to path (go up 2 levels from tests/functional/) +_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _gateway_path not in sys.path: + sys.path.insert(0, _gateway_path) + +# Import JSON merger from workflow tools +from modules.services.serviceAi.subJsonMerger import ModularJsonMerger, JsonMergeLogger +from modules.shared.jsonContinuation import getContexts + + +class JsonSplitMergeTester12: + def __init__(self): + self.testResults = {} + self.testJsonFiles = [] + self.logBuffer = [] + self.logFile = None + + def createTestJsonFiles(self) -> List[Dict[str, Any]]: + """Create various test JSON files with different structures.""" + testFiles = [ + { + "name": "config.json", + "data": { + "application": "Customer Manager", + "version": "1.0.0", + "database": { + "host": "localhost", + "port": 5432, + "name": "customers_db" + }, + "api": { + "baseUrl": "https://api.example.com", + "timeout": 30 + } + } + }, + { + "name": "customers.json", + "data": { + "customers": [ + {"id": 1, "name": "John Doe", "email": "john@example.com", "phone": "+1234567890", "address": "123 Main St"}, + {"id": 2, "name": "Jane Smith", "email": "jane@example.com", "phone": "+0987654321", "address": "456 Oak Ave"}, + {"id": 3, "name": "Bob Johnson", "email": "bob@example.com", "phone": 
"+1122334455", "address": "789 Pine Rd"}, + {"id": 4, "name": "Alice Williams", "email": "alice@example.com", "phone": "+5566778899", "address": "321 Elm St"}, + {"id": 5, "name": "Charlie Brown", "email": "charlie@example.com", "phone": "+9988776655", "address": "654 Maple Dr"} + ] + } + }, + { + "name": "settings.json", + "data": { + "theme": { + "darkMode": True, + "fontSize": 14, + "language": "en" + }, + "notifications": { + "email": True, + "sms": False, + "push": True + }, + "features": { + "enableAnalytics": True, + "enableReports": False + } + } + }, + { + "name": "products.json", + "data": { + "products": [ + {"id": "P001", "name": "Product A", "price": 29.99, "category": "Electronics", "inStock": True}, + {"id": "P002", "name": "Product B", "price": 49.99, "category": "Clothing", "inStock": True}, + {"id": "P003", "name": "Product C", "price": 19.99, "category": "Books", "inStock": False}, + {"id": "P004", "name": "Product D", "price": 99.99, "category": "Electronics", "inStock": True}, + {"id": "P005", "name": "Product E", "price": 14.99, "category": "Books", "inStock": True}, + {"id": "P006", "name": "Product F", "price": 79.99, "category": "Clothing", "inStock": True} + ] + } + }, + { + "name": "document_structure.json", + "data": { + "metadata": { + "title": "Test Document", + "author": "Test Author", + "date": "2025-01-05" + }, + "documents": [ + { + "id": "doc1", + "title": "Document 1", + "sections": [ + { + "id": "sec1", + "content_type": "heading", + "elements": [ + {"type": "heading", "content": {"text": "Introduction", "level": 1}} + ] + }, + { + "id": "sec2", + "content_type": "paragraph", + "elements": [ + {"type": "paragraph", "content": {"text": "This is a test paragraph."}} + ] + } + ] + } + ] + } + }, + { + "name": "large_customers.json", + "data": self._createLargeCustomersData() + }, + { + "name": "large_products.json", + "data": self._createLargeProductsData() + }, + { + "name": "large_documents.json", + "data": 
self._createLargeDocumentsData() + } + ] + + return testFiles + + def _createLargeCustomersData(self) -> Dict[str, Any]: + """Create a large customers dataset for budget testing.""" + customers = [] + # Create 100 customers with long descriptions + for i in range(100): + customers.append({ + "id": i + 1, + "name": f"Customer {i + 1}", + "email": f"customer{i+1}@example.com", + "phone": f"+1{5550000000 + i}", + "address": f"{100 + i} Main Street, City {i % 10}, State {i % 5}, ZIP {10000 + i}", + "description": f"This is a detailed description for customer {i + 1}. " * 10 + + f"They have been a loyal customer since {2000 + (i % 25)}. " + + f"Their preferences include various products and services. " * 5, + "orders": [ + { + "orderId": f"ORD-{i+1}-{j+1}", + "date": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}", + "total": round(100.0 + (i * 10) + (j * 5), 2), + "items": [ + { + "productId": f"PROD-{k+1}", + "quantity": (k % 5) + 1, + "price": round(10.0 + k * 2, 2) + } + for k in range(3) + ] + } + for j in range(5) + ], + "metadata": { + "created": f"2020-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}", + "lastLogin": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}", + "tags": [f"tag-{i % 10}", f"category-{i % 5}", f"segment-{i % 3}"] + } + }) + return {"customers": customers} + + def _createLargeProductsData(self) -> Dict[str, Any]: + """Create a large products dataset for budget testing.""" + products = [] + # Create 200 products with detailed information + categories = ["Electronics", "Clothing", "Books", "Home & Garden", "Sports", "Toys", "Automotive", "Health"] + for i in range(200): + category = categories[i % len(categories)] + products.append({ + "id": f"PROD-{i+1:04d}", + "name": f"Product {i+1} - {category}", + "category": category, + "price": round(10.0 + (i * 2.5), 2), + "cost": round(5.0 + (i * 1.5), 2), + "inStock": i % 3 != 0, + "stockQuantity": (i % 100) * 10, + "description": f"This is a comprehensive product description for Product {i+1}. 
" * 15 + + f"It belongs to the {category} category and offers excellent value. " * 10 + + f"Features include: feature-1, feature-2, feature-3, and many more. " * 5, + "specifications": { + "weight": f"{1.0 + (i % 10)} kg", + "dimensions": f"{10 + (i % 20)}x{5 + (i % 15)}x{3 + (i % 10)} cm", + "color": ["red", "blue", "green", "black", "white"][i % 5], + "material": ["plastic", "metal", "wood", "fabric"][i % 4], + "warranty": f"{1 + (i % 5)} years" + }, + "reviews": [ + { + "userId": f"USER-{j+1}", + "rating": (j % 5) + 1, + "comment": f"Review {j+1} for product {i+1}: " + "This is a detailed review comment. " * 10, + "date": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}" + } + for j in range(3) + ], + "relatedProducts": [f"PROD-{k+1:04d}" for k in range(max(0, i-2), min(200, i+3)) if k != i] + }) + return {"products": products} + + def _createLargeDocumentsData(self) -> Dict[str, Any]: + """Create a large documents dataset for budget testing.""" + documents = [] + # Create 50 documents with nested structures + for i in range(50): + sections = [] + for j in range(10): + elements = [] + for k in range(5): + if k % 2 == 0: + elements.append({ + "type": "heading", + "level": (k % 3) + 1, + "content": { + "text": f"Section {j+1} Heading {k+1} for Document {i+1}", + "style": "bold" + } + }) + else: + elements.append({ + "type": "paragraph", + "content": { + "text": f"This is paragraph {k+1} in section {j+1} of document {i+1}. " * 20 + + f"It contains detailed information about various topics. " * 15 + + f"The content is structured and well-organized. 
" * 10 + } + }) + + sections.append({ + "id": f"sec-{i+1}-{j+1}", + "title": f"Section {j+1}", + "content_type": "mixed", + "elements": elements, + "metadata": { + "created": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}", + "modified": f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}", + "author": f"Author-{(i % 10) + 1}", + "tags": [f"tag-{j % 10}", f"category-{i % 5}"] + } + }) + + documents.append({ + "id": f"doc-{i+1:03d}", + "title": f"Document {i+1} - Comprehensive Report", + "description": f"This is a comprehensive document with detailed information. " * 30 + + f"It covers multiple topics and sections. " * 20 + + f"The content is extensive and well-structured. " * 15, + "sections": sections, + "metadata": { + "created": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}", + "modified": f"2024-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}", + "author": f"Author-{(i % 10) + 1}", + "version": f"1.{(i % 10)}", + "status": ["draft", "review", "published"][i % 3], + "tags": [f"tag-{i % 20}" for _ in range(5)] + } + }) + + return { + "metadata": { + "title": "Large Document Collection", + "description": "A collection of 50 comprehensive documents for testing budget limits", + "totalDocuments": 50, + "created": "2024-01-01" + }, + "documents": documents + } + + def splitJsonRandomly(self, jsonString: str, numParts: int = 3) -> List[str]: + """ + Split JSON string randomly into specified number of parts. + Simulates real AI response cuts - can split anywhere, even in the middle of strings/numbers/structures. + This is the REAL scenario: AI response gets cut off randomly, not at convenient points. + """ + if numParts < 2: + return [jsonString] + + jsonLength = len(jsonString) + + # Generate truly random split points - can be anywhere! 
+ # Only ensure minimum part size to avoid empty parts + minPartSize = max(10, jsonLength // (numParts * 3)) # Smaller minimum to allow more randomness + + splitPoints = [] + for _ in range(numParts - 1): + # Generate random point - can be anywhere in the string + # Only ensure we don't create parts smaller than minimum + minPoint = len(splitPoints) * minPartSize if splitPoints else minPartSize + maxPoint = jsonLength - (numParts - len(splitPoints) - 1) * minPartSize + + if maxPoint <= minPoint: + # If we can't avoid minimum size, just use the boundary + splitPoint = minPoint + else: + # Truly random point - can be in the middle of anything! + splitPoint = random.randint(minPoint, maxPoint) + + splitPoints.append(splitPoint) + + splitPoints.sort() + + # Create parts - these can be cut anywhere, even mid-string, mid-number, etc. + parts = [] + start = 0 + for splitPoint in splitPoints: + parts.append(jsonString[start:splitPoint]) + start = splitPoint + parts.append(jsonString[start:]) # Last part + + return parts + + def _log(self, message: str): + """Add message to log buffer.""" + self.logBuffer.append(message) + print(message) + + + + def normalizeJson(self, jsonString: str) -> Optional[Dict[str, Any]]: + """Normalize JSON string by parsing and re-serializing. 
Returns None if parsing fails.""" + try: + parsed = json.loads(jsonString) + return parsed + except json.JSONDecodeError: + # Try to close incomplete JSON structures + try: + from modules.shared.jsonUtils import closeJsonStructures, tryParseJson + closed = closeJsonStructures(jsonString) + parsed, error, _ = tryParseJson(closed) + if error is None and parsed is not None: + return parsed + except Exception: + pass + # Return None if all parsing attempts fail + return None + + def compareJson(self, original: Dict[str, Any], merged: Dict[str, Any]) -> Dict[str, Any]: + """Compare original and merged JSON structures.""" + originalStr = json.dumps(original, sort_keys=True, indent=2) + mergedStr = json.dumps(merged, sort_keys=True, indent=2) + + exactMatch = originalStr == mergedStr + + # Deep comparison + differences = [] + self._findDifferences(original, merged, "", differences) + + return { + "exactMatch": exactMatch, + "differences": differences, + "originalSize": len(originalStr), + "mergedSize": len(mergedStr), + "sizeMatch": len(originalStr) == len(mergedStr) + } + + def _findDifferences(self, obj1: Any, obj2: Any, path: str, differences: List[str]): + """Recursively find differences between two JSON objects.""" + if type(obj1) != type(obj2): + differences.append(f"{path}: Type mismatch - {type(obj1).__name__} vs {type(obj2).__name__}") + return + + if isinstance(obj1, dict): + allKeys = set(obj1.keys()) | set(obj2.keys()) + for key in allKeys: + newPath = f"{path}.{key}" if path else key + if key not in obj1: + differences.append(f"{newPath}: Missing in original") + elif key not in obj2: + differences.append(f"{newPath}: Missing in merged") + else: + self._findDifferences(obj1[key], obj2[key], newPath, differences) + elif isinstance(obj1, list): + if len(obj1) != len(obj2): + differences.append(f"{path}: Length mismatch - {len(obj1)} vs {len(obj2)}") + else: + for i, (item1, item2) in enumerate(zip(obj1, obj2)): + newPath = f"{path}[{i}]" + 
self._findDifferences(item1, item2, newPath, differences) + else: + if obj1 != obj2: + differences.append(f"{path}: Value mismatch - {obj1} vs {obj2}") + + async def testJsonSplitMerge(self, jsonFile: Dict[str, Any]) -> Dict[str, Any]: + """Test splitting and merging a single JSON file.""" + fileName = jsonFile["name"] + originalData = jsonFile["data"] + + self._log("") + self._log("="*80) + self._log(f"TESTING JSON SPLIT AND MERGE: {fileName}") + self._log("="*80) + + # Convert to JSON string + originalJsonString = json.dumps(originalData, indent=2, ensure_ascii=False) + originalSize = len(originalJsonString) + + # Log original JSON + self._log("") + self._log("="*80) + self._log("ORIGINAL JSON") + self._log("="*80) + self._log(f"JSON length: {originalSize} characters") + self._log("") + self._log("Full JSON content:") + self._log("-"*80) + jsonLines = originalJsonString.split('\n') + if len(jsonLines) > 50: + for line in jsonLines[:25]: + self._log(line) + self._log(f"... ({len(jsonLines) - 50} lines omitted) ...") + for line in jsonLines[-25:]: + self._log(line) + else: + for line in jsonLines: + self._log(line) + + # Split JSON at random position (simulating AI response cut) + self._log("") + self._log("="*80) + self._log("SPLITTING JSON AT RANDOM POSITION (SIMULATING AI RESPONSE CUT)") + self._log("="*80) + + # Find random cut position (not at start or end) + import random + minCutPos = max(100, originalSize // 10) # At least 10% from start + maxCutPos = min(originalSize - 100, originalSize * 9 // 10) # At least 10% from end + cutPosition = random.randint(minCutPos, maxCutPos) + + # Get part from start to cut + partContent = originalJsonString[:cutPosition] + + self._log("") + self._log("="*80) + self._log("PART (from start to cut):") + self._log("="*80) + self._log(f"Cut position: {cutPosition} characters") + self._log(f"Part length: {len(partContent)} characters") + self._log("") + self._log("Part content:") + partLines = partContent.split('\n') + if 
len(partLines) > 30: + for line in partLines[:15]: + self._log(f" {line}") + self._log(f" ... ({len(partLines) - 30} lines omitted) ...") + for line in partLines[-15:]: + self._log(f" {line}") + else: + for line in partLines: + self._log(f" {line}") + + # Generate contexts using getContexts() + self._log("") + self._log("="*80) + self._log("GENERATING CONTINUATION CONTEXTS") + self._log("="*80) + + contexts = getContexts(partContent) + + # Log overlap context + self._log("") + self._log("="*80) + self._log("OVERLAP CONTEXT (for merging):") + self._log("="*80) + overlapLines = contexts.overlapContext.split('\n') + if len(overlapLines) > 30: + for line in overlapLines[:15]: + self._log(f" {line}") + self._log(f" ... ({len(overlapLines) - 30} lines omitted) ...") + for line in overlapLines[-15:]: + self._log(f" {line}") + else: + for line in overlapLines: + self._log(f" {line}") + + # Log hierarchy context + self._log("") + self._log("="*80) + self._log("HIERARCHY CONTEXT (with budget logic):") + self._log("="*80) + hierarchyLines = contexts.hierarchyContext.split('\n') + if len(hierarchyLines) > 30: + for line in hierarchyLines[:15]: + self._log(f" {line}") + self._log(f" ... ({len(hierarchyLines) - 30} lines omitted) ...") + for line in hierarchyLines[-15:]: + self._log(f" {line}") + else: + for line in hierarchyLines: + self._log(f" {line}") + + # Test completePart as valid JSON + self._log("") + self._log("="*80) + self._log("COMPLETE PART (should be valid JSON):") + self._log("="*80) + completeLines = contexts.completePart.split('\n') + if len(completeLines) > 30: + for line in completeLines[:15]: + self._log(f" {line}") + self._log(f" ... 
({len(completeLines) - 30} lines omitted) ...") + for line in completeLines[-15:]: + self._log(f" {line}") + else: + for line in completeLines: + self._log(f" {line}") + + # Validate completePart as JSON + self._log("") + self._log("="*80) + self._log("VALIDATING COMPLETE PART AS JSON:") + self._log("="*80) + + isValidJson = False + parsedCompletePart = None + jsonError = None + + try: + parsedCompletePart = json.loads(contexts.completePart) + isValidJson = True + self._log(" ✅ completePart is valid JSON") + self._log(f" Parsed type: {type(parsedCompletePart).__name__}") + + # Compare with original if possible + if isinstance(parsedCompletePart, dict) and isinstance(originalData, dict): + comparison = self.compareJson(originalData, parsedCompletePart) + self._log(f" Comparison with original:") + self._log(f" Exact match: {comparison['exactMatch']}") + self._log(f" Size match: {comparison['sizeMatch']}") + if comparison['differences']: + self._log(f" Differences found: {len(comparison['differences'])}") + for diff in comparison['differences'][:10]: # Show first 10 differences + self._log(f" - {diff}") + if len(comparison['differences']) > 10: + self._log(f" ... 
({len(comparison['differences']) - 10} more differences)") + else: + self._log(" No differences found") + elif isinstance(parsedCompletePart, list) and isinstance(originalData, list): + self._log(f" Both are lists: original={len(originalData)} items, completePart={len(parsedCompletePart)} items") + else: + self._log(f" Different types: original={type(originalData).__name__}, completePart={type(parsedCompletePart).__name__}") + + except json.JSONDecodeError as e: + isValidJson = False + jsonError = str(e) + self._log(f" ❌ completePart is NOT valid JSON") + self._log(f" Error: {jsonError}") + self._log(f" Error position: line {e.lineno}, column {e.colno}") + + # Return test results + return { + "success": isValidJson, + "fileName": fileName, + "originalSize": originalSize, + "cutPosition": cutPosition, + "partSize": len(partContent), + "overlapContextSize": len(contexts.overlapContext), + "hierarchyContextSize": len(contexts.hierarchyContext), + "completePartSize": len(contexts.completePart), + "isValidJson": isValidJson, + "jsonError": jsonError, + "parsedCompletePart": parsedCompletePart is not None + } + + async def testAllJsonFiles(self) -> Dict[str, Any]: + """Test splitting and merging all test JSON files.""" + print("\n" + "="*80) + print("TESTING JSON SPLIT AND MERGE") + print("="*80) + + testFiles = self.createTestJsonFiles() + results = {} + + for jsonFile in testFiles: + try: + result = await self.testJsonSplitMerge(jsonFile) + results[jsonFile["name"]] = result + + # Small delay between tests + await asyncio.sleep(0.5) + + except Exception as e: + import traceback + print(f"\n❌ Error testing {jsonFile['name']}: {str(e)}") + print(traceback.format_exc()) + results[jsonFile["name"]] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + def _writeLogFile(self): + """Write log buffer to file.""" + logDir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "debug") + os.makedirs(logDir, 
exist_ok=True) + logFilePath = os.path.join(logDir, "test12_json_split_merge_results.txt") + + with open(logFilePath, 'w', encoding='utf-8') as f: + f.write('\n'.join(self.logBuffer)) + + self.logFile = logFilePath + print(f"\n📝 Detailed log written to: {logFilePath}") + + async def runTest(self): + """Run the complete test.""" + self._log("="*80) + self._log("JSON SPLIT AND MERGE TEST 12") + self._log("="*80) + + try: + # Test all JSON files + results = await self.testAllJsonFiles() + + # Write log file + self._writeLogFile() + + # Summary + print("\n" + "="*80) + print("TEST SUMMARY") + print("="*80) + + successCount = 0 + + for fileName, result in results.items(): + if result.get("success"): + successCount += 1 + isValidJson = result.get("isValidJson", False) + if isValidJson: + print(f"✅ {fileName:30s}: Valid JSON - completePart parsed successfully") + else: + jsonError = result.get("jsonError", "Unknown error") + print(f"⚠️ {fileName:30s}: Contexts generated but completePart is not valid JSON - {jsonError}") + else: + error = result.get("error", "Unknown error") + print(f"❌ {fileName:30s}: FAILED - {error}") + + print(f"\nResults: {successCount}/{len(results)} successful") + + self.testResults = { + "success": successCount == len(results), + "totalFiles": len(results), + "successCount": successCount, + "results": results + } + + return self.testResults + + except Exception as e: + import traceback + print(f"\n❌ Test failed with error: {type(e).__name__}: {str(e)}") + print(f"Traceback:\n{traceback.format_exc()}") + self.testResults = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + return self.testResults + + +async def main(): + """Run JSON split and merge test 12.""" + tester = JsonSplitMergeTester12() + results = await tester.runTest() + + # Print final results as JSON for easy parsing + print("\n" + "="*80) + print("FINAL RESULTS (JSON)") + print("="*80) + print(json.dumps(results, indent=2, default=str)) + + +if 
__name__ == "__main__": + asyncio.run(main()) diff --git a/tests/test_overlap_context.py b/tests/test_overlap_context.py deleted file mode 100644 index 1a8b9f7b..00000000 --- a/tests/test_overlap_context.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Test function to verify structure hierarchy and overlap context generation. -Tests the functions used to generate continuation prompts for incomplete JSON. -""" - -import json -import os -from pathlib import Path - - -def testOverlapContext(): - """ - Test function that loads two JSON parts and returns: - 1. Structure hierarchy result - 2. Overlap requirement context result - """ - # Load the JSON file (incomplete/cut JSON) - basePath = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts" - - file1Path = basePath / "20260104-220716-032-chapter_2_section_section_2_response.txt" - - # Read JSON (incomplete) - with open(file1Path, 'r', encoding='utf-8') as f: - json1Content = f.read().strip() - - # Find the break position in json1 (where it was cut) - # The last line in json1 is incomplete: [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039 - # We need to find where this incomplete array element ends (right after the last number) - # Find the last number in the file - that's where the content actually ends - import re - # Find all numbers at the end and get the position of the last one - # Look for the pattern: number followed by whitespace/newline or end of string - matches = list(re.finditer(r'\d+', json1Content)) - if matches: - lastMatch = matches[-1] - # Break position is right after the last number (where the closing ] should be) - breakPosition = lastMatch.end() - else: - # Fallback: use end of file - breakPosition = len(json1Content.rstrip()) - - print(f"Break position determined: {breakPosition}") - print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'") - - # Import the functions we need to test - 
import sys - sys.path.insert(0, str(Path(__file__).parent.parent)) - - from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext - from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath - - # Test 1: Find structure hierarchy - print("=" * 80) - print("TEST 1: Structure Hierarchy") - print("=" * 80) - print(f"Break position: {breakPosition}") - print(f"JSON length: {len(json1Content)}") - print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'") - hierarchy = findStructureHierarchy(json1Content, breakPosition) - print(f"\nHierarchy levels found: {len(hierarchy) if hierarchy else 0}") - if not hierarchy: - print("WARNING: No hierarchy found! This suggests the function isn't working correctly.") - else: - print("\nHierarchy details (from root to cut level):") - for i, level in enumerate(hierarchy): - levelType = level['type'] - levelKey = level.get('key', 'N/A') - levelDepth = level['depth'] - levelStart = level['start_pos'] - levelEnd = level['end_pos'] - print(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}") - # Show a snippet of content at this level - if levelStart < len(json1Content): - snippet = json1Content[levelStart:min(levelStart + 50, levelEnd, len(json1Content))] - print(f" Content: {repr(snippet)}") - - # Test 2: Extract cut piece - print("\n" + "=" * 80) - print("TEST 2: Extract Cut Piece") - print("=" * 80) - cutPiece = extractCutPiece(json1Content, breakPosition) - print(f"\nCut piece extracted (length: {len(cutPiece)}):") - if cutPiece: - print(cutPiece[:500] if len(cutPiece) > 500 else cutPiece) - else: - print("WARNING: Cut piece is empty! 
This suggests the function isn't working correctly.") - # Try to manually find the cut piece - # Look backwards from break position for the start of the incomplete array - i = breakPosition - 1 - while i >= 0 and json1Content[i] not in ['[', ',', '\n']: - i -= 1 - if i >= 0 and json1Content[i] == '[': - manualCutPiece = json1Content[i:breakPosition] - print(f"\nManually found cut piece: {manualCutPiece[:200]}") - - # Test 3: Build incomplete context (structure hierarchy with cut point) - print("\n" + "=" * 80) - print("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)") - print("=" * 80) - print("Expected: Should show complete hierarchy from root to cut point") - print(" with complete elements before cut and cut piece marked") - incompleteContext = buildIncompleteContext(json1Content, breakPosition) - print(f"\nIncomplete context (length: {len(incompleteContext)} chars):") - print("-" * 80) - print(incompleteContext) - print("-" * 80) - - # Validate the output - if incompleteContext: - # Check if it shows hierarchy (should have multiple levels of indentation) - lines = incompleteContext.split('\n') - indentLevels = set() - for line in lines: - if line.strip(): - indent = len(line) - len(line.lstrip()) - indentLevels.add(indent) - print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)") - - # Check if cut point is marked - if "<-- CUT POINT" in incompleteContext: - print("Validation: Cut point marker found ✓") - else: - print("Validation: WARNING - Cut point marker NOT found!") - - # Check if root structure is shown - if incompleteContext.strip().startswith('{') or incompleteContext.strip().startswith('['): - print("Validation: Root structure opening found ✓") - else: - print("Validation: WARNING - Root structure opening NOT found!") - else: - print("WARNING: Incomplete context is empty!") - - # Test 4: Extract overlap context (cut part and full part before same level) - print("\n" + "=" * 80) - 
print("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)") - print("=" * 80) - overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition) - print(f"\nOverlap context:") - print(overlapContext) - - # Return results as dictionary - results = { - "hierarchy": hierarchy, - "cutPiece": cutPiece, - "incompleteContext": incompleteContext, - "overlapContext": overlapContext, - "breakPosition": breakPosition, - "json1Length": len(json1Content), - "json1Content": json1Content - } - - return results - - -if __name__ == "__main__": - print("Testing Overlap Context Generation") - print("=" * 80) - results = testOverlapContext() - - print("\n" + "=" * 80) - print("SUMMARY") - print("=" * 80) - print(f"\nBreak position: {results['breakPosition']}") - print(f"JSON1 length: {results['json1Length']}") - print(f"Hierarchy levels: {len(results['hierarchy']) if results['hierarchy'] else 0}") - print(f"Cut piece length: {len(results['cutPiece'])}") - print(f"Incomplete context length: {len(results['incompleteContext'])}") - print(f"Overlap context length: {len(results['overlapContext'])}") - - # Save results to file for inspection - outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt" - outputPath.parent.mkdir(parents=True, exist_ok=True) - - with open(outputPath, 'w', encoding='utf-8') as f: - f.write("=" * 80 + "\n") - f.write("OVERLAP CONTEXT TEST RESULTS\n") - f.write("=" * 80 + "\n\n") - - f.write("FIRST JSON (CUT/INCOMPLETE):\n") - f.write("-" * 80 + "\n") - f.write(f"Break position: {results['breakPosition']}\n") - f.write(f"JSON length: {results['json1Length']}\n") - json1Content = results['json1Content'] - f.write(f"Content around break: '{json1Content[max(0, results['breakPosition']-100):results['breakPosition']+20]}'\n\n") - f.write("Full JSON1 content:\n") - f.write(json1Content) - - f.write("\n\n" + "=" * 80 + "\n") - f.write("STRUCTURE HIERARCHY:\n") - f.write("-" * 80 + "\n") 
- if results['hierarchy']: - f.write(f"Hierarchy levels found: {len(results['hierarchy'])}\n\n") - f.write("Hierarchy details (from root to cut level):\n") - for i, level in enumerate(results['hierarchy']): - levelType = level['type'] - levelKey = level.get('key', 'N/A') - levelDepth = level['depth'] - levelStart = level['start_pos'] - levelEnd = level['end_pos'] - f.write(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}\n") - else: - f.write("No hierarchy found\n") - - f.write("\n\n" + "=" * 80 + "\n") - f.write("INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):\n") - f.write("-" * 80 + "\n") - f.write(results['incompleteContext']) - - f.write("\n\n" + "=" * 80 + "\n") - f.write("OVERLAP CONTEXT (Object containing the cut element):\n") - f.write("-" * 80 + "\n") - f.write(results['overlapContext']) - - print(f"\n\nFull results saved to: {outputPath}")