gateway/modules/services/serviceAi/subJsonResponseHandling.py
2025-12-15 21:55:26 +01:00

1528 lines
68 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON Response Handling Module
Handles merging of JSON responses from multiple AI iterations, including:
- Section merging with intelligent overlap detection
- JSON fragment detection and merging
- Deep recursive structure merging
- Overlap detection for complex nested structures
- String accumulation for iterative JSON generation
"""
import json
import logging
import re
from typing import Dict, Any, List, Optional, Tuple
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
from modules.datamodels.datamodelAi import JsonAccumulationState
logger = logging.getLogger(__name__)
class JsonResponseHandler:
"""Handles JSON response merging and fragment detection for iterative AI generation."""
    @staticmethod
    def mergeSectionsIntelligently(
        existingSections: List[Dict[str, Any]],
        newSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Intelligently merge sections from multiple iterations.

        This is a GENERIC merging strategy that handles broken JSON iterations.
        The break can occur anywhere - in any section, at any depth.

        Merging strategies (in order of priority):
        1. Same Section ID: Merge sections with identical IDs
        2. Same Content-Type + Position: If last section is incomplete and new section continues it
        3. Same Order: Merge sections with same order value
        4. Structural Analysis: Detect continuation based on content structure

        Args:
            existingSections: Sections accumulated from previous iterations
            newSections: Sections extracted from current iteration
            iteration: Current iteration number

        Returns:
            Merged list of sections
        """
        # Trivial cases: one side empty -> return the other side unchanged.
        if not newSections:
            return existingSections
        if not existingSections:
            return newSections
        # NOTE(review): copy() is shallow - section/element dicts are shared
        # with the input; mergeSectionContent mutates element dicts in place.
        mergedSections = existingSections.copy()
        for newSection in newSections:
            # `merged` tracks whether any strategy consumed this new section;
            # each successful strategy jumps to the next new section.
            merged = False
            # Strategy 1: Same Section ID - merge directly
            newSectionId = newSection.get("id")
            if newSectionId:
                for i, existingSection in enumerate(mergedSections):
                    if existingSection.get("id") == newSectionId:
                        # Merge sections with same ID
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
                        break
            if merged:
                continue
            # Strategy 2: Same Content-Type + Position (continuation detection)
            # Check if last section is incomplete and new section continues it
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")
                if lastContentType == newContentType:
                    # Same content type - check if last section is incomplete
                    if JsonResponseHandler.isSectionIncomplete(lastSection):
                        # Last section is incomplete, merge with new section
                        mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                            lastSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
                        continue
            # Strategy 3: Same Order value
            newOrder = newSection.get("order")
            if newOrder is not None:
                for i, existingSection in enumerate(mergedSections):
                    existingOrder = existingSection.get("order")
                    if existingOrder is not None and existingOrder == newOrder:
                        # Merge sections with same order
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
                        break
            if merged:
                continue
            # Strategy 4: Structural Analysis - detect continuation
            # For code_block and table: if last section matches new section type, merge them.
            # Unlike Strategy 2, this does NOT require the last section to look incomplete.
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")
                # Both are code blocks - merge them
                if lastContentType == "code_block" and newContentType == "code_block":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
                    continue
                # Both are tables - merge them (common case for broken JSON iterations)
                if lastContentType == "table" and newContentType == "table":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
                    continue
            # No merge strategy matched - add as new section
            if not merged:
                mergedSections.append(newSection)
                logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
        return mergedSections
@staticmethod
def isSectionIncomplete(section: Dict[str, Any]) -> bool:
"""
Check if a section is incomplete (broken at the end).
This detects incomplete sections based on content analysis:
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
- Text sections: ends mid-sentence, ends with incomplete structure
- Other types: check for incomplete elements
"""
contentType = section.get("content_type", "")
elements = section.get("elements", [])
if not elements:
return False
# Handle list of elements
if isinstance(elements, list) and len(elements) > 0:
lastElement = elements[-1]
else:
lastElement = elements
if not isinstance(lastElement, dict):
return False
# Check code_block for incomplete code
if contentType == "code_block":
code = lastElement.get("code", "")
if code:
# Check if code ends incompletely:
# - Ends with comma (incomplete CSV line)
# - Ends with number but no newline (incomplete line)
# - Ends mid-token (e.g., "23431,23" - incomplete number)
codeStripped = code.rstrip()
if codeStripped:
# Check for incomplete patterns
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
# Ends with comma or has comma but no final newline - likely incomplete
return True
# Check if last line is incomplete (doesn't end with newline and has partial content)
if not code.endswith('\n') and codeStripped:
# No final newline - might be incomplete
# More sophisticated: check if last number is complete
lastLine = codeStripped.split('\n')[-1]
if lastLine and ',' in lastLine:
# Has commas but might be incomplete
parts = lastLine.split(',')
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
return True
# Check table for incomplete rows
if contentType == "table":
rows = lastElement.get("rows", [])
if rows:
# Check if last row is incomplete (ends with incomplete data)
lastRow = rows[-1] if isinstance(rows, list) else []
if isinstance(lastRow, list) and lastRow:
# CRITICAL: Check if last row doesn't have expected number of columns (if headers exist)
# This is the PRIMARY indicator of incomplete table rows
headers = lastElement.get("headers", [])
if headers and isinstance(headers, list):
expectedCols = len(headers)
if len(lastRow) < expectedCols:
logger.debug(f"Table section incomplete: last row has {len(lastRow)} columns, expected {expectedCols}")
return True
# Also check if last row ends with incomplete data (e.g., incomplete string)
lastCell = lastRow[-1] if lastRow else ""
if isinstance(lastCell, str):
# If last cell is incomplete (ends with quote or is very short), section might be incomplete
if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
logger.debug(f"Table section incomplete: last cell appears incomplete: '{lastCell}'")
return True
# Additional check: if last row has fewer cells than previous rows, it's likely incomplete
if len(rows) > 1:
prevRow = rows[-2] if isinstance(rows, list) and len(rows) > 1 else []
if isinstance(prevRow, list) and len(prevRow) > len(lastRow):
logger.debug(f"Table section incomplete: last row has {len(lastRow)} cells, previous row has {len(prevRow)}")
return True
# Check paragraph/text for incomplete sentences
if contentType in ["paragraph", "heading"]:
text = lastElement.get("text", "")
if text:
# Simple heuristic: if doesn't end with sentence-ending punctuation
textStripped = text.rstrip()
if textStripped and not textStripped[-1] in '.!?':
# Might be incomplete, but this is less reliable
# Only mark as incomplete if very short (likely cut off)
if len(textStripped) < 20:
return True
# Check lists for incomplete items
if contentType in ["bullet_list", "numbered_list"]:
items = lastElement.get("items", [])
if items and isinstance(items, list):
# Check if last item is incomplete (very short or ends with incomplete string)
lastItem = items[-1] if items else None
if isinstance(lastItem, str) and len(lastItem) < 3:
return True
# Check image for incomplete base64 data
if contentType == "image":
imageData = lastElement.get("base64Data", "")
if imageData:
# Base64 strings should end with padding ('=' or '==')
# If it doesn't, it might be incomplete
stripped = imageData.rstrip()
if stripped and not stripped.endswith(('=', '==')):
# Check if it's a valid base64 character sequence that was cut off
if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
return True
# If length is not a multiple of 4 (base64 requirement), it might be incomplete
if len(stripped) % 4 != 0:
return True
# GENERIC CHECK: Recursively analyze structure for incompleteness
# This works for ANY structure: arrays, objects, nested, primitives
return JsonResponseHandler._isStructureIncomplete(lastElement)
@staticmethod
def _isStructureIncomplete(structure: Any, max_depth: int = 10) -> bool:
"""
GENERIC recursive check for incomplete structures.
Detects incompleteness by analyzing patterns:
- Arrays: Last item shorter than previous items, incomplete patterns
- Objects: Last object has fewer keys than pattern, incomplete values
- Strings: Very short, ends abruptly, incomplete patterns
- Nested: Recursively checks nested structures
Works for ANY JSON structure of any depth/complexity.
"""
if max_depth <= 0:
return False
# Arrays/Lists - check for incomplete patterns
if isinstance(structure, list):
if len(structure) == 0:
return False
# Check if last item is incomplete compared to previous items
last_item = structure[-1]
# If we have previous items, compare structure
if len(structure) > 1:
prev_item = structure[-2]
# If last item is a list and previous is a list, check length
if isinstance(last_item, list) and isinstance(prev_item, list):
if len(last_item) < len(prev_item):
return True # Last row/item has fewer elements - likely incomplete
# If last item is a dict and previous is a dict, check keys
if isinstance(last_item, dict) and isinstance(prev_item, dict):
if len(last_item) < len(prev_item):
return True # Last object has fewer keys - likely incomplete
# Recursively check last item for incompleteness
if JsonResponseHandler._isStructureIncomplete(last_item, max_depth - 1):
return True
# Objects/Dicts - check for incomplete values
elif isinstance(structure, dict):
for key, value in structure.items():
# Recursively check each value
if JsonResponseHandler._isStructureIncomplete(value, max_depth - 1):
return True
# Check for incomplete strings
if isinstance(value, str):
# Very short strings might be incomplete
if len(value) > 0 and len(value) < 3:
return True
# Strings ending with incomplete patterns (comma, quote, etc.)
stripped = value.rstrip()
if stripped and stripped.endswith((',', '"', '\\')):
return True
# Strings - check for incomplete patterns
elif isinstance(structure, str):
# Very short strings might be incomplete
if len(structure) > 0 and len(structure) < 3:
return True
# Strings ending with incomplete patterns
stripped = structure.rstrip()
if stripped and stripped.endswith((',', '"', '\\')):
return True
return False
@staticmethod
def mergeSectionContent(
existingSection: Dict[str, Any],
newSection: Dict[str, Any],
iteration: int
) -> Dict[str, Any]:
"""
Merge content from two sections.
Handles different content types:
- code_block: Append code, handle overlaps, merge incomplete lines
- paragraph/heading: Append text
- table: Merge rows
- list: Merge items
- Other: Merge elements
"""
contentType = existingSection.get("content_type", "")
existingElements = existingSection.get("elements", [])
newElements = newSection.get("elements", [])
if not newElements:
return existingSection
# Handle list of elements
if isinstance(existingElements, list):
existingElem = existingElements[-1] if existingElements else {}
else:
existingElem = existingElements
if isinstance(newElements, list):
newElem = newElements[0] if newElements else {}
else:
newElem = newElements
if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
return existingSection
# Merge based on content type
if contentType == "code_block":
existingCode = existingElem.get("code", "")
newCode = newElem.get("code", "")
if existingCode and newCode:
mergedCode = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
existingElem["code"] = mergedCode
# Preserve language from existing or new
if "language" not in existingElem and "language" in newElem:
existingElem["language"] = newElem["language"]
elif contentType in ["paragraph", "heading"]:
existingText = existingElem.get("text", "")
newText = newElem.get("text", "")
if existingText and newText:
# Append text with space if needed
if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
mergedText = existingText.rstrip() + " " + newText.lstrip()
else:
mergedText = existingText.rstrip() + "\n" + newText.lstrip()
existingElem["text"] = mergedText
elif contentType == "table":
# Merge table rows with sophisticated overlap detection
existingRows = existingElem.get("rows", [])
newRows = newElem.get("rows", [])
if existingRows and newRows:
# Use sophisticated overlap detection that handles multiple overlapping rows
mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
existingElem["rows"] = mergedRows
logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
elif newRows:
# If existing has no rows but new does, use new rows
existingElem["rows"] = newRows
# Preserve headers from existing (or use new if existing has none)
if not existingElem.get("headers") and newElem.get("headers"):
existingElem["headers"] = newElem["headers"]
# Preserve caption from existing (or use new if existing has none)
if not existingElem.get("caption") and newElem.get("caption"):
existingElem["caption"] = newElem.get("caption")
elif contentType in ["bullet_list", "numbered_list"]:
# Merge list items with sophisticated overlap detection
existingItems = existingElem.get("items", [])
newItems = newElem.get("items", [])
if existingItems and newItems:
mergedItems = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
existingElem["items"] = mergedItems
elif newItems:
existingElem["items"] = newItems
elif contentType == "image":
# Images are typically complete - if new image is provided, replace existing
# But check if existing image data is incomplete (e.g., base64 string cut off)
existingImageData = existingElem.get("base64Data", "")
newImageData = newElem.get("base64Data", "")
if existingImageData and newImageData:
# If existing image data doesn't end with valid base64 padding, it might be incomplete
# Base64 padding is '=' or '==' at the end
if not existingImageData.rstrip().endswith(('=', '==')):
# Existing image might be incomplete - merge by appending new data
# This handles cases where base64 string was cut off
existingElem["base64Data"] = existingImageData + newImageData
logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
else:
# Existing image is complete - replace with new (or keep existing if new is empty)
if newImageData:
existingElem["base64Data"] = newImageData
elif newImageData:
existingElem["base64Data"] = newImageData
# Preserve other image metadata
if not existingElem.get("altText") and newElem.get("altText"):
existingElem["altText"] = newElem["altText"]
if not existingElem.get("caption") and newElem.get("caption"):
existingElem["caption"] = newElem["caption"]
else:
# GENERIC FALLBACK: Use deep recursive merging for complex nested structures
# This handles any content type with arbitrary depth and complexity
merged_element = JsonResponseHandler.mergeDeepStructures(
existingElem,
newElem,
iteration,
f"section.{contentType}"
)
existingElem = merged_element
# Update section with merged content
mergedSection = existingSection.copy()
if isinstance(existingElements, list):
# Update the last element in the list with merged content
if existingElements:
existingElements[-1] = existingElem
mergedSection["elements"] = existingElements
else:
mergedSection["elements"] = existingElem
# Preserve metadata from new section if missing in existing
if "order" not in mergedSection and "order" in newSection:
mergedSection["order"] = newSection["order"]
return mergedSection
@staticmethod
def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
"""
Merge two code blocks intelligently, handling overlaps and incomplete lines.
"""
if not existingCode:
return newCode
if not newCode:
return existingCode
existingLines = existingCode.rstrip().split('\n')
newLines = newCode.strip().split('\n')
if not existingLines or not newLines:
return existingCode + "\n" + newCode
lastExistingLine = existingLines[-1].strip()
firstNewLine = newLines[0].strip()
# Strategy 1: Exact overlap - remove duplicate line
if lastExistingLine == firstNewLine:
newLines = newLines[1:]
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
# Strategy 2: Incomplete line merge
# If last existing line ends with comma or is incomplete, merge with first new line
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
# Last line is incomplete - merge with first new line
# Remove trailing comma from existing line
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
existingLines[-1] = mergedLine
newLines = newLines[1:]
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
# Strategy 3: Partial overlap detection
# Check if first new line starts with the end of last existing line
elif ',' in lastExistingLine and ',' in firstNewLine:
lastExistingParts = lastExistingLine.split(',')
firstNewParts = firstNewLine.split(',')
# Check for overlap: if last part of existing matches first part of new
if lastExistingParts and firstNewParts:
lastExistingPart = lastExistingParts[-1].strip()
firstNewPart = firstNewParts[0].strip()
# If they match, there's overlap
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
# Remove overlapping part from new line
newLines[0] = ','.join(firstNewParts[1:])
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
# Reconstruct merged code
mergedCode = '\n'.join(existingLines)
if newLines:
if mergedCode and not mergedCode.endswith('\n'):
mergedCode += '\n'
mergedCode += '\n'.join(newLines)
return mergedCode
@staticmethod
def detectAndParseJsonFragment(
result: str,
allSections: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""
GENERIC fragment detection for ANY JSON structure.
Detects if response is a JSON fragment (continuation content) rather than full document structure.
Works for ANY JSON type: arrays, objects, primitives, nested structures of any depth/complexity.
Fragment = Any JSON that:
1. Does NOT have "documents" or "sections" keys (not full document structure)
2. Can be ANY structure: array, object, nested, primitive, etc.
3. Is continuation content that needs to be merged into existing sections
Examples (all handled generically):
- Array: [["37643", ...], ...] (table rows, list items, any array)
- Object: {"rows": [...], "headers": [...]} (partial element)
- Primitive: "continuation text" (rare but possible)
- Nested: {"data": {"items": [...]}} (any nested structure)
Returns fragment info dict with:
- fragment_data: The parsed fragment content (ANY type)
- target_section_id: ID of last incomplete section (generic, not type-specific)
CRITICAL: Fully generic - no specific logic for tables, paragraphs, etc.
"""
try:
extracted = extractJsonString(result)
parsed = json.loads(extracted)
# GENERIC fragment detection: Check if it's NOT a full document structure
is_full_document = False
if isinstance(parsed, dict):
# Full document structure has "documents" or "sections" keys
if "documents" in parsed or "sections" in parsed:
is_full_document = True
# If it's a full document structure, it's not a fragment
if is_full_document:
return None
# Otherwise, it's a fragment (can be ANY structure: array, object, primitive, nested)
# Find target: last incomplete section (generic, regardless of content type)
target_section_id = JsonResponseHandler.findLastIncompleteSectionId(allSections)
logger.info(f"Detected GENERIC JSON fragment (type: {type(parsed).__name__}), target: {target_section_id}")
return {
"fragment_data": parsed, # Can be ANY JSON structure
"target_section_id": target_section_id
}
except Exception as e:
logger.error(f"Error detecting JSON fragment: {e}")
logger.debug(f"Fragment detection failed for result: {result[:500]}...")
return None
@staticmethod
def findLastIncompleteSectionId(
allSections: List[Dict[str, Any]]
) -> Optional[str]:
"""
GENERIC: Find the last incomplete section (regardless of content type).
This is fully generic - works for ANY content type, ANY structure.
Returns the ID of the last section that is incomplete, or None if all are complete.
"""
# Find the last incomplete section (generic, not type-specific)
for section in reversed(allSections):
if JsonResponseHandler.isSectionIncomplete(section):
return section.get("id")
# If no incomplete section found, return last section as fallback
if allSections:
return allSections[-1].get("id")
return None
@staticmethod
def mergeFragmentIntoSection(
fragment: Dict[str, Any],
allSections: List[Dict[str, Any]],
iteration: int
) -> Optional[List[Dict[str, Any]]]:
"""
GENERIC fragment merging for ANY JSON structure.
Merges a JSON fragment (ANY structure: array, object, nested, primitive) into the last incomplete section.
Uses ONLY deep recursive merging - no specific logic for content types.
Handles ALL cases:
1. Fragments with overlap (detected and merged intelligently)
2. Fragments without overlap (continuation after cut-off, appended)
3. Any JSON structure (arrays, objects, nested, primitives)
4. Accumulative merging (uses merged data from past iterations)
CRITICAL: Fully generic - works for ANY JSON structure, ANY content type.
NO FALLBACKS: Returns None if merge fails (no target section found).
"""
fragment_data = fragment.get("fragment_data")
target_section_id = fragment.get("target_section_id")
if fragment_data is None:
logger.error(f"Iteration {iteration}: ❌ Fragment has no fragment_data - merge FAILED")
return None
# Find the target section (last incomplete section, generic)
target_section = None
target_index = -1
if target_section_id:
for i, section in enumerate(allSections):
if section.get("id") == target_section_id:
target_section = section
target_index = i
break
# NO FALLBACKS: If target not found by ID, try to find incomplete section
if not target_section:
for i, section in enumerate(reversed(allSections)):
if JsonResponseHandler.isSectionIncomplete(section):
target_section = section
target_index = len(allSections) - 1 - i
break
# NO FALLBACKS: If no target found, merge FAILS
if not target_section:
logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - No target section found for fragment!")
logger.error(f"Iteration {iteration}: Available sections: {[s.get('id') + ' (' + s.get('content_type', 'unknown') + ')' for s in allSections]}")
return None
# Get the last element from target section (where fragment will be merged)
merged_section = target_section.copy()
elements = merged_section.get("elements", [])
if not isinstance(elements, list):
elements = [elements] if elements else []
if not elements:
elements = [{}]
last_element = elements[-1] if elements else {}
if not isinstance(last_element, dict):
last_element = {}
elements.append(last_element)
# CRITICAL: Use ONLY deep recursive merging for ALL fragment types
# This handles ANY structure: arrays, objects, nested, primitives
# Handles overlap detection generically (deep recursive comparison)
# Handles continuation after cut-off (no overlap case)
merged_element = JsonResponseHandler.mergeDeepStructures(
last_element,
fragment_data,
iteration,
f"section.{target_section_id}.fragment"
)
# Update elements with merged content
elements[-1] = merged_element
merged_section["elements"] = elements
# Update allSections (this ensures accumulative merging - merged data is used for next iteration)
merged_sections = allSections.copy()
merged_sections[target_index] = merged_section
logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{target_section_id}'")
# Log merged JSON for debugging
try:
from modules.shared.debugLogger import writeDebugFile
merged_json_str = json.dumps(merged_sections, indent=2, ensure_ascii=False)
writeDebugFile(merged_json_str, f"merged_json_iteration_{iteration}.json")
except Exception as e:
logger.debug(f"Iteration {iteration}: Failed to write merged JSON debug file: {e}")
return merged_sections
@staticmethod
def completeIncompleteStructures(allSections: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Complete any incomplete structures in sections by ensuring proper JSON structure.
This ensures JSON is properly closed even if merge failed or iterations stopped early.
Works generically for ANY structure type - recursively processes all nested structures.
Returns sections with completed structures.
"""
completed_sections = []
for section in allSections:
completed_section = JsonResponseHandler._completeStructure(section)
completed_sections.append(completed_section)
return completed_sections
@staticmethod
def _completeStructure(structure: Any) -> Any:
"""
Recursively complete incomplete structures by ensuring arrays/objects are properly structured.
Works generically for ANY JSON structure - no specific logic for content types.
"""
if isinstance(structure, dict):
completed = {}
for key, value in structure.items():
completed[key] = JsonResponseHandler._completeStructure(value)
return completed
elif isinstance(structure, list):
completed = []
for item in structure:
completed.append(JsonResponseHandler._completeStructure(item))
return completed
else:
# Primitive value - return as is
return structure
@staticmethod
def getContentTypeForFragment(fragment_type: str) -> str:
"""Map fragment type to content type."""
mapping = {
"table_rows": "table",
"table_element": "table",
"code_lines": "code_block",
"code_element": "code_block",
"list_items": "bullet_list"
}
return mapping.get(fragment_type, "paragraph")
@staticmethod
def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
"""
Deep recursive comparison of two JSON-serializable objects.
Handles nested structures of any depth and complexity.
Args:
obj1: First object to compare
obj2: Second object to compare
max_depth: Maximum recursion depth to prevent infinite loops
Returns:
True if objects are deeply equal, False otherwise
"""
if max_depth <= 0:
return False
# Type check
if type(obj1) != type(obj2):
return False
# Primitive types
if isinstance(obj1, (str, int, float, bool, type(None))):
return obj1 == obj2
# Lists/arrays - compare element by element
if isinstance(obj1, list):
if len(obj1) != len(obj2):
return False
return all(JsonResponseHandler.deepCompare(item1, item2, max_depth - 1)
for item1, item2 in zip(obj1, obj2))
# Dicts/objects - compare key by key
if isinstance(obj1, dict):
if set(obj1.keys()) != set(obj2.keys()):
return False
return all(JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
for key in obj1.keys())
# Fallback for other types
return obj1 == obj2
@staticmethod
def findLongestCommonSuffix(
existing_list: List[Any],
new_list: List[Any],
min_overlap: int = 1
) -> int:
"""
Find the longest common suffix of existing_list that matches a prefix of new_list.
This handles cases where multiple elements overlap:
- existing: [A, B, C, D]
- new: [C, D, E, F]
- overlap: [C, D] (length 2)
Returns the length of the overlap (0 if no overlap found).
"""
if not existing_list or not new_list:
return 0
max_overlap = min(len(existing_list), len(new_list))
# Try all possible overlap lengths (from longest to shortest)
for overlap_len in range(max_overlap, min_overlap - 1, -1):
existing_suffix = existing_list[-overlap_len:]
new_prefix = new_list[:overlap_len]
# Deep compare suffix and prefix
if all(JsonResponseHandler.deepCompare(existing_suffix[i], new_prefix[i])
for i in range(overlap_len)):
return overlap_len
return 0
@staticmethod
def findPartialOverlap(
existing_item: Any,
new_item: Any
) -> Tuple[bool, Optional[Any]]:
"""
Detect if new_item completes an incomplete existing_item.
Handles cases like:
- existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"]
- new: ["37643", "37649", ...]
Returns (is_partial_overlap, merged_item) if partial overlap detected, else (False, None).
"""
# Check if both are lists
if isinstance(existing_item, list) and isinstance(new_item, list):
if not existing_item or not new_item:
return False, None
# Check if last element of existing is incomplete and matches first of new
last_existing = existing_item[-1]
first_new = new_item[0]
# If last existing is a string and first new is a string
if isinstance(last_existing, str) and isinstance(first_new, str):
# Check if last existing is incomplete (very short, ends with number, etc.)
if len(last_existing) < 10 and first_new.startswith(last_existing):
# Partial overlap - merge them
merged_last = last_existing + first_new[len(last_existing):]
merged_item = existing_item[:-1] + [merged_last] + new_item[1:]
return True, merged_item
# Check if last existing is incomplete list and first new completes it
if isinstance(last_existing, list) and isinstance(first_new, list):
if len(last_existing) < len(first_new):
# Check if last existing is prefix of first new
if first_new[:len(last_existing)] == last_existing:
# Merge: replace incomplete last with complete first
merged_item = existing_item[:-1] + [first_new] + new_item[1:]
return True, merged_item
# Check if existing is incomplete string and new completes it
if isinstance(existing_item, str) and isinstance(new_item, str):
if len(existing_item) < 50 and new_item.startswith(existing_item):
# Partial overlap
merged = existing_item + new_item[len(existing_item):]
return True, merged
return False, None
@staticmethod
def mergeRowsWithOverlap(
existing_rows: List[List[str]],
new_rows: List[List[str]],
iteration: int
) -> List[List[str]]:
"""
Merge table rows with sophisticated overlap detection.
Handles multiple overlapping rows and partial overlaps.
"""
if not new_rows:
return existing_rows
if not existing_rows:
return new_rows
# Strategy 1: Find longest common suffix/prefix overlap
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
if overlap_len > 0:
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
return existing_rows + new_rows[overlap_len:]
# Strategy 2: Check for partial overlap in last row
if len(existing_rows) > 0 and len(new_rows) > 0:
last_existing = existing_rows[-1]
first_new = new_rows[0]
is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
if is_partial:
logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
return existing_rows[:-1] + [merged_row] + new_rows[1:]
# Strategy 3: Simple first/last comparison (fallback)
if isinstance(existing_rows[-1], list) and isinstance(new_rows[0], list):
if list(existing_rows[-1]) == list(new_rows[0]):
logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
return existing_rows + new_rows[1:]
# No overlap detected - append all new rows
return existing_rows + new_rows
@staticmethod
def mergeItemsWithOverlap(
existing_items: List[str],
new_items: List[str],
iteration: int
) -> List[str]:
"""
Merge list items with sophisticated overlap detection.
Handles multiple overlapping items and partial overlaps.
"""
if not new_items:
return existing_items
if not existing_items:
return new_items
# Strategy 1: Find longest common suffix/prefix overlap
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
if overlap_len > 0:
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
return existing_items + new_items[overlap_len:]
# Strategy 2: Check for partial overlap in last item
if len(existing_items) > 0 and len(new_items) > 0:
is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
if is_partial:
logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
return existing_items[:-1] + [merged_item] + new_items[1:]
# Strategy 3: Simple first/last comparison (fallback)
if existing_items[-1] == new_items[0]:
logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
return existing_items + new_items[1:]
# No overlap detected - append all new items
return existing_items + new_items
    @staticmethod
    def mergeDeepStructures(
        existing: Any,
        new: Any,
        iteration: int,
        path: str = "root"
    ) -> Any:
        """
        FULLY GENERIC recursive merge for ANY JSON structure of arbitrary depth/complexity.

        Handles ALL cases generically:
        1. Arrays/Lists: Overlap detection (suffix/prefix), partial overlap, no overlap (continuation)
        2. Objects/Dicts: Key-by-key merge with overlap detection for nested structures
        3. Primitives: Equality check, replacement if different
        4. Nested structures: Recursively handles any depth/complexity

        Overlap detection strategies (all generic):
        - Array overlap: Finds longest common suffix/prefix, handles partial overlaps
        - Object overlap: Detected recursively through key matching and deep comparison
        - No overlap: Appends/merges continuation content after cut-off point

        CRITICAL: Fully generic - no specific logic for content types.
        Works for ANY JSON structure: arrays, objects, nested, primitives, any combination.

        Args:
            existing: Value accumulated from previous iterations (any JSON type)
            new: Value produced by the current iteration (any JSON type)
            iteration: Current iteration number (used only in log messages)
            path: Dotted location inside the document, for log messages

        Returns:
            Merged value; may be `existing`, `new`, or a freshly built combination
        """
        # Type check: values of different types cannot be merged structurally,
        # so the newer value replaces the older one outright.
        if type(existing) != type(new):
            # Types don't match - return new (replacement)
            logger.debug(f"Iteration {iteration}: Types don't match at {path} ({type(existing).__name__} vs {type(new).__name__}), replacing")
            return new
        # Lists/arrays - GENERIC merge with overlap detection
        if isinstance(existing, list) and isinstance(new, list):
            # Empty sides short-circuit to the non-empty one.
            if not new:
                return existing
            if not existing:
                return new
            # Strategy 1: Find longest common suffix/prefix overlap (handles multiple overlapping elements)
            overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
            if overlap_len > 0:
                logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
                return existing + new[overlap_len:]
            # Strategy 2: Check for partial overlap in last element (incomplete element completion)
            if len(existing) > 0 and len(new) > 0:
                is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
                if is_partial:
                    logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging incomplete element")
                    return existing[:-1] + [merged_item] + new[1:]
            # Strategy 3: No overlap detected - continuation after cut-off point
            # This handles the case where new data starts exactly after the cut-off
            logger.debug(f"Iteration {iteration}: No overlap at {path}, appending continuation content ({len(new)} items)")
            return existing + new
        # Dicts/objects - GENERIC merge with recursive overlap detection
        if isinstance(existing, dict) and isinstance(new, dict):
            merged = existing.copy()
            # Check for object-level overlap: if new object is subset/superset of existing
            # This handles cases where same object structure appears in both
            existing_keys = set(existing.keys())
            new_keys = set(new.keys())
            # If new is subset of existing and values match, it's overlap (skip)
            # NOTE: deepCompare is defined elsewhere in this class; presumably
            # a recursive structural equality check - confirm before relying on it.
            if new_keys.issubset(existing_keys):
                all_match = True
                for key in new_keys:
                    if not JsonResponseHandler.deepCompare(existing[key], new[key]):
                        all_match = False
                        break
                if all_match:
                    logger.debug(f"Iteration {iteration}: Object at {path} is subset overlap, skipping")
                    return existing
            # Merge key-by-key with recursive overlap detection
            for key, new_value in new.items():
                if key in merged:
                    # Key exists - merge recursively (handles nested overlap detection)
                    merged[key] = JsonResponseHandler.mergeDeepStructures(
                        merged[key],
                        new_value,
                        iteration,
                        f"{path}.{key}"
                    )
                else:
                    # New key - add it (continuation content)
                    merged[key] = new_value
                    logger.debug(f"Iteration {iteration}: Added new key '{key}' at {path} (continuation)")
            return merged
        # Primitives - equality check (identical values need no merge)
        if existing == new:
            return existing
        # Different primitive values - return new (continuation/replacement)
        logger.debug(f"Iteration {iteration}: Primitive at {path} differs, using new value")
        return new
@staticmethod
def cleanEncodingIssues(jsonString: str) -> str:
"""
GENERIC function to remove problematic encoding parts from JSON string.
Works for ANY JSON structure - removes problematic characters/bytes.
Args:
jsonString: JSON string that may have encoding issues
Returns:
Cleaned JSON string
"""
try:
# Try to decode/encode to detect issues
jsonString.encode('utf-8').decode('utf-8')
return jsonString
except UnicodeError:
# Remove problematic parts
cleaned = jsonString.encode('utf-8', errors='ignore').decode('utf-8', errors='ignore')
logger.warning("Removed encoding issues from JSON string")
return cleaned
@staticmethod
def mergeJsonStringsWithOverlap(
accumulated: str,
newFragment: str
) -> str:
"""
GENERIC function to merge two JSON strings, handling overlaps intelligently.
Works for ANY JSON structure - no specific logic for content types.
Overlap scenarios (all handled generically):
- Exact continuation: newFragment starts exactly where accumulated ends
- Partial overlap: newFragment overlaps with end of accumulated
- Full overlap: newFragment is subset of accumulated
Strategy:
1. Find longest common suffix/prefix match (string-based comparison)
2. Remove duplicate content
3. Concatenate remaining parts
Args:
accumulated: Previously accumulated JSON string
newFragment: New fragment string to append
Returns:
Combined JSON string with overlaps removed
"""
if not accumulated:
return newFragment
if not newFragment:
return accumulated
# Find longest common suffix/prefix match
# Try different overlap lengths (from longest to shortest)
# Overlaps can be as small as 1 character, so we check all possible lengths
maxOverlapLen = min(len(accumulated), len(newFragment))
# Start from maximum possible overlap down to 1 character
# This ensures we find the longest overlap, even if it's just 1 character
for overlapLen in range(maxOverlapLen, 0, -1):
accumulatedSuffix = accumulated[-overlapLen:]
newFragmentPrefix = newFragment[:overlapLen]
if accumulatedSuffix == newFragmentPrefix:
# Found overlap - remove duplicate part
logger.debug(f"Found overlap of {overlapLen} characters, removing duplicate")
return accumulated + newFragment[overlapLen:]
# No overlap found - simple concatenation
return accumulated + newFragment
@staticmethod
def isJsonComplete(parsedJson: Dict[str, Any]) -> bool:
"""
GENERIC function to check if parsed JSON structure is complete.
Works for ANY JSON structure - no specific logic for content types.
Completeness checks (all generic):
- All arrays are properly closed
- All objects are properly closed
- No incomplete structures
- Recursive validation of nested structures
Args:
parsedJson: Parsed JSON object
Returns:
True if JSON is complete, False otherwise
"""
def _checkStructureComplete(obj: Any, depth: int = 0) -> bool:
"""Recursively check if structure is complete."""
if depth > 50: # Prevent infinite recursion
return True
if isinstance(obj, dict):
# Check all values recursively
for value in obj.values():
if not _checkStructureComplete(value, depth + 1):
return False
return True
elif isinstance(obj, list):
# Check all items recursively
for item in obj:
if not _checkStructureComplete(item, depth + 1):
return False
return True
else:
# Primitive value - always complete
return True
try:
return _checkStructureComplete(parsedJson)
except Exception as e:
logger.debug(f"Error checking JSON completeness: {e}")
return False
@staticmethod
def finalizeJson(parsedJson: Dict[str, Any]) -> Dict[str, Any]:
"""
GENERIC function to finalize complete JSON by adding missing closing elements and repairing corruption.
Works for ANY JSON structure - no specific logic for content types.
Steps (all generic):
1. Analyze structure for missing closing elements (recursively)
2. Add closing brackets/braces where needed
3. Repair any remaining corruption
4. Validate final structure
Args:
parsedJson: Parsed JSON object that needs finalization
Returns:
Finalized JSON object
"""
# For now, just return as-is since parsing succeeded
# If needed, can add logic to check for incomplete structures
# and add closing elements
return parsedJson
@staticmethod
def extractKpiValuesFromJson(
parsedJson: Dict[str, Any],
kpis: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""
Extract current KPI values from parsed JSON and update KPI objects.
Args:
parsedJson: Parsed JSON object
kpis: List of KPI objects (will be updated with currentValue)
Returns:
Updated list of KPI objects with currentValue set
"""
updatedKpis = []
for kpi in kpis:
kpiId = kpi.get("id")
jsonPath = kpi.get("jsonPath")
if not kpiId or not jsonPath:
continue
# Create copy of KPI object
updatedKpi = kpi.copy()
try:
# Extract value using JSON path
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
elif isinstance(value, (int, float)):
updatedKpi["currentValue"] = int(value)
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: numeric value {int(value)}")
else:
updatedKpi["currentValue"] = 0
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: non-list/non-numeric value, set to 0")
except Exception as e:
logger.warning(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}")
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
updatedKpis.append(updatedKpi)
return updatedKpis
@staticmethod
def extractKpiValuesFromIncompleteJson(
jsonString: str,
kpis: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""
Extract KPI values from incomplete JSON string.
Uses existing JSON completion function to close incomplete structures, then extracts KPIs.
Args:
jsonString: Incomplete JSON string
kpis: List of KPI objects
Returns:
Updated list of KPI objects with currentValue set
"""
updatedKpis = []
for kpi in kpis:
kpiId = kpi.get("id")
jsonPath = kpi.get("jsonPath")
if not kpiId or not jsonPath:
continue
updatedKpi = kpi.copy()
try:
# Use existing JSON completion function to close incomplete structures
from modules.shared.jsonUtils import extractJsonString, closeJsonStructures
# Extract JSON string and complete it with missing closing elements
extracted = extractJsonString(jsonString)
completed = closeJsonStructures(extracted)
# Parse completed JSON
parsed = json.loads(completed)
# Extract value using path
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
elif isinstance(value, (int, float)):
updatedKpi["currentValue"] = int(value)
logger.debug(f"Extracted KPI {kpiId} from completed JSON: numeric value {int(value)}")
else:
updatedKpi["currentValue"] = 0
logger.debug(f"Extracted KPI {kpiId} from completed JSON: non-list/non-numeric value, set to 0")
except Exception as e:
logger.warning(f"Error extracting KPI {kpiId} from incomplete JSON: {e}")
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
updatedKpis.append(updatedKpi)
return updatedKpis
@staticmethod
def _extractValueByPath(obj: Any, path: str) -> Any:
"""
Extract value from object using dot-notation path with array indices.
Example: "sections[0].elements[0].items"
Returns None if path doesn't exist (for incomplete JSON handling).
"""
parts = path.split('.')
current = obj
for part in parts:
if '[' in part and ']' in part:
# Handle array access: "sections[0]"
key = part[:part.index('[')]
index = int(part[part.index('[') + 1:part.index(']')])
if key:
if isinstance(current, dict):
current = current.get(key)
if current is None:
return None # Key doesn't exist
else:
return None # Can't access key on non-dict
if isinstance(current, list):
if 0 <= index < len(current):
current = current[index]
else:
# Index out of range - return None for incomplete JSON
return None
else:
# Not a list, can't index
return None
else:
# Handle dict access
if isinstance(current, dict):
current = current.get(part)
if current is None:
return None # Key doesn't exist
else:
return None # Can't access key on non-dict
return current
@staticmethod
def validateKpiProgression(
accumulationState: JsonAccumulationState,
updatedKpis: List[Dict[str, Any]]
) -> Tuple[bool, str]:
"""
Validate KPI progression from parsed JSON.
Validation rules:
- Proceed if: At least ONE KPI increased
- Stop if: Any KPI went backwards → return (False, "KPI went backwards")
- Stop if: No KPIs progressed → return (False, "No progress")
- Finish if: All KPIs completed OR JSON is complete → return (True, "Complete")
Args:
accumulationState: Current accumulation state (contains kpis)
updatedKpis: Updated KPI objects with currentValue set
Returns:
Tuple of (shouldProceed, reason)
"""
if not accumulationState.kpis:
# No KPIs defined - always proceed
return True, "No KPIs defined"
# Build dict of last values for comparison
lastValues = {kpi.get("id"): kpi.get("currentValue", 0) for kpi in accumulationState.kpis}
logger.debug(f"KPI validation: lastValues = {lastValues}")
logger.debug(f"KPI validation: updatedKpis = {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
# Check if any KPI went backwards
for updatedKpi in updatedKpis:
kpiId = updatedKpi.get("id")
currentValue = updatedKpi.get("currentValue", 0)
if kpiId in lastValues:
lastValue = lastValues[kpiId]
if currentValue < lastValue:
logger.warning(f"KPI {kpiId} went BACKWARDS: {lastValue}{currentValue}")
return False, f"KPI {kpiId} went backwards"
# Check if all KPIs are completed
allCompleted = True
for updatedKpi in updatedKpis:
targetValue = updatedKpi.get("targetValue", 0)
currentValue = updatedKpi.get("currentValue", 0)
if currentValue < targetValue:
allCompleted = False
break
if allCompleted:
logger.info("All KPIs completed")
return True, "All KPIs completed"
# Check if at least one KPI progressed
atLeastOneProgressed = False
for updatedKpi in updatedKpis:
kpiId = updatedKpi.get("id")
currentValue = updatedKpi.get("currentValue", 0)
if kpiId in lastValues:
lastValue = lastValues[kpiId]
if currentValue > lastValue:
atLeastOneProgressed = True
logger.info(f"KPI {kpiId} progressed: {lastValue}{currentValue}")
break
else:
# First time seeing this KPI - if it has a value, it's progress
if currentValue > 0:
atLeastOneProgressed = True
logger.info(f"KPI {kpiId} initialized: {currentValue}")
break
if not atLeastOneProgressed:
logger.warning(f"No KPIs progressed. Last values: {lastValues}, Current values: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
return False, "No progress"
return True, "Progress detected"
    @staticmethod
    def accumulateAndParseJsonFragments(
        accumulatedJsonString: str,
        newFragmentString: str,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> Tuple[str, List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
        """
        Accumulate JSON fragments and parse when complete.

        GENERIC function that handles:
        1. Concatenating JSON strings with overlap detection
        2. Parsing the accumulated string
        3. Extracting sections (partial if incomplete, final if complete)
        4. Determining completion status

        Args:
            accumulatedJsonString: Previously accumulated JSON string
            newFragmentString: New fragment string from current iteration
            allSections: Sections extracted so far (currently unused in the
                body; kept in the signature for callers / prompt context)
            iteration: Current iteration number

        Returns:
            Tuple of:
            - accumulatedJsonString: Updated accumulated string (the previous
              one when the new fragment could not be merged/repaired)
            - sections: Extracted sections (partial if incomplete, final if complete)
            - isComplete: True if JSON is complete and valid
            - parsedResult: Parsed JSON object (if parsing succeeded), else None
        """
        # Step 1: Clean encoding issues from accumulated string (check end of first delivered part)
        cleanedAccumulated = JsonResponseHandler.cleanEncodingIssues(accumulatedJsonString)
        # Step 2: Clean encoding issues from new fragment
        cleanedFragment = JsonResponseHandler.cleanEncodingIssues(newFragmentString)
        # Step 3: Concatenate with overlap handling (duplicated seam text removed)
        combinedString = JsonResponseHandler.mergeJsonStringsWithOverlap(
            cleanedAccumulated,
            cleanedFragment
        )
        # Step 4: Try to parse
        try:
            extracted = extractJsonString(combinedString)
            parsedResult = json.loads(extracted)
            # Step 5: Parsing succeeded - check completeness
            isComplete = JsonResponseHandler.isJsonComplete(parsedResult)
            if isComplete:
                # Step 6: Complete JSON - finalize
                finalizedJson = JsonResponseHandler.finalizeJson(parsedResult)
                sections = extractSectionsFromDocument(finalizedJson)
                logger.info(f"Iteration {iteration}: JSON accumulation complete, extracted {len(sections)} sections")
                return combinedString, sections, True, finalizedJson
            else:
                # Step 7: Incomplete but parseable - extract partial sections
                sections = extractSectionsFromDocument(parsedResult)
                logger.info(f"Iteration {iteration}: JSON accumulation incomplete but parseable, extracted {len(sections)} partial sections")
                return combinedString, sections, False, parsedResult
        except json.JSONDecodeError:
            # Step 8: Still broken - repair and extract partial sections
            # NOTE(review): repairBrokenJson presumably returns a parsed
            # object (it is passed straight to extractSectionsFromDocument
            # and returned as parsedResult) - confirm against jsonUtils.
            repaired = repairBrokenJson(combinedString)
            if repaired:
                sections = extractSectionsFromDocument(repaired)
                logger.info(f"Iteration {iteration}: JSON accumulation repaired, extracted {len(sections)} sections")
                return combinedString, sections, False, repaired
            else:
                # Repair failed - continue with data BEFORE merging the problematic piece
                # Return previous accumulated string (before adding new fragment)
                # This ensures we don't lose previously accumulated data
                # (the new fragment is effectively discarded for this iteration).
                logger.warning(f"Iteration {iteration}: Repair failed, continuing with previous accumulated data")
                return accumulatedJsonString, [], False, None