# gateway/modules/services/serviceAi/subJsonResponseHandling.py

# 1022 lines
# 46 KiB
# Python

"""
JSON Response Handling Module
Handles merging of JSON responses from multiple AI iterations, including:
- Section merging with intelligent overlap detection
- JSON fragment detection and merging
- Deep recursive structure merging
- Overlap detection for complex nested structures
"""
import json
import logging
from typing import Dict, Any, List, Optional, Tuple
from modules.shared.jsonUtils import extractJsonString
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class JsonResponseHandler:
    """Handles JSON response merging and fragment detection for iterative AI generation.

    Every method is a static, side-effect-free helper: inputs are never
    mutated, callers must use the returned value. Sections are plain dicts
    with (at least) the keys ``id``, ``content_type``, ``order`` and
    ``elements`` as read by the methods below.
    """

    # Characters that may appear in a base64 payload (alphabet plus padding).
    _BASE64_ALPHABET = (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
    )

    # Fragment type -> content type of the section such a fragment continues.
    _FRAGMENT_CONTENT_TYPES = {
        "table_rows": "table",
        "table_element": "table",
        "code_lines": "code_block",
        "code_element": "code_block",
        "list_items": "bullet_list",
    }

    @staticmethod
    def mergeSectionsIntelligently(
        existingSections: List[Dict[str, Any]],
        newSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Intelligently merge sections from multiple iterations.

        This is a GENERIC merging strategy that handles broken JSON iterations.
        The break can occur anywhere - in any section, at any depth.

        Merging strategies (tried in this order for every new section):
        1. Same Section ID: merge into the section with the identical ID
        2. Same Content-Type + Position: the last section is incomplete and the
           new section has the same content type, so it continues it
        3. Same Order: merge into the section with the same ``order`` value
        4. Structural Analysis: consecutive code_block / table sections are
           treated as one

        Args:
            existingSections: Sections accumulated from previous iterations
            newSections: Sections extracted from current iteration
            iteration: Current iteration number (used for logging only)

        Returns:
            Merged list of sections. When one of the inputs is empty the other
            input list is returned as-is (not copied).
        """
        if not newSections:
            return existingSections
        if not existingSections:
            return newSections

        merge = JsonResponseHandler.mergeSectionContent
        # Shallow copy: slots are replaced, the caller's list stays untouched.
        mergedSections = list(existingSections)

        for newSection in newSections:
            newContentType = newSection.get("content_type")

            # Strategy 1: Same Section ID - merge directly
            newSectionId = newSection.get("id")
            if newSectionId:
                matchIndex = next(
                    (i for i, candidate in enumerate(mergedSections)
                     if candidate.get("id") == newSectionId),
                    None,
                )
                if matchIndex is not None:
                    mergedSections[matchIndex] = merge(
                        mergedSections[matchIndex], newSection, iteration
                    )
                    logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
                    continue

            # mergedSections is never empty here (existingSections was non-empty).
            lastSection = mergedSections[-1]
            lastContentType = lastSection.get("content_type")

            # Strategy 2: Same Content-Type + Position (continuation detection)
            if (lastContentType == newContentType
                    and JsonResponseHandler.isSectionIncomplete(lastSection)):
                mergedSections[-1] = merge(lastSection, newSection, iteration)
                logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
                continue

            # Strategy 3: Same Order value
            newOrder = newSection.get("order")
            if newOrder is not None:
                matchIndex = next(
                    (i for i, candidate in enumerate(mergedSections)
                     if candidate.get("order") is not None
                     and candidate.get("order") == newOrder),
                    None,
                )
                if matchIndex is not None:
                    mergedSections[matchIndex] = merge(
                        mergedSections[matchIndex], newSection, iteration
                    )
                    logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
                    continue

            # Strategy 4: Structural Analysis - two consecutive code blocks or
            # tables are a common result of a broken JSON iteration.
            if lastContentType == newContentType and lastContentType in ("code_block", "table"):
                mergedSections[-1] = merge(lastSection, newSection, iteration)
                logger.debug(f"Iteration {iteration}: Merged {lastContentType} sections by structural analysis")
                continue

            # No merge strategy matched - add as new section
            mergedSections.append(newSection)
            logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")

        return mergedSections

    @staticmethod
    def isSectionIncomplete(section: Dict[str, Any]) -> bool:
        """
        Check if a section is incomplete (broken at the end).

        Only the LAST element of the section is inspected. The checks are
        heuristics per content type, followed by a generic check:
        - code_block: code ends with a comma, or contains a comma and has no
          final newline
        - table: last cell ends with a quote or is 1-2 characters long, or the
          last row has fewer cells than there are headers
        - paragraph/heading: short text (< 20 chars) without sentence-ending
          punctuation
        - bullet_list/numbered_list: last item shorter than 3 characters
        - image: base64 data without padding that contains an invalid last
          character or whose length is not a multiple of 4
        - generic: any list value whose last item is a very short string or a
          dict with fewer than 2 keys, or any short string value (< 10 chars)
          not ending in '.', '!', '?' or a newline

        NOTE(review): the generic check also fires on short metadata strings
        (e.g. a ``language`` value), so many sections are reported incomplete;
        kept as-is because callers may rely on it.

        Args:
            section: Section dict with ``content_type`` and ``elements``

        Returns:
            True if the section looks truncated, False otherwise
        """
        contentType = section.get("content_type", "")
        elements = section.get("elements", [])
        if not elements:
            return False

        # ``elements`` may be a list of element dicts or a single element dict.
        lastElement = elements[-1] if isinstance(elements, list) else elements
        if not isinstance(lastElement, dict):
            return False

        if contentType == "code_block":
            code = lastElement.get("code", "")
            if isinstance(code, str) and code:
                codeStripped = code.rstrip()
                if codeStripped:
                    # The newline test must look at the unstripped code: the
                    # stripped text can never end with '\n'.
                    if codeStripped.endswith(',') or (',' in codeStripped and not code.endswith('\n')):
                        return True

        if contentType == "table":
            rows = lastElement.get("rows", [])
            if rows:
                lastRow = rows[-1] if isinstance(rows, list) else []
                if isinstance(lastRow, list) and lastRow:
                    lastCell = lastRow[-1]
                    if isinstance(lastCell, str):
                        # Dangling quote or a 1-2 character cell: likely cut off.
                        if lastCell.endswith('"') or 0 < len(lastCell) < 3:
                            return True
                    headers = lastElement.get("headers", [])
                    if headers and isinstance(headers, list):
                        if len(lastRow) < len(headers):
                            return True

        if contentType in ["paragraph", "heading"]:
            text = lastElement.get("text", "")
            if isinstance(text, str) and text:
                textStripped = text.rstrip()
                # Missing punctuation alone is unreliable, so additionally
                # require the text to be very short.
                if textStripped and textStripped[-1] not in '.!?' and len(textStripped) < 20:
                    return True

        if contentType in ["bullet_list", "numbered_list"]:
            items = lastElement.get("items", [])
            if items and isinstance(items, list):
                lastItem = items[-1]
                if isinstance(lastItem, str) and len(lastItem) < 3:
                    return True

        if contentType == "image":
            imageData = lastElement.get("base64Data", "")
            if isinstance(imageData, str) and imageData:
                stripped = imageData.rstrip()
                if stripped and not stripped.endswith('='):
                    if stripped[-1] not in JsonResponseHandler._BASE64_ALPHABET:
                        return True
                    # base64 output length is always a multiple of 4
                    if len(stripped) % 4 != 0:
                        return True

        # GENERIC CHECK: look for truncated values in any field of the element
        for value in lastElement.values():
            if isinstance(value, list) and value:
                lastItem = value[-1]
                if isinstance(lastItem, str):
                    if len(lastItem) < 3:
                        return True
                elif isinstance(lastItem, dict):
                    if len(lastItem) < 2:
                        return True
            elif isinstance(value, str):
                if 0 < len(value) < 10 and value[-1] not in '.!?\n':
                    return True

        return False

    @staticmethod
    def mergeSectionContent(
        existingSection: Dict[str, Any],
        newSection: Dict[str, Any],
        iteration: int
    ) -> Dict[str, Any]:
        """
        Merge content from two sections.

        The LAST element of the existing section is merged with the FIRST
        element of the new section; any further new elements are appended
        unchanged. The content type of the existing section selects the
        strategy:
        - code_block: append code, handle overlaps, merge incomplete lines
        - paragraph/heading: append text
        - table: merge rows (overlap aware), keep existing headers/caption
        - bullet_list/numbered_list: merge items (overlap aware)
        - image: append base64 data if the existing data has no padding,
          otherwise replace it
        - other: deep recursive merge of the two elements

        Neither input is mutated; the merged section is a new dict with a new
        ``elements`` container.

        Args:
            existingSection: Section accumulated so far
            newSection: Section from the current iteration
            iteration: Current iteration number (used for logging only)

        Returns:
            The merged section. ``existingSection`` itself is returned when the
            new section has no elements or either element is not a dict.
        """
        contentType = existingSection.get("content_type", "")
        existingElements = existingSection.get("elements", [])
        newElements = newSection.get("elements", [])
        if not newElements:
            return existingSection

        existingIsList = isinstance(existingElements, list)
        newIsList = isinstance(newElements, list)
        if existingIsList:
            existingElem = existingElements[-1] if existingElements else {}
        else:
            existingElem = existingElements
        newElem = newElements[0] if newIsList else newElements

        if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
            return existingSection

        mergedSection = existingSection.copy()

        if existingIsList and not existingElements:
            # Nothing to continue: take the new elements as they are instead
            # of losing them.
            mergedSection["elements"] = list(newElements) if newIsList else [newElem]
        else:
            # Shallow copy so the caller's element dict is never modified.
            mergedElem = dict(existingElem)

            if contentType == "code_block":
                existingCode = mergedElem.get("code", "")
                newCode = newElem.get("code", "")
                if existingCode and newCode:
                    mergedElem["code"] = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
                elif newCode:
                    mergedElem["code"] = newCode
                if "language" not in mergedElem and "language" in newElem:
                    mergedElem["language"] = newElem["language"]

            elif contentType in ["paragraph", "heading"]:
                existingText = mergedElem.get("text", "")
                newText = newElem.get("text", "")
                if existingText and newText:
                    head = existingText.rstrip()
                    # Unfinished sentence -> continue on the same line,
                    # otherwise start a new line.
                    separator = " " if head and head[-1] not in '.!?\n' else "\n"
                    mergedElem["text"] = head + separator + newText.lstrip()
                elif newText:
                    mergedElem["text"] = newText

            elif contentType == "table":
                existingRows = mergedElem.get("rows", [])
                newRows = newElem.get("rows", [])
                if existingRows and newRows:
                    mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
                    mergedElem["rows"] = mergedRows
                    logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
                elif newRows:
                    mergedElem["rows"] = newRows
                # Existing headers/caption win; new ones only fill gaps.
                if not mergedElem.get("headers") and newElem.get("headers"):
                    mergedElem["headers"] = newElem["headers"]
                if not mergedElem.get("caption") and newElem.get("caption"):
                    mergedElem["caption"] = newElem.get("caption")

            elif contentType in ["bullet_list", "numbered_list"]:
                existingItems = mergedElem.get("items", [])
                newItems = newElem.get("items", [])
                if existingItems and newItems:
                    mergedElem["items"] = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
                elif newItems:
                    mergedElem["items"] = newItems

            elif contentType == "image":
                existingImageData = mergedElem.get("base64Data", "")
                newImageData = newElem.get("base64Data", "")
                if existingImageData and newImageData:
                    if not existingImageData.rstrip().endswith('='):
                        # No padding: assume the base64 string was cut off and
                        # the new data continues it.
                        mergedElem["base64Data"] = existingImageData + newImageData
                        logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
                    else:
                        # Existing image looks complete - the new one replaces it.
                        mergedElem["base64Data"] = newImageData
                elif newImageData:
                    mergedElem["base64Data"] = newImageData
                if not mergedElem.get("altText") and newElem.get("altText"):
                    mergedElem["altText"] = newElem["altText"]
                if not mergedElem.get("caption") and newElem.get("caption"):
                    mergedElem["caption"] = newElem["caption"]

            else:
                # GENERIC FALLBACK: deep recursive merge for any other type
                mergedElem = JsonResponseHandler.mergeDeepStructures(
                    existingElem,
                    newElem,
                    iteration,
                    f"section.{contentType}"
                )

            if existingIsList:
                mergedElements = list(existingElements)
                mergedElements[-1] = mergedElem
                if newIsList:
                    # Elements after the continued one are new content.
                    mergedElements.extend(newElements[1:])
                mergedSection["elements"] = mergedElements
            else:
                mergedSection["elements"] = mergedElem

        # Preserve metadata from new section if missing in existing
        if "order" not in mergedSection and "order" in newSection:
            mergedSection["order"] = newSection["order"]
        return mergedSection

    @staticmethod
    def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
        """
        Merge two code blocks intelligently, handling overlaps and incomplete lines.

        The last line of the existing code is compared with the first line of
        the new code (both whitespace-stripped):
        1. identical lines: the duplicate new line is dropped
        2. the existing line ends with a comma, or its last comma-separated
           part is shorter than 5 characters: the new line is appended to it,
           separated by a single comma (indentation of the existing line is
           kept)
        3. both lines contain commas and the last existing part equals the
           first new part: that part is removed from the new line

        Trailing whitespace of ``existingCode`` and surrounding whitespace of
        ``newCode`` are stripped before merging.

        Args:
            existingCode: Code accumulated so far
            newCode: Continuation code
            iteration: Current iteration number (used for logging only)

        Returns:
            The merged code; the other argument unchanged if one is empty.
        """
        if not existingCode:
            return newCode
        if not newCode:
            return existingCode

        existingLines = existingCode.rstrip().split('\n')
        newLines = newCode.strip().split('\n')

        rawLastLine = existingLines[-1]
        lastExistingLine = rawLastLine.strip()
        firstNewLine = newLines[0].strip()

        if lastExistingLine == firstNewLine:
            # Strategy 1: exact overlap - remove duplicate line
            newLines = newLines[1:]
            logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
        elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
            # Strategy 2: incomplete line - join it with its continuation,
            # keeping the leading indentation of the existing line.
            indent = rawLastLine[:len(rawLastLine) - len(rawLastLine.lstrip())]
            existingLines[-1] = indent + lastExistingLine.rstrip(',') + ',' + firstNewLine
            newLines = newLines[1:]
            logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
        elif ',' in lastExistingLine and ',' in firstNewLine:
            # Strategy 3: partial overlap of the boundary field
            lastExistingPart = lastExistingLine.split(',')[-1].strip()
            firstNewParts = firstNewLine.split(',')
            if lastExistingPart == firstNewParts[0].strip():
                newLines[0] = ','.join(firstNewParts[1:])
                logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")

        mergedCode = '\n'.join(existingLines)
        if newLines:
            if mergedCode and not mergedCode.endswith('\n'):
                mergedCode += '\n'
            mergedCode += '\n'.join(newLines)
        return mergedCode

    @staticmethod
    def detectAndParseJsonFragment(
        result: str,
        allSections: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Detect if response is a JSON fragment (continuation content) rather than
        a full document structure.

        Fragments are continuation content that needs to be merged into
        existing sections. Examples:
        - Array of table rows: [["37643", "37649", ...], ...]
        - Array of code lines: ["line1", "line2", ...]
        - Array of list items: ["item1", "item2", ...]
        - Object with "rows" or "code" but without "documents"/"sections"

        A string array is attributed to a code_block section if one exists,
        otherwise to a bullet_list, otherwise to a numbered_list; with no
        matching section it is reported as code lines without a target.

        Args:
            result: Raw AI response text
            allSections: Sections accumulated so far (used to pick a target)

        Returns:
            None if the response is not a fragment or cannot be parsed,
            otherwise a dict with:
            - fragment_type: "table_rows", "code_lines", "list_items",
              "table_element" or "code_element"
            - fragment_data: The parsed fragment content
            - target_section_id: ID of section to merge into (may be None)
        """
        findTarget = JsonResponseHandler.findTargetSectionId
        try:
            extracted = extractJsonString(result)
            parsed = json.loads(extracted)

            if isinstance(parsed, list):
                if not parsed:
                    return None
                first_item = parsed[0]

                if isinstance(first_item, list):
                    # Array of arrays: [["col1", "col2"], ["col3", "col4"], ...]
                    logger.debug("Detected JSON fragment: table rows array")
                    return {
                        "fragment_type": "table_rows",
                        "fragment_data": parsed,
                        "target_section_id": findTarget(allSections, "table")
                    }

                if isinstance(first_item, str):
                    # Array of strings: code lines or list items, decided by
                    # which kind of section is available.
                    code_target_id = findTarget(allSections, "code_block")
                    if code_target_id:
                        logger.debug("Detected JSON fragment: code lines array")
                        return {
                            "fragment_type": "code_lines",
                            "fragment_data": parsed,
                            "target_section_id": code_target_id
                        }
                    for list_type in ("bullet_list", "numbered_list"):
                        list_target_id = findTarget(allSections, list_type)
                        if list_target_id:
                            logger.debug("Detected JSON fragment: list items array")
                            return {
                                "fragment_type": "list_items",
                                "fragment_data": parsed,
                                "target_section_id": list_target_id
                            }
                    # Default to code lines if no context
                    logger.debug("Detected JSON fragment: string array (assuming code lines)")
                    return {
                        "fragment_type": "code_lines",
                        "fragment_data": parsed,
                        "target_section_id": code_target_id
                    }

            elif isinstance(parsed, dict):
                # An object carrying document structure is not a fragment.
                has_document_keys = "documents" in parsed or "sections" in parsed
                if "rows" in parsed and not has_document_keys:
                    logger.debug("Detected JSON fragment: table element with rows")
                    return {
                        "fragment_type": "table_element",
                        "fragment_data": parsed,
                        "target_section_id": findTarget(allSections, "table")
                    }
                if "code" in parsed and not has_document_keys:
                    logger.debug("Detected JSON fragment: code element")
                    return {
                        "fragment_type": "code_element",
                        "fragment_data": parsed,
                        "target_section_id": findTarget(allSections, "code_block")
                    }
        except Exception as e:
            # Deliberately best-effort: an unparsable response is simply
            # "not a fragment".
            logger.debug(f"Error detecting JSON fragment: {e}")
        return None

    @staticmethod
    def findTargetSectionId(
        allSections: List[Dict[str, Any]],
        contentType: str
    ) -> Optional[str]:
        """
        Find the ID of the last section of the given content type.

        The section is returned whether or not it looks incomplete.

        Args:
            allSections: Sections accumulated so far
            contentType: Content type to look for (e.g. "table")

        Returns:
            The ``id`` of the last matching section (None if that section has
            no ``id``), or None if no section of that type exists.
        """
        for section in reversed(allSections):
            if section.get("content_type") == contentType:
                return section.get("id")
        return None

    @staticmethod
    def mergeFragmentIntoSection(
        fragment: Dict[str, Any],
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Merge a JSON fragment into the appropriate section.

        This handles the special case where iteration N returns continuation
        content that needs to be merged into the existing structure at the
        overlapping point.

        Target selection:
        1. the section whose ``id`` equals ``target_section_id`` (skipped when
           the fragment carries no target ID)
        2. the last incomplete section of the fragment's content type
        3. the last section of the fragment's content type

        The fragment is merged into the LAST element of the target section.
        Inputs are not mutated.

        Args:
            fragment: Fragment info as returned by detectAndParseJsonFragment
            allSections: Sections accumulated so far
            iteration: Current iteration number (used for logging only)

        Returns:
            A new list of sections with the target section replaced, or
            ``allSections`` itself if the fragment is empty or no target
            section exists.
        """
        fragment_type = fragment.get("fragment_type")
        fragment_data = fragment.get("fragment_data")
        target_section_id = fragment.get("target_section_id")
        if not fragment_type or not fragment_data:
            return allSections

        target_index = -1

        # 1. Look up by ID. A missing ID must not match sections that simply
        #    have no "id" key.
        if target_section_id is not None:
            for i, section in enumerate(allSections):
                if section.get("id") == target_section_id:
                    target_index = i
                    break

        # 2./3. Scan from the end: prefer the last incomplete section of the
        #       matching type, fall back to the last section of that type.
        if target_index < 0:
            wanted_type = JsonResponseHandler.getContentTypeForFragment(fragment_type)
            last_of_type = -1
            for i in range(len(allSections) - 1, -1, -1):
                section = allSections[i]
                if section.get("content_type") != wanted_type:
                    continue
                if JsonResponseHandler.isSectionIncomplete(section):
                    target_index = i
                    break
                if last_of_type < 0:
                    last_of_type = i
            else:
                target_index = last_of_type

        if target_index < 0:
            logger.warning(f"Iteration {iteration}: No target section found for fragment type {fragment_type}")
            return allSections

        target_section = allSections[target_index]
        # ID of the section actually merged into (for paths and logging).
        target_section_id = target_section.get("id", target_section_id)

        # Copy section, elements list and last element so the caller's data
        # is left untouched.
        merged_section = target_section.copy()
        elements = merged_section.get("elements", [])
        if isinstance(elements, list):
            elements = list(elements)
        else:
            elements = [elements] if elements else []
        if not elements:
            elements = [{}]
        last_element = elements[-1]
        if isinstance(last_element, dict):
            last_element = dict(last_element)
        else:
            # Last element is unusable: merge into a fresh element after it.
            last_element = {}
            elements.append(last_element)

        if fragment_type == "table_rows":
            existing_rows = last_element.get("rows", [])
            if not isinstance(existing_rows, list):
                existing_rows = []
            merged_rows = JsonResponseHandler.mergeRowsWithOverlap(existing_rows, fragment_data, iteration)
            # Only an empty table may take its headers from the fragment; for
            # a continuation the first fragment row is data, not a header.
            if (not existing_rows
                    and not last_element.get("headers")
                    and isinstance(fragment_data, list)
                    and len(fragment_data) > 0):
                first_row = fragment_data[0]
                if (isinstance(first_row, list) and first_row
                        and all(isinstance(cell, str) for cell in first_row)):
                    last_element["headers"] = first_row
                    merged_rows = merged_rows[1:]  # header row is not data
            last_element["rows"] = merged_rows

        elif fragment_type == "code_lines":
            existing_code = last_element.get("code", "")
            if isinstance(fragment_data, list):
                new_code = "\n".join(str(line) for line in fragment_data)
            else:
                new_code = str(fragment_data)
            last_element["code"] = JsonResponseHandler.mergeCodeBlocks(existing_code, new_code, iteration)

        elif fragment_type == "list_items":
            existing_items = last_element.get("items", [])
            if not isinstance(existing_items, list):
                existing_items = []
            new_items = fragment_data if isinstance(fragment_data, list) else [fragment_data]
            last_element["items"] = JsonResponseHandler.mergeItemsWithOverlap(existing_items, new_items, iteration)

        elif fragment_type == "code_element":
            existing_code = last_element.get("code")
            new_code = fragment_data.get("code") if isinstance(fragment_data, dict) else None
            merged_element = JsonResponseHandler.mergeDeepStructures(
                last_element,
                fragment_data,
                iteration,
                f"section.{target_section_id}.code_element"
            )
            # The deep merge replaces strings; code is continuation content
            # and has to be appended instead.
            if (isinstance(merged_element, dict)
                    and isinstance(existing_code, str)
                    and isinstance(new_code, str)):
                merged_element["code"] = JsonResponseHandler.mergeCodeBlocks(existing_code, new_code, iteration)
            last_element = merged_element

        else:
            # "table_element" and any unknown fragment type: deep recursive
            # merge, which handles nested structures and overlapping rows.
            last_element = JsonResponseHandler.mergeDeepStructures(
                last_element,
                fragment_data,
                iteration,
                f"section.{target_section_id}.{fragment_type}"
            )

        elements[-1] = last_element
        merged_section["elements"] = elements

        merged_sections = allSections.copy()
        merged_sections[target_index] = merged_section
        logger.info(f"Iteration {iteration}: Merged {fragment_type} fragment into section '{target_section_id}'")
        return merged_sections

    @staticmethod
    def getContentTypeForFragment(fragment_type: str) -> str:
        """Map fragment type to content type ("paragraph" for unknown types)."""
        return JsonResponseHandler._FRAGMENT_CONTENT_TYPES.get(fragment_type, "paragraph")

    @staticmethod
    def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
        """
        Deep recursive comparison of two JSON-serializable objects.

        Types must match exactly (so ``1`` and ``1.0``, or ``True`` and ``1``,
        are different). Lists are compared element by element, dicts key by
        key.

        Args:
            obj1: First object to compare
            obj2: Second object to compare
            max_depth: Remaining recursion depth; once it reaches 0 the
                objects are reported as different

        Returns:
            True if objects are deeply equal, False otherwise
        """
        if max_depth <= 0:
            return False
        if type(obj1) is not type(obj2):
            return False

        if isinstance(obj1, list):
            if len(obj1) != len(obj2):
                return False
            return all(
                JsonResponseHandler.deepCompare(left, right, max_depth - 1)
                for left, right in zip(obj1, obj2)
            )

        if isinstance(obj1, dict):
            if set(obj1.keys()) != set(obj2.keys()):
                return False
            return all(
                JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
                for key in obj1
            )

        # Primitives (str, int, float, bool, None) and any other type
        return obj1 == obj2

    @staticmethod
    def findLongestCommonSuffix(
        existing_list: List[Any],
        new_list: List[Any],
        min_overlap: int = 1
    ) -> int:
        """
        Find the longest suffix of existing_list that equals a prefix of new_list.

        This handles cases where multiple elements overlap:
        - existing: [A, B, C, D]
        - new: [C, D, E, F]
        - overlap: [C, D] (length 2)

        Args:
            existing_list: Items accumulated so far
            new_list: Items of the continuation
            min_overlap: Smallest overlap length that counts as a match

        Returns:
            The length of the overlap (0 if no overlap found).
        """
        if not existing_list or not new_list:
            return 0

        longest_possible = min(len(existing_list), len(new_list))
        # Longest first, so the first hit is the answer. A length of 0 is
        # never a real overlap, hence the lower bound of 1.
        for overlap_len in range(longest_possible, max(min_overlap, 1) - 1, -1):
            suffix = existing_list[-overlap_len:]
            prefix = new_list[:overlap_len]
            if all(JsonResponseHandler.deepCompare(left, right)
                   for left, right in zip(suffix, prefix)):
                return overlap_len
        return 0

    @staticmethod
    def findPartialOverlap(
        existing_item: Any,
        new_item: Any
    ) -> Tuple[bool, Optional[Any]]:
        """
        Detect if new_item completes an incomplete existing_item.

        Handles cases like:
        - existing: ["37643", "37649", ..., "37747", "376"]
        - new: ["37691", ...] where the first new value starts with "376"

        Rules:
        - two lists: the last existing entry is a string shorter than 10
          characters that is a prefix of the first new string, or a list that
          is a proper prefix of the first new list; the truncated entry is
          replaced by the complete one and the remaining new entries follow
        - two strings: the existing string is shorter than 50 characters and
          a prefix of the new string; the new string is the merged result

        Returns:
            (True, merged_item) if a partial overlap was detected,
            else (False, None).
        """
        if isinstance(existing_item, list) and isinstance(new_item, list):
            if not existing_item or not new_item:
                return False, None

            last_existing = existing_item[-1]
            first_new = new_item[0]

            if isinstance(last_existing, str) and isinstance(first_new, str):
                if len(last_existing) < 10 and first_new.startswith(last_existing):
                    completed = last_existing + first_new[len(last_existing):]
                    return True, existing_item[:-1] + [completed] + new_item[1:]

            if isinstance(last_existing, list) and isinstance(first_new, list):
                if (len(last_existing) < len(first_new)
                        and first_new[:len(last_existing)] == last_existing):
                    # Replace the truncated entry with the complete one.
                    return True, existing_item[:-1] + [first_new] + new_item[1:]

        if isinstance(existing_item, str) and isinstance(new_item, str):
            if len(existing_item) < 50 and new_item.startswith(existing_item):
                return True, existing_item + new_item[len(existing_item):]

        return False, None

    @staticmethod
    def mergeRowsWithOverlap(
        existing_rows: List[List[str]],
        new_rows: List[List[str]],
        iteration: int
    ) -> List[List[str]]:
        """
        Merge table rows with sophisticated overlap detection.

        Strategies, in order:
        1. the longest run of trailing existing rows that equals the leading
           new rows is kept only once
        2. the last existing row is truncated and the first new row completes
           it (see findPartialOverlap)
        3. last existing row equals first new row by plain ``==``
        Otherwise the new rows are appended.

        Args:
            existing_rows: Rows accumulated so far
            new_rows: Rows of the continuation
            iteration: Current iteration number (used for logging only)

        Returns:
            The merged rows; the other argument itself if one is empty.
        """
        if not new_rows:
            return existing_rows
        if not existing_rows:
            return new_rows

        # Strategy 1: multi-row overlap
        overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
        if overlap_len > 0:
            logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
            return existing_rows + new_rows[overlap_len:]

        last_existing = existing_rows[-1]
        first_new = new_rows[0]

        # Strategy 2: truncated last row
        is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
        if is_partial:
            logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
            return existing_rows[:-1] + [merged_row] + new_rows[1:]

        # Strategy 3: plain equality (more lenient than deepCompare, e.g.
        # 1 == 1.0)
        if isinstance(last_existing, list) and isinstance(first_new, list):
            if list(last_existing) == list(first_new):
                logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
                return existing_rows + new_rows[1:]

        # No overlap detected - append all new rows
        return existing_rows + new_rows

    @staticmethod
    def mergeItemsWithOverlap(
        existing_items: List[str],
        new_items: List[str],
        iteration: int
    ) -> List[str]:
        """
        Merge list items with sophisticated overlap detection.

        Strategies, in order:
        1. the longest run of trailing existing items that equals the leading
           new items is kept only once
        2. the last existing item is truncated and the first new item
           completes it (see findPartialOverlap)
        3. last existing item equals first new item by plain ``==``
        Otherwise the new items are appended.

        Args:
            existing_items: Items accumulated so far
            new_items: Items of the continuation
            iteration: Current iteration number (used for logging only)

        Returns:
            The merged items; the other argument itself if one is empty.
        """
        if not new_items:
            return existing_items
        if not existing_items:
            return new_items

        # Strategy 1: multi-item overlap
        overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
        if overlap_len > 0:
            logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
            return existing_items + new_items[overlap_len:]

        # Strategy 2: truncated last item
        is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
        if is_partial:
            logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
            return existing_items[:-1] + [merged_item] + new_items[1:]

        # Strategy 3: plain equality (more lenient than deepCompare)
        if existing_items[-1] == new_items[0]:
            logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
            return existing_items + new_items[1:]

        # No overlap detected - append all new items
        return existing_items + new_items

    @staticmethod
    def mergeDeepStructures(
        existing: Any,
        new: Any,
        iteration: int,
        path: str = "root"
    ) -> Any:
        """
        Recursively merge two JSON structures of arbitrary depth and complexity.

        - different types: ``new`` replaces ``existing``
        - lists: concatenated, with overlapping / partially overlapping
          boundary items de-duplicated
        - dicts: merged key by key, recursing into keys present in both
        - primitives: ``new`` wins (strings are replaced, not concatenated)

        Args:
            existing: Existing structure to merge into (not mutated)
            new: New structure to merge
            iteration: Current iteration number for logging
            path: Current path in structure (for debugging)

        Returns:
            Merged structure
        """
        if type(existing) is not type(new):
            logger.debug(f"Iteration {iteration}: Types don't match at {path}, replacing")
            return new

        if isinstance(existing, list):
            if not new:
                return existing
            if not existing:
                return new

            overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
            if overlap_len > 0:
                logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
                return existing + new[overlap_len:]

            is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
            if is_partial:
                logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging")
                return existing[:-1] + [merged_item] + new[1:]

            # No overlap - append all
            return existing + new

        if isinstance(existing, dict):
            merged = existing.copy()
            for key, new_value in new.items():
                if key in merged:
                    merged[key] = JsonResponseHandler.mergeDeepStructures(
                        merged[key],
                        new_value,
                        iteration,
                        f"{path}.{key}"
                    )
                else:
                    merged[key] = new_value
            return merged

        # Primitives - keep the existing object when equal, otherwise replace
        return existing if existing == new else new