1022 lines
46 KiB
Python
1022 lines
46 KiB
Python
"""
|
|
JSON Response Handling Module
|
|
|
|
Handles merging of JSON responses from multiple AI iterations, including:
|
|
- Section merging with intelligent overlap detection
|
|
- JSON fragment detection and merging
|
|
- Deep recursive structure merging
|
|
- Overlap detection for complex nested structures
|
|
"""
|
|
import json
|
|
import logging
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
|
|
from modules.shared.jsonUtils import extractJsonString
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JsonResponseHandler:
|
|
"""Handles JSON response merging and fragment detection for iterative AI generation."""
|
|
|
|
@staticmethod
|
|
def mergeSectionsIntelligently(
    existingSections: List[Dict[str, Any]],
    newSections: List[Dict[str, Any]],
    iteration: int
) -> List[Dict[str, Any]]:
    """
    Fold the sections of the current iteration into the accumulated sections.

    Each new section is tried against four strategies, in this order; the
    first one that applies wins:
    1. Same ID: merge into the first accumulated section with the same "id".
    2. Continuation: merge into the last accumulated section when both have
       the same "content_type" and the last one is reported incomplete.
    3. Same order: merge into the first accumulated section with the same
       non-None "order" value.
    4. Structural: merge into the last accumulated section when both are
       "code_block" or both are "table".
    A section that matches no strategy is appended.

    Args:
        existingSections: Sections accumulated from previous iterations.
        newSections: Sections extracted from the current iteration.
        iteration: Current iteration number (used for log messages and
            passed through to the content merge).

    Returns:
        The merged list. When either input is empty the other input is
        returned as-is; otherwise a shallow copy of existingSections is
        extended/updated and returned.
    """
    if not newSections:
        return existingSections
    if not existingSections:
        return newSections

    # Shallow copy: it starts non-empty and only ever grows, so result[-1]
    # is always available below.
    result = existingSections.copy()

    for incoming in newSections:
        # --- Strategy 1: identical section ID ---
        incomingId = incoming.get("id")
        if incomingId:
            idMatch = next(
                (pos for pos, current in enumerate(result)
                 if current.get("id") == incomingId),
                None,
            )
            if idMatch is not None:
                result[idMatch] = JsonResponseHandler.mergeSectionContent(
                    result[idMatch], incoming, iteration
                )
                logger.debug(f"Iteration {iteration}: Merged section by ID '{incomingId}'")
                continue

        tail = result[-1]
        tailType = tail.get("content_type")
        incomingType = incoming.get("content_type")

        # --- Strategy 2: same content type and the tail looks cut off ---
        if tailType == incomingType and JsonResponseHandler.isSectionIncomplete(tail):
            result[-1] = JsonResponseHandler.mergeSectionContent(tail, incoming, iteration)
            logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({tailType})")
            continue

        # --- Strategy 3: identical order value ---
        incomingOrder = incoming.get("order")
        if incomingOrder is not None:
            orderMatch = next(
                (pos for pos, current in enumerate(result)
                 if current.get("order") is not None
                 and current.get("order") == incomingOrder),
                None,
            )
            if orderMatch is not None:
                result[orderMatch] = JsonResponseHandler.mergeSectionContent(
                    result[orderMatch], incoming, iteration
                )
                logger.debug(f"Iteration {iteration}: Merged section by order {incomingOrder}")
                continue

        # --- Strategy 4: two adjacent code blocks or two adjacent tables ---
        if tailType == "code_block" and incomingType == "code_block":
            result[-1] = JsonResponseHandler.mergeSectionContent(tail, incoming, iteration)
            logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
            continue
        if tailType == "table" and incomingType == "table":
            result[-1] = JsonResponseHandler.mergeSectionContent(tail, incoming, iteration)
            logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
            continue

        # --- Nothing matched: keep it as a new section ---
        result.append(incoming)
        logger.debug(f"Iteration {iteration}: Added new section '{incoming.get('id', 'no-id')}' ({incoming.get('content_type', 'unknown')})")

    return result
|
|
|
|
@staticmethod
|
|
def isSectionIncomplete(section: Dict[str, Any]) -> bool:
    """
    Heuristically decide whether a section looks truncated at its end.

    Only the last entry of section["elements"] is inspected (or the value
    itself when "elements" is not a list). Type-specific checks run first,
    followed by a generic scan over every value of that element:
    - code_block: code ends with a comma, or contains a comma and does not
      end with a newline.
    - table: last cell of the last row ends with a double quote or is 1-2
      characters long, or the last row has fewer cells than "headers".
    - paragraph/heading: text shorter than 20 characters that does not end
      in '.', '!' or '?'.
    - bullet_list/numbered_list: last item is a string shorter than 3
      characters.
    - image: unpadded base64Data whose last character is outside the base64
      alphabet or whose length is not a multiple of 4.
    - generic: any list value whose last item is a very short string or a
      dict with fewer than 2 keys, or any non-empty string value shorter
      than 10 characters that does not end in '.', '!', '?' or a newline.

    Args:
        section: Section dict; reads "content_type" and "elements".

    Returns:
        True if any heuristic fires, otherwise False. Sections without
        elements, or whose last element is not a dict, are never incomplete.
    """
    contentType = section.get("content_type", "")
    elements = section.get("elements", [])

    if not elements:
        return False

    # "elements" is truthy here, so a list is guaranteed to be non-empty.
    lastElement = elements[-1] if isinstance(elements, list) else elements
    if not isinstance(lastElement, dict):
        return False

    if contentType == "code_block":
        code = lastElement.get("code", "")
        # Non-string payloads are skipped instead of raising AttributeError.
        if isinstance(code, str):
            trimmedCode = code.rstrip()
            if trimmedCode:
                if trimmedCode.endswith(","):
                    return True
                # The newline test must look at the raw value: the trimmed
                # copy can never end with "\n", which previously made every
                # comma-containing block count as incomplete.
                if "," in trimmedCode and not code.endswith("\n"):
                    return True

    if contentType == "table":
        rows = lastElement.get("rows", [])
        if rows:
            lastRow = rows[-1] if isinstance(rows, list) else []
            if isinstance(lastRow, list) and lastRow:
                lastCell = lastRow[-1]
                if isinstance(lastCell, str):
                    if lastCell.endswith('"') or 0 < len(lastCell) < 3:
                        return True
                # A row shorter than the header row was cut off mid-row.
                headers = lastElement.get("headers", [])
                if headers and isinstance(headers, list) and len(lastRow) < len(headers):
                    return True

    if contentType in ("paragraph", "heading"):
        text = lastElement.get("text", "")
        if isinstance(text, str):
            trimmedText = text.rstrip()
            # Missing end punctuation alone is unreliable, so the text must
            # also be very short to count as cut off.
            if trimmedText and trimmedText[-1] not in ".!?" and len(trimmedText) < 20:
                return True

    if contentType in ("bullet_list", "numbered_list"):
        items = lastElement.get("items", [])
        if items and isinstance(items, list):
            lastItem = items[-1]
            if isinstance(lastItem, str) and len(lastItem) < 3:
                return True

    if contentType == "image":
        imageData = lastElement.get("base64Data", "")
        if isinstance(imageData, str):
            trimmedData = imageData.rstrip()
            # Data ending in '=' padding is treated as complete.
            if trimmedData and not trimmedData.endswith("="):
                base64Alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
                if trimmedData[-1] not in base64Alphabet:
                    return True
                if len(trimmedData) % 4 != 0:
                    return True

    # Generic scan over every value of the last element. Note that this also
    # looks at short metadata strings stored on the element, not only at the
    # main payload.
    for value in lastElement.values():
        if isinstance(value, list) and value:
            lastItem = value[-1]
            if isinstance(lastItem, str):
                if len(lastItem) < 3:
                    return True
            elif isinstance(lastItem, dict):
                if len(lastItem) < 2:
                    return True
        elif isinstance(value, str):
            if 0 < len(value) < 10 and value[-1] not in ".!?\n":
                return True

    return False
|
|
|
|
@staticmethod
|
|
def mergeSectionContent(
    existingSection: Dict[str, Any],
    newSection: Dict[str, Any],
    iteration: int
) -> Dict[str, Any]:
    """
    Merge the content of newSection into existingSection.

    The LAST element of the existing section is merged with the FIRST
    element of the new section, according to the existing section's
    "content_type":
    - code_block: code is combined via mergeCodeBlocks; "language" is taken
      from the new element if the existing one has none.
    - paragraph/heading: text is joined with a space (existing text does not
      end in '.', '!' or '?') or with a newline.
    - table: rows are combined via mergeRowsWithOverlap; missing "headers"
      and "caption" are taken from the new element.
    - bullet_list/numbered_list: items are combined via mergeItemsWithOverlap.
    - image: base64Data is appended when the existing data does not end in
      '=' padding, otherwise replaced; missing "altText"/"caption" are filled.
    - anything else: delegated to mergeDeepStructures.

    The inputs are not modified at the top level: the merged element and the
    elements list are shallow copies.
    NOTE(review): mergeDeepStructures is not visible here; whether it mutates
    nested values of the element passed to it should be confirmed.
    NOTE(review): only the first element of newSection is merged; any further
    new elements are ignored, as before.

    Args:
        existingSection: Section accumulated so far.
        newSection: Section from the current iteration.
        iteration: Current iteration number (for logging / delegated merges).

    Returns:
        existingSection itself when there is nothing to merge (no new
        elements, or the elements to merge are not dicts); otherwise a new
        section dict with the merged elements. "order" is taken from
        newSection when the existing section has none.
    """
    contentType = existingSection.get("content_type", "")
    existingElements = existingSection.get("elements", [])
    newElements = newSection.get("elements", [])

    if not newElements:
        return existingSection

    if not existingElements:
        # Nothing to continue from: adopt the new elements instead of merging
        # into a throwaway dict (which silently dropped the new content).
        adoptedSection = existingSection.copy()
        adoptedSection["elements"] = newElements
        if "order" not in adoptedSection and "order" in newSection:
            adoptedSection["order"] = newSection["order"]
        return adoptedSection

    # Both containers are non-empty here, so indexing is safe for lists.
    existingElem = existingElements[-1] if isinstance(existingElements, list) else existingElements
    newElem = newElements[0] if isinstance(newElements, list) else newElements

    if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
        return existingSection

    # Work on a copy so the caller's element is left untouched.
    mergedElem = existingElem.copy()

    if contentType == "code_block":
        existingCode = mergedElem.get("code", "")
        newCode = newElem.get("code", "")
        if existingCode and newCode:
            mergedElem["code"] = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
        elif newCode:
            # Existing block is empty: take the new code as-is.
            mergedElem["code"] = newCode
        if "language" not in mergedElem and "language" in newElem:
            mergedElem["language"] = newElem["language"]

    elif contentType in ("paragraph", "heading"):
        existingText = mergedElem.get("text", "")
        newText = newElem.get("text", "")
        if existingText and newText:
            head = existingText.rstrip()
            # Mid-sentence continuation gets a space; a finished sentence
            # (or whitespace-only existing text) gets a line break.
            joiner = " " if head and head[-1] not in ".!?\n" else "\n"
            mergedElem["text"] = head + joiner + newText.lstrip()
        elif newText:
            mergedElem["text"] = newText

    elif contentType == "table":
        existingRows = mergedElem.get("rows", [])
        newRows = newElem.get("rows", [])
        if existingRows and newRows:
            mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
            mergedElem["rows"] = mergedRows
            logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
        elif newRows:
            mergedElem["rows"] = newRows
        if not mergedElem.get("headers") and newElem.get("headers"):
            mergedElem["headers"] = newElem["headers"]
        if not mergedElem.get("caption") and newElem.get("caption"):
            mergedElem["caption"] = newElem.get("caption")

    elif contentType in ("bullet_list", "numbered_list"):
        existingItems = mergedElem.get("items", [])
        newItems = newElem.get("items", [])
        if existingItems and newItems:
            mergedElem["items"] = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
        elif newItems:
            mergedElem["items"] = newItems

    elif contentType == "image":
        existingImageData = mergedElem.get("base64Data", "")
        newImageData = newElem.get("base64Data", "")
        if existingImageData and newImageData:
            if not existingImageData.rstrip().endswith("="):
                # No '=' padding: treat the existing data as cut off and
                # append the continuation.
                mergedElem["base64Data"] = existingImageData + newImageData
                logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
            else:
                # Existing image looks complete: the new image replaces it.
                mergedElem["base64Data"] = newImageData
        elif newImageData:
            mergedElem["base64Data"] = newImageData
        if not mergedElem.get("altText") and newElem.get("altText"):
            mergedElem["altText"] = newElem["altText"]
        if not mergedElem.get("caption") and newElem.get("caption"):
            mergedElem["caption"] = newElem["caption"]

    else:
        # Unknown content type: delegate to the generic deep merge.
        mergedElem = JsonResponseHandler.mergeDeepStructures(
            mergedElem,
            newElem,
            iteration,
            f"section.{contentType}"
        )

    mergedSection = existingSection.copy()
    if isinstance(existingElements, list):
        # New list so the caller's elements list is not modified.
        mergedSection["elements"] = existingElements[:-1] + [mergedElem]
    else:
        mergedSection["elements"] = mergedElem

    if "order" not in mergedSection and "order" in newSection:
        mergedSection["order"] = newSection["order"]

    return mergedSection
|
|
|
|
@staticmethod
|
|
def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
    """
    Join a code block with its continuation, repairing the seam between them.

    Trailing whitespace of existingCode and surrounding whitespace of newCode
    are stripped before merging. The last existing line and the first new
    line (compared with surrounding whitespace removed) are handled by the
    first rule that applies:
    1. Exact duplicate: the first new line is dropped.
    2. Incomplete line: if the last existing line ends with a comma, the
       first new line is appended directly to it; if it contains a comma and
       its last comma-separated part is shorter than 5 characters, the two
       are joined with a comma.
    3. Partial overlap: if both lines contain commas and the last part of the
       existing line equals the first part of the new line, that part is
       removed from the new line.
    Remaining new lines are appended after a newline.

    Args:
        existingCode: Code accumulated so far.
        newCode: Code from the current iteration.
        iteration: Current iteration number (log messages only).

    Returns:
        The merged code. If either input is empty, the other is returned
        unchanged.
    """
    if not existingCode:
        return newCode
    if not newCode:
        return existingCode

    # str.split always yields at least one entry, so both lists are non-empty.
    existingLines = existingCode.rstrip().split('\n')
    newLines = newCode.strip().split('\n')

    # Keep the unstripped line so its indentation survives a Strategy 2 merge.
    rawLastLine = existingLines[-1]
    lastExistingLine = rawLastLine.strip()
    firstNewLine = newLines[0].strip()

    # Strategy 1: the continuation repeats the last line verbatim.
    if lastExistingLine == firstNewLine:
        newLines = newLines[1:]
        logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")

    # Strategy 2: the last line was cut off; glue the first new line onto it.
    elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
        # Only add a separator when there is none; stripping and re-adding a
        # single comma would collapse runs like ",," and lose empty fields.
        separator = '' if lastExistingLine.endswith(',') else ','
        existingLines[-1] = rawLastLine + separator + firstNewLine
        newLines = newLines[1:]
        logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")

    # Strategy 3: the new line starts with the value the old line ended on.
    elif ',' in lastExistingLine and ',' in firstNewLine:
        lastExistingPart = lastExistingLine.split(',')[-1].strip()
        firstNewParts = firstNewLine.split(',')
        if lastExistingPart == firstNewParts[0].strip():
            newLines[0] = ','.join(firstNewParts[1:])
            logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")

    mergedCode = '\n'.join(existingLines)
    if newLines:
        if mergedCode and not mergedCode.endswith('\n'):
            mergedCode += '\n'
        mergedCode += '\n'.join(newLines)

    return mergedCode
|
|
|
|
@staticmethod
|
|
def detectAndParseJsonFragment(
    result: str,
    allSections: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
    """
    Recognise a response that is bare continuation content instead of a document.

    The JSON string is extracted from result and parsed, then classified:
    - non-empty array whose first item is an array -> "table_rows"
    - non-empty array whose first item is a string -> "code_lines" when a
      code_block target is found, else "list_items" when a bullet_list target
      is found, else "code_lines" by default
    - object with "rows" and neither "documents" nor "sections" -> "table_element"
    - object with "code" and neither "documents" nor "sections" -> "code_element"

    Args:
        result: Raw response text of the current iteration.
        allSections: Sections accumulated so far; used to look up the target.

    Returns:
        A dict with "fragment_type", "fragment_data" (the parsed JSON) and
        "target_section_id" (as returned by findTargetSectionId, may be
        None), or None when the response is not a recognised fragment. Any
        exception during extraction, parsing or lookup is logged at debug
        level and also results in None.
    """
    def describe(kind: str, data: Any, target: Optional[str]) -> Dict[str, Any]:
        # Uniform shape of the fragment descriptor.
        return {
            "fragment_type": kind,
            "fragment_data": data,
            "target_section_id": target
        }

    try:
        payload = json.loads(extractJsonString(result))

        if isinstance(payload, list):
            if len(payload) == 0:
                return None
            head = payload[0]

            # Array of arrays: [["col1", "col2"], ...] -> table rows.
            if isinstance(head, list):
                logger.debug("Detected JSON fragment: table rows array")
                return describe(
                    "table_rows",
                    payload,
                    JsonResponseHandler.findTargetSectionId(allSections, "table"),
                )

            # Array of strings: code lines or list items, decided by which
            # kind of section is available as a target.
            if isinstance(head, str):
                codeTarget = JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                if codeTarget:
                    logger.debug("Detected JSON fragment: code lines array")
                    return describe("code_lines", payload, codeTarget)

                listTarget = JsonResponseHandler.findTargetSectionId(allSections, "bullet_list")
                if listTarget:
                    logger.debug("Detected JSON fragment: list items array")
                    return describe("list_items", payload, listTarget)

                # No usable context: assume code lines.
                logger.debug("Detected JSON fragment: string array (assuming code lines)")
                return describe(
                    "code_lines",
                    payload,
                    JsonResponseHandler.findTargetSectionId(allSections, "code_block"),
                )

            return None

        if isinstance(payload, dict):
            # A full document carries "documents" or "sections"; fragments don't.
            looksLikeDocument = "documents" in payload or "sections" in payload
            if not looksLikeDocument:
                if "rows" in payload:
                    logger.debug("Detected JSON fragment: table element with rows")
                    return describe(
                        "table_element",
                        payload,
                        JsonResponseHandler.findTargetSectionId(allSections, "table"),
                    )
                if "code" in payload:
                    logger.debug("Detected JSON fragment: code element")
                    return describe(
                        "code_element",
                        payload,
                        JsonResponseHandler.findTargetSectionId(allSections, "code_block"),
                    )

    except Exception as e:
        # Best-effort detection: a failure simply means "not a fragment".
        logger.debug(f"Error detecting JSON fragment: {e}")

    return None
|
|
|
|
@staticmethod
|
|
def findTargetSectionId(
    allSections: List[Dict[str, Any]],
    contentType: str
) -> Optional[str]:
    """
    Return the "id" of the last section whose "content_type" matches.

    The previous implementation also asked whether that section was
    incomplete, but returned the same id either way; the check is dropped
    because it could not change the result and only added a way to fail.

    Args:
        allSections: Sections accumulated so far, in document order.
        contentType: Content type to look for (e.g. "table").

    Returns:
        The "id" value of the last matching section (None if that section
        has no "id"), or None when no section has the requested type.
    """
    for section in reversed(allSections):
        if section.get("content_type") == contentType:
            return section.get("id")
    return None
|
|
|
|
@staticmethod
|
|
def mergeFragmentIntoSection(
    fragment: Dict[str, Any],
    allSections: List[Dict[str, Any]],
    iteration: int
) -> List[Dict[str, Any]]:
    """
    Merge a detected JSON fragment into the section it continues.

    Target selection, in order:
    1. the section whose "id" equals fragment["target_section_id"]
       (only when an id was actually supplied);
    2. the last section of the fragment's content type that is reported
       incomplete;
    3. the last section of the fragment's content type.

    The fragment is merged into the last element of the target section:
    - "table_rows": rows are combined via mergeRowsWithOverlap. If the table
      has neither rows nor headers yet and the first fragment row consists
      only of strings, that row is used as headers and removed from the rows.
    - "code_lines": lines are joined with newlines and combined via
      mergeCodeBlocks.
    - "list_items": items are combined via mergeItemsWithOverlap.
    - any other type (including "table_element" / "code_element"):
      delegated to mergeDeepStructures.

    The target section, its elements list and its last element are
    shallow-copied before being changed, and allSections itself is copied.
    NOTE(review): mergeDeepStructures is not visible here; whether it mutates
    nested values of the element passed to it should be confirmed.

    Args:
        fragment: Descriptor with "fragment_type", "fragment_data" and
            optionally "target_section_id".
        allSections: Sections accumulated so far.
        iteration: Current iteration number (logging / delegated merges).

    Returns:
        allSections unchanged when the fragment is empty or no target is
        found; otherwise a new list with the target section replaced by its
        merged copy.
    """
    fragment_type = fragment.get("fragment_type")
    fragment_data = fragment.get("fragment_data")
    target_section_id = fragment.get("target_section_id")

    if not fragment_type or not fragment_data:
        return allSections

    target_index = -1

    # 1. Explicit id. Skipped when no id was given: comparing against None
    #    would otherwise select the first section that simply has no "id".
    if target_section_id is not None:
        for i, section in enumerate(allSections):
            if section.get("id") == target_section_id:
                target_index = i
                break

    if target_index < 0:
        wanted_type = JsonResponseHandler.getContentTypeForFragment(fragment_type)
        candidates = [
            i for i, section in enumerate(allSections)
            if section.get("content_type") == wanted_type
        ]
        # 2. Prefer the LAST incomplete section of the matching type ...
        for i in reversed(candidates):
            if JsonResponseHandler.isSectionIncomplete(allSections[i]):
                target_index = i
                break
        else:
            # 3. ... otherwise fall back to the last section of that type.
            if candidates:
                target_index = candidates[-1]

    if target_index < 0:
        logger.warning(f"Iteration {iteration}: No target section found for fragment type {fragment_type}")
        return allSections

    # Copy everything that gets modified so the caller's data stays intact.
    merged_section = allSections[target_index].copy()
    elements = merged_section.get("elements", [])
    if isinstance(elements, list):
        elements = list(elements)
    else:
        elements = [elements] if elements else []

    if not elements:
        elements = [{}]

    last_element = elements[-1]
    if isinstance(last_element, dict):
        last_element = last_element.copy()
    else:
        # Keep the non-dict element and merge into a fresh one after it.
        last_element = {}
        elements.append(last_element)

    if fragment_type == "table_rows":
        existing_rows = last_element.get("rows", [])
        if not isinstance(existing_rows, list):
            existing_rows = []

        merged_rows = JsonResponseHandler.mergeRowsWithOverlap(existing_rows, fragment_data, iteration)

        # Header inference is only safe when the fragment starts the table:
        # with existing rows, merged_rows[0] is an existing data row and must
        # not be dropped.
        if (
            not existing_rows
            and not last_element.get("headers")
            and isinstance(fragment_data, list)
            and len(fragment_data) > 0
        ):
            first_row = fragment_data[0]
            if (
                isinstance(first_row, list)
                and len(first_row) > 0
                and all(isinstance(cell, str) for cell in first_row)
            ):
                last_element["headers"] = first_row
                merged_rows = merged_rows[1:]  # Remove header row

        last_element["rows"] = merged_rows

    elif fragment_type == "code_lines":
        existing_code = last_element.get("code", "")
        if isinstance(fragment_data, list):
            new_code = "\n".join(str(line) for line in fragment_data)
        else:
            new_code = str(fragment_data)
        last_element["code"] = JsonResponseHandler.mergeCodeBlocks(existing_code, new_code, iteration)

    elif fragment_type == "list_items":
        existing_items = last_element.get("items", [])
        if not isinstance(existing_items, list):
            existing_items = []
        new_items = fragment_data if isinstance(fragment_data, list) else [fragment_data]
        last_element["items"] = JsonResponseHandler.mergeItemsWithOverlap(existing_items, new_items, iteration)

    else:
        # "table_element", "code_element" and unknown types all use the
        # generic deep merge; the path label ends with the fragment type.
        last_element = JsonResponseHandler.mergeDeepStructures(
            last_element,
            fragment_data,
            iteration,
            f"section.{target_section_id}.{fragment_type}"
        )

    elements[-1] = last_element
    merged_section["elements"] = elements

    merged_sections = allSections.copy()
    merged_sections[target_index] = merged_section

    logger.info(f"Iteration {iteration}: Merged {fragment_type} fragment into section '{target_section_id}'")
    return merged_sections
|
|
|
|
@staticmethod
|
|
def getContentTypeForFragment(fragment_type: str) -> str:
    """
    Translate a fragment type into the section content type it belongs to.

    Table fragments map to "table", code fragments to "code_block" and list
    items to "bullet_list"; every other fragment type maps to "paragraph".
    """
    contentTypeByFragment = dict(
        table_rows="table",
        table_element="table",
        code_lines="code_block",
        code_element="code_block",
        list_items="bullet_list",
    )
    try:
        return contentTypeByFragment[fragment_type]
    except KeyError:
        # Unknown fragment types are treated as plain text.
        return "paragraph"
|
|
|
|
@staticmethod
|
|
def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
    """
    Recursively test two JSON-like values for equality.

    Values of different exact types are never equal (so 1 and 1.0, or True
    and 1, differ). Lists are compared position by position, dicts key by
    key, and everything else with ==.

    Args:
        obj1: First value.
        obj2: Second value.
        max_depth: Remaining recursion budget; each nesting level consumes
            one. When the budget is exhausted the values are reported as
            NOT equal.

    Returns:
        True when both values are deeply equal within the depth budget,
        False otherwise.
    """
    if max_depth <= 0:
        return False

    if type(obj1) != type(obj2):
        return False

    scalarTypes = (str, int, float, bool, type(None))
    if isinstance(obj1, scalarTypes):
        return obj1 == obj2

    remainingDepth = max_depth - 1

    if isinstance(obj1, list):
        if len(obj1) != len(obj2):
            return False
        for left, right in zip(obj1, obj2):
            if not JsonResponseHandler.deepCompare(left, right, remainingDepth):
                return False
        return True

    if isinstance(obj1, dict):
        if set(obj1.keys()) != set(obj2.keys()):
            return False
        for key in obj1.keys():
            if not JsonResponseHandler.deepCompare(obj1[key], obj2[key], remainingDepth):
                return False
        return True

    # Any other type: rely on its own equality.
    return obj1 == obj2
|
|
|
|
@staticmethod
|
|
def findLongestCommonSuffix(
    existing_list: List[Any],
    new_list: List[Any],
    min_overlap: int = 1
) -> int:
    """
    Measure how many trailing items of existing_list reappear at the start of new_list.

    Example: existing [A, B, C, D] and new [C, D, E, F] overlap in [C, D],
    so the result is 2. Candidate lengths are tried from the longest
    possible down to min_overlap, and items are matched with deepCompare.

    Args:
        existing_list: Items accumulated so far.
        new_list: Items from the current iteration.
        min_overlap: Smallest overlap length that is still considered.

    Returns:
        Length of the longest overlap found, or 0 when there is none
        (including when either list is empty).
    """
    if not existing_list or not new_list:
        return 0

    longest_possible = min(len(existing_list), len(new_list))

    # Longest candidate first, so the first hit is the answer.
    for candidate in reversed(range(min_overlap, longest_possible + 1)):
        tail = existing_list[-candidate:]
        head = new_list[:candidate]
        if all(
            JsonResponseHandler.deepCompare(tail_item, head_item)
            for tail_item, head_item in zip(tail, head)
        ):
            return candidate

    return 0
|
|
|
|
@staticmethod
|
|
def findPartialOverlap(
    existing_item: Any,
    new_item: Any
) -> Tuple[bool, Optional[Any]]:
    """
    Check whether new_item finishes an existing_item that was cut off.

    Three situations are recognised:
    - Two lists whose boundary items are strings: the last existing string
      is shorter than 10 characters and the first new string starts with it.
      The boundary string is completed and the rest of new_item is appended.
    - Two lists whose boundary items are lists: the last existing list is a
      strictly shorter prefix of the first new list. The incomplete list is
      replaced by the complete one and the rest of new_item is appended.
    - Two strings: existing_item is shorter than 50 characters and new_item
      starts with it.

    Returns:
        (True, merged_value) when one of the situations applies, otherwise
        (False, None).
    """
    if isinstance(existing_item, list) and isinstance(new_item, list):
        if not existing_item or not new_item:
            return False, None

        tail = existing_item[-1]
        head = new_item[0]

        # Boundary strings: a short tail that the head begins with.
        if isinstance(tail, str) and isinstance(head, str):
            if len(tail) < 10 and head.startswith(tail):
                completed = tail + head[len(tail):]
                return True, existing_item[:-1] + [completed] + new_item[1:]

        # Boundary lists: the tail is a strict prefix of the head.
        if isinstance(tail, list) and isinstance(head, list):
            if len(tail) < len(head) and head[:len(tail)] == tail:
                return True, existing_item[:-1] + [head] + new_item[1:]

        return False, None

    # Plain strings: a short existing string that the new one begins with.
    if isinstance(existing_item, str) and isinstance(new_item, str):
        if len(existing_item) < 50 and new_item.startswith(existing_item):
            return True, existing_item + new_item[len(existing_item):]

    return False, None
|
|
|
|
@staticmethod
def mergeRowsWithOverlap(
    existing_rows: List[List[str]],
    new_rows: List[List[str]],
    iteration: int
) -> List[List[str]]:
    """
    Combine table rows of the current iteration with the rows collected so far.

    The boundary between the end of ``existing_rows`` and the start of
    ``new_rows`` is examined with three checks; the first one that succeeds
    decides the result:

    1. ``findLongestCommonSuffix`` reports a positive overlap length:
       that many leading rows of ``new_rows`` are dropped.
    2. ``findPartialOverlap`` reports a partial overlap between the last
       existing row and the first new row: both are replaced by the merged row.
    3. The last existing row and the first new row are both lists with equal
       contents: the first new row is dropped.

    When no check succeeds the two lists are simply concatenated.

    Args:
        existing_rows: Rows accumulated from previous iterations
        new_rows: Rows extracted from the current iteration
        iteration: Current iteration number (only used in debug log messages)

    Returns:
        Merged list of rows. If either input is empty, the other input is
        returned as-is (the same object, not a copy).
    """
    # Trivial cases: one side has nothing to contribute.
    if not new_rows:
        return existing_rows
    if not existing_rows:
        return new_rows

    # Check 1: several whole rows repeated at the boundary.
    shared_count = JsonResponseHandler.findLongestCommonSuffix(
        existing_rows, new_rows, min_overlap=1
    )
    if shared_count > 0:
        logger.debug(f"Iteration {iteration}: Found {shared_count} overlapping table rows, removing duplicates")
        return existing_rows + new_rows[shared_count:]

    # Both lists are known to be non-empty here, so the boundary rows exist.
    tail_row = existing_rows[-1]
    head_row = new_rows[0]

    # Check 2: the last existing row was cut off and the first new row continues it.
    overlaps_partially, combined_row = JsonResponseHandler.findPartialOverlap(tail_row, head_row)
    if overlaps_partially:
        logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
        return existing_rows[:-1] + [combined_row] + new_rows[1:]

    # Check 3 (fallback): the boundary rows are identical lists.
    both_are_lists = isinstance(tail_row, list) and isinstance(head_row, list)
    if both_are_lists and list(tail_row) == list(head_row):
        logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
        return existing_rows + new_rows[1:]

    # Nothing overlapped: keep every new row.
    return existing_rows + new_rows
|
|
|
|
@staticmethod
def mergeItemsWithOverlap(
    existing_items: List[str],
    new_items: List[str],
    iteration: int
) -> List[str]:
    """
    Combine list items of the current iteration with the items collected so far.

    The boundary between the end of ``existing_items`` and the start of
    ``new_items`` is examined with three checks; the first one that succeeds
    decides the result:

    1. ``findLongestCommonSuffix`` reports a positive overlap length:
       that many leading entries of ``new_items`` are dropped.
    2. ``findPartialOverlap`` reports a partial overlap between the last
       existing item and the first new item: both are replaced by the merged
       item.
    3. The last existing item equals the first new item: the first new item
       is dropped.

    When no check succeeds the two lists are simply concatenated.

    Args:
        existing_items: Items accumulated from previous iterations
        new_items: Items extracted from the current iteration
        iteration: Current iteration number (only used in debug log messages)

    Returns:
        Merged list of items. If either input is empty, the other input is
        returned as-is (the same object, not a copy).
    """
    # Trivial cases: one side has nothing to contribute.
    if not new_items:
        return existing_items
    if not existing_items:
        return new_items

    # Check 1: several whole items repeated at the boundary.
    shared_count = JsonResponseHandler.findLongestCommonSuffix(
        existing_items, new_items, min_overlap=1
    )
    if shared_count > 0:
        logger.debug(f"Iteration {iteration}: Found {shared_count} overlapping list items, removing duplicates")
        return existing_items + new_items[shared_count:]

    # Both lists are known to be non-empty here, so the boundary items exist.
    tail_item = existing_items[-1]
    head_item = new_items[0]

    # Check 2: the last existing item was cut off and the first new item continues it.
    overlaps_partially, combined_item = JsonResponseHandler.findPartialOverlap(tail_item, head_item)
    if overlaps_partially:
        logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
        return existing_items[:-1] + [combined_item] + new_items[1:]

    # Check 3 (fallback): the boundary items are equal.
    if tail_item == head_item:
        logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
        return existing_items + new_items[1:]

    # Nothing overlapped: keep every new item.
    return existing_items + new_items
|
|
|
|
@staticmethod
def mergeDeepStructures(
    existing: Any,
    new: Any,
    iteration: int,
    path: str = "root"
) -> Any:
    """
    Merge two JSON-like values, descending into dicts key by key.

    Rules, applied in this order:

    - Different exact types (compared with ``type()``, so e.g. ``int`` vs
      ``float`` counts as different): ``new`` replaces ``existing``.
    - Lists: an empty side yields the other side unchanged. Otherwise the
      overlap length reported by ``findLongestCommonSuffix`` is cut from the
      front of ``new``; failing that, a partial overlap between the last
      existing element and the first new element (``findPartialOverlap``)
      replaces both with the merged element; failing that, the lists are
      concatenated. List elements themselves are not merged recursively.
    - Dicts: a shallow copy of ``existing`` is updated with ``new``; values
      under keys present on both sides are merged by a recursive call, keys
      only in ``new`` are added.
    - Anything else: ``existing`` when both compare equal, otherwise ``new``.

    Args:
        existing: Structure accumulated so far
        new: Structure from the current iteration
        iteration: Current iteration number (only used in debug log messages)
        path: Dotted location of the values being merged (only used in debug
            log messages); extended with the key on each dict recursion

    Returns:
        Merged structure
    """
    # Exact type comparison on purpose: subclasses and int/float pairs count as mismatches.
    if type(existing) != type(new):
        logger.debug(f"Iteration {iteration}: Types don't match at {path}, replacing")
        return new

    # Past the gate above both values share one type, so checking `existing` is enough.
    if isinstance(existing, list):
        if not new:
            return existing
        if not existing:
            return new

        # Whole elements repeated at the boundary.
        shared_count = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
        if shared_count > 0:
            logger.debug(f"Iteration {iteration}: Found {shared_count} overlapping elements at {path}, removing duplicates")
            return existing + new[shared_count:]

        # Last existing element cut off, first new element continues it.
        overlaps_partially, combined_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
        if overlaps_partially:
            logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging")
            return existing[:-1] + [combined_item] + new[1:]

        # No overlap of either kind: plain concatenation.
        return existing + new

    if isinstance(existing, dict):
        # Shallow copy so the caller's dict is not modified at this level.
        combined = existing.copy()
        for key, incoming_value in new.items():
            if key not in combined:
                combined[key] = incoming_value
                continue
            # Key on both sides: merge the two values one level deeper.
            combined[key] = JsonResponseHandler.mergeDeepStructures(
                combined[key],
                incoming_value,
                iteration,
                f"{path}.{key}"
            )
        return combined

    # Scalars and any other same-typed values: keep the old one only if nothing changed.
    return existing if existing == new else new
|
|
|