3134 lines
139 KiB
Python
3134 lines
139 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
JSON Response Handling Module
|
|
|
|
Handles merging of JSON responses from multiple AI iterations, including:
|
|
- Section merging with intelligent overlap detection
|
|
- JSON fragment detection and merging
|
|
- Deep recursive structure merging
|
|
- Overlap detection for complex nested structures
|
|
- String accumulation for iterative JSON generation
|
|
"""
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
|
|
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
|
|
from modules.datamodels.datamodelAi import JsonAccumulationState
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JsonResponseHandler:
|
|
"""Handles JSON response merging and fragment detection for iterative AI generation."""
|
|
|
|
    @staticmethod
    def mergeSectionsIntelligently(
        existingSections: List[Dict[str, Any]],
        newSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Intelligently merge sections from multiple iterations.

        This is a GENERIC merging strategy that handles broken JSON iterations.
        The break can occur anywhere - in any section, at any depth.

        Merging strategies (tried in order of priority; the first that matches
        wins for each new section):
        1. Same Section ID: Merge sections with identical IDs
        2. Same Content-Type + Position: If last section is incomplete and new section continues it
        3. Same Order: Merge sections with same order value
        4. Structural Analysis: Detect continuation based on content structure
           (currently only for code_block and table pairs)

        Args:
            existingSections: Sections accumulated from previous iterations
            newSections: Sections extracted from current iteration
            iteration: Current iteration number (used for logging only)

        Returns:
            Merged list of sections. NOTE: this is a shallow copy of
            existingSections - section dicts may be mutated in place by
            mergeSectionContent.
        """
        # Trivial cases: nothing new, or nothing accumulated yet.
        if not newSections:
            return existingSections

        if not existingSections:
            return newSections

        # Shallow copy: the list is new, but the contained section dicts are shared.
        mergedSections = existingSections.copy()

        for newSection in newSections:
            merged = False

            # Strategy 1: Same Section ID - merge directly
            newSectionId = newSection.get("id")
            if newSectionId:
                for i, existingSection in enumerate(mergedSections):
                    if existingSection.get("id") == newSectionId:
                        # Merge sections with same ID
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
                        break

            if merged:
                continue

            # Strategy 2: Same Content-Type + Position (continuation detection)
            # Check if last section is incomplete and new section continues it
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")

                if lastContentType == newContentType:
                    # Same content type - check if last section is incomplete
                    if JsonResponseHandler.isSectionIncomplete(lastSection):
                        # Last section is incomplete, merge with new section
                        mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                            lastSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
                        continue

            # Strategy 3: Same Order value
            newOrder = newSection.get("order")
            if newOrder is not None:
                for i, existingSection in enumerate(mergedSections):
                    existingOrder = existingSection.get("order")
                    if existingOrder is not None and existingOrder == newOrder:
                        # Merge sections with same order
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
                        break

            if merged:
                continue

            # Strategy 4: Structural Analysis - detect continuation
            # For code_block and table: if last section matches new section type, merge them
            # (unlike Strategy 2, no incompleteness check is required here)
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")

                # Both are code blocks - merge them
                if lastContentType == "code_block" and newContentType == "code_block":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
                    continue

                # Both are tables - merge them (common case for broken JSON iterations)
                if lastContentType == "table" and newContentType == "table":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
                    continue

            # No merge strategy matched - add as new section
            if not merged:
                mergedSections.append(newSection)
                logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")

        return mergedSections
|
|
|
|
@staticmethod
|
|
def isSectionIncomplete(section: Dict[str, Any]) -> bool:
|
|
"""
|
|
Check if a section is incomplete (broken at the end).
|
|
|
|
This detects incomplete sections based on content analysis:
|
|
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
|
|
- Text sections: ends mid-sentence, ends with incomplete structure
|
|
- Other types: check for incomplete elements
|
|
"""
|
|
contentType = section.get("content_type", "")
|
|
elements = section.get("elements", [])
|
|
|
|
if not elements:
|
|
return False
|
|
|
|
# Handle list of elements
|
|
if isinstance(elements, list) and len(elements) > 0:
|
|
lastElement = elements[-1]
|
|
else:
|
|
lastElement = elements
|
|
|
|
if not isinstance(lastElement, dict):
|
|
return False
|
|
|
|
# Check code_block for incomplete code
|
|
if contentType == "code_block":
|
|
code = lastElement.get("code", "")
|
|
if code:
|
|
# Check if code ends incompletely:
|
|
# - Ends with comma (incomplete CSV line)
|
|
# - Ends with number but no newline (incomplete line)
|
|
# - Ends mid-token (e.g., "23431,23" - incomplete number)
|
|
codeStripped = code.rstrip()
|
|
if codeStripped:
|
|
# Check for incomplete patterns
|
|
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
|
|
# Ends with comma or has comma but no final newline - likely incomplete
|
|
return True
|
|
# Check if last line is incomplete (doesn't end with newline and has partial content)
|
|
if not code.endswith('\n') and codeStripped:
|
|
# No final newline - might be incomplete
|
|
# More sophisticated: check if last number is complete
|
|
lastLine = codeStripped.split('\n')[-1]
|
|
if lastLine and ',' in lastLine:
|
|
# Has commas but might be incomplete
|
|
parts = lastLine.split(',')
|
|
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
|
|
return True
|
|
|
|
# Check table for incomplete rows
|
|
if contentType == "table":
|
|
rows = lastElement.get("rows", [])
|
|
if rows:
|
|
# Check if last row is incomplete (ends with incomplete data)
|
|
lastRow = rows[-1] if isinstance(rows, list) else []
|
|
if isinstance(lastRow, list) and lastRow:
|
|
# CRITICAL: Check if last row doesn't have expected number of columns (if headers exist)
|
|
# This is the PRIMARY indicator of incomplete table rows
|
|
headers = lastElement.get("headers", [])
|
|
if headers and isinstance(headers, list):
|
|
expectedCols = len(headers)
|
|
if len(lastRow) < expectedCols:
|
|
logger.debug(f"Table section incomplete: last row has {len(lastRow)} columns, expected {expectedCols}")
|
|
return True
|
|
# Also check if last row ends with incomplete data (e.g., incomplete string)
|
|
lastCell = lastRow[-1] if lastRow else ""
|
|
if isinstance(lastCell, str):
|
|
# If last cell is incomplete (ends with quote or is very short), section might be incomplete
|
|
if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
|
|
logger.debug(f"Table section incomplete: last cell appears incomplete: '{lastCell}'")
|
|
return True
|
|
# Additional check: if last row has fewer cells than previous rows, it's likely incomplete
|
|
if len(rows) > 1:
|
|
prevRow = rows[-2] if isinstance(rows, list) and len(rows) > 1 else []
|
|
if isinstance(prevRow, list) and len(prevRow) > len(lastRow):
|
|
logger.debug(f"Table section incomplete: last row has {len(lastRow)} cells, previous row has {len(prevRow)}")
|
|
return True
|
|
|
|
# Check paragraph/text for incomplete sentences
|
|
if contentType in ["paragraph", "heading"]:
|
|
text = lastElement.get("text", "")
|
|
if text:
|
|
# Simple heuristic: if doesn't end with sentence-ending punctuation
|
|
textStripped = text.rstrip()
|
|
if textStripped and not textStripped[-1] in '.!?':
|
|
# Might be incomplete, but this is less reliable
|
|
# Only mark as incomplete if very short (likely cut off)
|
|
if len(textStripped) < 20:
|
|
return True
|
|
|
|
# Check lists for incomplete items
|
|
if contentType in ["bullet_list", "numbered_list"]:
|
|
items = lastElement.get("items", [])
|
|
if items and isinstance(items, list):
|
|
# Check if last item is incomplete (very short or ends with incomplete string)
|
|
lastItem = items[-1] if items else None
|
|
if isinstance(lastItem, str) and len(lastItem) < 3:
|
|
return True
|
|
|
|
# Check image for incomplete base64 data
|
|
if contentType == "image":
|
|
imageData = lastElement.get("base64Data", "")
|
|
if imageData:
|
|
# Base64 strings should end with padding ('=' or '==')
|
|
# If it doesn't, it might be incomplete
|
|
stripped = imageData.rstrip()
|
|
if stripped and not stripped.endswith(('=', '==')):
|
|
# Check if it's a valid base64 character sequence that was cut off
|
|
if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
|
|
return True
|
|
# If length is not a multiple of 4 (base64 requirement), it might be incomplete
|
|
if len(stripped) % 4 != 0:
|
|
return True
|
|
|
|
# GENERIC CHECK: Recursively analyze structure for incompleteness
|
|
# This works for ANY structure: arrays, objects, nested, primitives
|
|
return JsonResponseHandler._isStructureIncomplete(lastElement)
|
|
|
|
@staticmethod
|
|
def _isStructureIncomplete(structure: Any, max_depth: int = 10) -> bool:
|
|
"""
|
|
GENERIC recursive check for incomplete structures.
|
|
|
|
Detects incompleteness by analyzing patterns:
|
|
- Arrays: Last item shorter than previous items, incomplete patterns
|
|
- Objects: Last object has fewer keys than pattern, incomplete values
|
|
- Strings: Very short, ends abruptly, incomplete patterns
|
|
- Nested: Recursively checks nested structures
|
|
|
|
Works for ANY JSON structure of any depth/complexity.
|
|
"""
|
|
if max_depth <= 0:
|
|
return False
|
|
|
|
# Arrays/Lists - check for incomplete patterns
|
|
if isinstance(structure, list):
|
|
if len(structure) == 0:
|
|
return False
|
|
|
|
# Check if last item is incomplete compared to previous items
|
|
last_item = structure[-1]
|
|
|
|
# If we have previous items, compare structure
|
|
if len(structure) > 1:
|
|
prev_item = structure[-2]
|
|
|
|
# If last item is a list and previous is a list, check length
|
|
if isinstance(last_item, list) and isinstance(prev_item, list):
|
|
if len(last_item) < len(prev_item):
|
|
return True # Last row/item has fewer elements - likely incomplete
|
|
|
|
# If last item is a dict and previous is a dict, check keys
|
|
if isinstance(last_item, dict) and isinstance(prev_item, dict):
|
|
if len(last_item) < len(prev_item):
|
|
return True # Last object has fewer keys - likely incomplete
|
|
|
|
# Recursively check last item for incompleteness
|
|
if JsonResponseHandler._isStructureIncomplete(last_item, max_depth - 1):
|
|
return True
|
|
|
|
# Objects/Dicts - check for incomplete values
|
|
elif isinstance(structure, dict):
|
|
for key, value in structure.items():
|
|
# Recursively check each value
|
|
if JsonResponseHandler._isStructureIncomplete(value, max_depth - 1):
|
|
return True
|
|
|
|
# Check for incomplete strings
|
|
if isinstance(value, str):
|
|
# Very short strings might be incomplete
|
|
if len(value) > 0 and len(value) < 3:
|
|
return True
|
|
# Strings ending with incomplete patterns (comma, quote, etc.)
|
|
stripped = value.rstrip()
|
|
if stripped and stripped.endswith((',', '"', '\\')):
|
|
return True
|
|
|
|
# Strings - check for incomplete patterns
|
|
elif isinstance(structure, str):
|
|
# Very short strings might be incomplete
|
|
if len(structure) > 0 and len(structure) < 3:
|
|
return True
|
|
# Strings ending with incomplete patterns
|
|
stripped = structure.rstrip()
|
|
if stripped and stripped.endswith((',', '"', '\\')):
|
|
return True
|
|
|
|
return False
|
|
|
|
    @staticmethod
    def mergeSectionContent(
        existingSection: Dict[str, Any],
        newSection: Dict[str, Any],
        iteration: int
    ) -> Dict[str, Any]:
        """
        Merge content from two sections.

        Handles different content types:
        - code_block: Append code, handle overlaps, merge incomplete lines
        - paragraph/heading: Append text
        - table: Merge rows (nested "content" format and legacy flat format)
        - bullet_list/numbered_list: Merge items with overlap detection
        - image: Append base64 if existing data looks truncated, else replace
        - Other: Deep recursive structure merge

        Args:
            existingSection: Accumulated section (its last element may be
                mutated IN PLACE).
            newSection: Section from the current iteration (its first element
                is merged into the existing one).
            iteration: Current iteration number (used for logging only).

        Returns:
            A shallow copy of existingSection with the merged elements.
        """
        contentType = existingSection.get("content_type", "")
        existingElements = existingSection.get("elements", [])
        newElements = newSection.get("elements", [])

        if not newElements:
            return existingSection

        # "elements" may be a list or a single element dict. The merge joins
        # the LAST existing element with the FIRST new element (the seam
        # where a broken iteration was cut).
        if isinstance(existingElements, list):
            existingElem = existingElements[-1] if existingElements else {}
        else:
            existingElem = existingElements

        if isinstance(newElements, list):
            newElem = newElements[0] if newElements else {}
        else:
            newElem = newElements

        # Non-dict elements cannot be merged field-by-field.
        if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
            return existingSection

        # Merge based on content type
        if contentType == "code_block":
            existingCode = existingElem.get("code", "")
            newCode = newElem.get("code", "")

            if existingCode and newCode:
                mergedCode = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
                existingElem["code"] = mergedCode
            # Preserve language from existing or new
            if "language" not in existingElem and "language" in newElem:
                existingElem["language"] = newElem["language"]

        elif contentType in ["paragraph", "heading"]:
            existingText = existingElem.get("text", "")
            newText = newElem.get("text", "")

            if existingText and newText:
                # Mid-sentence break -> join with a space; otherwise newline.
                if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
                    mergedText = existingText.rstrip() + " " + newText.lstrip()
                else:
                    mergedText = existingText.rstrip() + "\n" + newText.lstrip()
                existingElem["text"] = mergedText

        elif contentType == "table":
            # Merge table rows with sophisticated overlap detection.
            # CRITICAL: Tables can have rows in two places:
            # 1. Direct: existingElem["rows"] (legacy format)
            # 2. Nested: existingElem["content"]["rows"] (current format)
            existingRows = None
            newRows = None

            # Check nested structure first (current format)
            if "content" in existingElem and isinstance(existingElem["content"], dict):
                existingRows = existingElem["content"].get("rows", [])
            # Fallback to direct structure (legacy format)
            if not existingRows:
                existingRows = existingElem.get("rows", [])

            # Check nested structure first (current format)
            if "content" in newElem and isinstance(newElem["content"], dict):
                newRows = newElem["content"].get("rows", [])
            # Fallback to direct structure (legacy format)
            if not newRows:
                newRows = newElem.get("rows", [])

            if existingRows and newRows:
                # Use sophisticated overlap detection that handles multiple overlapping rows
                mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
                # Store in nested structure (current format)
                if "content" not in existingElem:
                    existingElem["content"] = {}
                existingElem["content"]["rows"] = mergedRows
                # Also set type if missing
                if "type" not in existingElem:
                    existingElem["type"] = "table"
                logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
            elif newRows:
                # If existing has no rows but new does, use new rows
                if "content" not in existingElem:
                    existingElem["content"] = {}
                existingElem["content"]["rows"] = newRows
                if "type" not in existingElem:
                    existingElem["type"] = "table"

            # Preserve headers from existing (or use new if existing has none).
            # Headers can be in content.headers or directly in the element.
            existingHeaders = existingElem.get("content", {}).get("headers", []) if "content" in existingElem else existingElem.get("headers", [])
            newHeaders = newElem.get("content", {}).get("headers", []) if "content" in newElem else newElem.get("headers", [])
            if not existingHeaders and newHeaders:
                if "content" not in existingElem:
                    existingElem["content"] = {}
                existingElem["content"]["headers"] = newHeaders

            # Preserve caption from existing (or use new if existing has none)
            existingCaption = existingElem.get("content", {}).get("caption") if "content" in existingElem else existingElem.get("caption")
            newCaption = newElem.get("content", {}).get("caption") if "content" in newElem else newElem.get("caption")
            if not existingCaption and newCaption:
                if "content" not in existingElem:
                    existingElem["content"] = {}
                existingElem["content"]["caption"] = newCaption

        elif contentType in ["bullet_list", "numbered_list"]:
            # Merge list items with sophisticated overlap detection
            existingItems = existingElem.get("items", [])
            newItems = newElem.get("items", [])
            if existingItems and newItems:
                mergedItems = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
                existingElem["items"] = mergedItems
            elif newItems:
                existingElem["items"] = newItems

        elif contentType == "image":
            # Images are typically complete - if new image is provided, replace existing.
            # But check if existing image data is incomplete (base64 cut off).
            existingImageData = existingElem.get("base64Data", "")
            newImageData = newElem.get("base64Data", "")
            if existingImageData and newImageData:
                # Missing '='/'==' base64 padding -> existing data was cut off,
                # so the new chunk is a continuation to APPEND, not a replacement.
                if not existingImageData.rstrip().endswith(('=', '==')):
                    existingElem["base64Data"] = existingImageData + newImageData
                    logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
                else:
                    # Existing image is complete - replace with new
                    if newImageData:
                        existingElem["base64Data"] = newImageData
            elif newImageData:
                existingElem["base64Data"] = newImageData
            # Preserve other image metadata
            if not existingElem.get("altText") and newElem.get("altText"):
                existingElem["altText"] = newElem["altText"]
            if not existingElem.get("caption") and newElem.get("caption"):
                existingElem["caption"] = newElem["caption"]

        else:
            # GENERIC FALLBACK: deep recursive merging for complex nested
            # structures - handles any content type, any depth.
            merged_element = JsonResponseHandler.mergeDeepStructures(
                existingElem,
                newElem,
                iteration,
                f"section.{contentType}"
            )
            existingElem = merged_element

        # Update section with merged content
        mergedSection = existingSection.copy()
        if isinstance(existingElements, list):
            # Update the last element in the list with merged content
            if existingElements:
                existingElements[-1] = existingElem
            mergedSection["elements"] = existingElements
        else:
            mergedSection["elements"] = existingElem

        # Preserve metadata from new section if missing in existing
        if "order" not in mergedSection and "order" in newSection:
            mergedSection["order"] = newSection["order"]

        return mergedSection
|
|
|
|
@staticmethod
|
|
def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
|
|
"""
|
|
Merge two code blocks intelligently, handling overlaps and incomplete lines.
|
|
"""
|
|
if not existingCode:
|
|
return newCode
|
|
if not newCode:
|
|
return existingCode
|
|
|
|
existingLines = existingCode.rstrip().split('\n')
|
|
newLines = newCode.strip().split('\n')
|
|
|
|
if not existingLines or not newLines:
|
|
return existingCode + "\n" + newCode
|
|
|
|
lastExistingLine = existingLines[-1].strip()
|
|
firstNewLine = newLines[0].strip()
|
|
|
|
# Strategy 1: Exact overlap - remove duplicate line
|
|
if lastExistingLine == firstNewLine:
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
|
|
|
|
# Strategy 2: Incomplete line merge
|
|
# If last existing line ends with comma or is incomplete, merge with first new line
|
|
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
|
|
# Last line is incomplete - merge with first new line
|
|
# Remove trailing comma from existing line
|
|
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
|
|
existingLines[-1] = mergedLine
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
|
|
|
|
# Strategy 3: Partial overlap detection
|
|
# Check if first new line starts with the end of last existing line
|
|
elif ',' in lastExistingLine and ',' in firstNewLine:
|
|
lastExistingParts = lastExistingLine.split(',')
|
|
firstNewParts = firstNewLine.split(',')
|
|
|
|
# Check for overlap: if last part of existing matches first part of new
|
|
if lastExistingParts and firstNewParts:
|
|
lastExistingPart = lastExistingParts[-1].strip()
|
|
firstNewPart = firstNewParts[0].strip()
|
|
|
|
# If they match, there's overlap
|
|
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
|
|
# Remove overlapping part from new line
|
|
newLines[0] = ','.join(firstNewParts[1:])
|
|
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
|
|
|
|
# Reconstruct merged code
|
|
mergedCode = '\n'.join(existingLines)
|
|
if newLines:
|
|
if mergedCode and not mergedCode.endswith('\n'):
|
|
mergedCode += '\n'
|
|
mergedCode += '\n'.join(newLines)
|
|
|
|
return mergedCode
|
|
|
|
@staticmethod
|
|
def detectAndParseJsonFragment(
|
|
result: str,
|
|
allSections: List[Dict[str, Any]]
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
GENERIC fragment detection for ANY JSON structure.
|
|
|
|
Detects if response is a JSON fragment (continuation content) rather than full document structure.
|
|
Works for ANY JSON type: arrays, objects, primitives, nested structures of any depth/complexity.
|
|
|
|
Fragment = Any JSON that:
|
|
1. Does NOT have "documents" or "sections" keys (not full document structure)
|
|
2. Can be ANY structure: array, object, nested, primitive, etc.
|
|
3. Is continuation content that needs to be merged into existing sections
|
|
|
|
Examples (all handled generically):
|
|
- Array: [["37643", ...], ...] (table rows, list items, any array)
|
|
- Object: {"rows": [...], "headers": [...]} (partial element)
|
|
- Primitive: "continuation text" (rare but possible)
|
|
- Nested: {"data": {"items": [...]}} (any nested structure)
|
|
|
|
Returns fragment info dict with:
|
|
- fragment_data: The parsed fragment content (ANY type)
|
|
- target_section_id: ID of last incomplete section (generic, not type-specific)
|
|
|
|
CRITICAL: Fully generic - no specific logic for tables, paragraphs, etc.
|
|
"""
|
|
try:
|
|
extracted = extractJsonString(result)
|
|
parsed = json.loads(extracted)
|
|
|
|
# GENERIC fragment detection: Check if it's NOT a full document structure
|
|
is_full_document = False
|
|
if isinstance(parsed, dict):
|
|
# Full document structure has "documents" or "sections" keys
|
|
if "documents" in parsed or "sections" in parsed:
|
|
is_full_document = True
|
|
|
|
# If it's a full document structure, it's not a fragment
|
|
if is_full_document:
|
|
return None
|
|
|
|
# Otherwise, it's a fragment (can be ANY structure: array, object, primitive, nested)
|
|
# Find target: last incomplete section (generic, regardless of content type)
|
|
target_section_id = JsonResponseHandler.findLastIncompleteSectionId(allSections)
|
|
|
|
logger.info(f"Detected GENERIC JSON fragment (type: {type(parsed).__name__}), target: {target_section_id}")
|
|
|
|
return {
|
|
"fragment_data": parsed, # Can be ANY JSON structure
|
|
"target_section_id": target_section_id
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error detecting JSON fragment: {e}")
|
|
logger.debug(f"Fragment detection failed for result: {result[:500]}...")
|
|
|
|
return None
|
|
|
|
@staticmethod
|
|
def findLastIncompleteSectionId(
|
|
allSections: List[Dict[str, Any]]
|
|
) -> Optional[str]:
|
|
"""
|
|
GENERIC: Find the last incomplete section (regardless of content type).
|
|
|
|
This is fully generic - works for ANY content type, ANY structure.
|
|
Returns the ID of the last section that is incomplete, or None if all are complete.
|
|
"""
|
|
# Find the last incomplete section (generic, not type-specific)
|
|
for section in reversed(allSections):
|
|
if JsonResponseHandler.isSectionIncomplete(section):
|
|
return section.get("id")
|
|
# If no incomplete section found, return last section as fallback
|
|
if allSections:
|
|
return allSections[-1].get("id")
|
|
return None
|
|
|
|
    @staticmethod
    def mergeFragmentIntoSection(
        fragment: Dict[str, Any],
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> Optional[List[Dict[str, Any]]]:
        """
        GENERIC fragment merging for ANY JSON structure.

        Merges a JSON fragment (ANY structure: array, object, nested,
        primitive) into the last incomplete section via deep recursive
        merging - no content-type-specific logic.

        Args:
            fragment: Dict with "fragment_data" (the parsed fragment) and
                "target_section_id" (preferred target section ID).
            allSections: Accumulated sections; a shallow copy is returned
                with the target section replaced.
            iteration: Current iteration number (used for logging).

        Returns:
            The updated section list, or None when the merge fails (no
            fragment data, or no target section found). NO FALLBACKS.
        """
        fragment_data = fragment.get("fragment_data")
        target_section_id = fragment.get("target_section_id")

        if fragment_data is None:
            logger.error(f"Iteration {iteration}: ❌ Fragment has no fragment_data - merge FAILED")
            return None

        # Find the target section: first by explicit ID...
        target_section = None
        target_index = -1

        if target_section_id:
            for i, section in enumerate(allSections):
                if section.get("id") == target_section_id:
                    target_section = section
                    target_index = i
                    break

        # ...then by scanning backwards for the last incomplete section.
        if not target_section:
            for i, section in enumerate(reversed(allSections)):
                if JsonResponseHandler.isSectionIncomplete(section):
                    target_section = section
                    # i counts from the end because of reversed().
                    target_index = len(allSections) - 1 - i
                    break

        # NO FALLBACKS: If no target found, merge FAILS
        if not target_section:
            logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - No target section found for fragment!")
            logger.error(f"Iteration {iteration}: Available sections: {[s.get('id') + ' (' + s.get('content_type', 'unknown') + ')' for s in allSections]}")
            return None

        # Normalize "elements" to a non-empty list whose last entry is a dict
        # (the fragment is merged into the section's LAST element).
        merged_section = target_section.copy()
        elements = merged_section.get("elements", [])

        if not isinstance(elements, list):
            elements = [elements] if elements else []

        if not elements:
            elements = [{}]

        last_element = elements[-1] if elements else {}
        if not isinstance(last_element, dict):
            # Non-dict tail: append a fresh dict to receive the fragment.
            last_element = {}
            elements.append(last_element)

        # GENERIC fragment merging for ALL structure types - detects the
        # structure automatically (tables, lists, code, images, nested, ...).
        merged_element = JsonResponseHandler._mergeFragmentIntoElement(
            last_element,
            fragment_data,
            target_section,
            iteration,
            f"section.{target_section_id}.fragment"
        )

        # Update elements with merged content
        elements[-1] = merged_element
        merged_section["elements"] = elements

        # Shallow-copy the section list so the caller's accumulated state
        # carries the merge into the next iteration (accumulative merging).
        merged_sections = allSections.copy()
        merged_sections[target_index] = merged_section

        logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{target_section_id}'")

        # Best-effort debug dump of the merged state; never fails the merge.
        try:
            from modules.shared.debugLogger import writeDebugFile
            merged_json_str = json.dumps(merged_sections, indent=2, ensure_ascii=False)
            writeDebugFile(merged_json_str, f"merged_json_iteration_{iteration}.json")
        except Exception as e:
            logger.debug(f"Iteration {iteration}: Failed to write merged JSON debug file: {e}")

        return merged_sections
|
|
|
|
@staticmethod
|
|
def completeIncompleteStructures(allSections: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
"""
|
|
Complete any incomplete structures in sections by ensuring proper JSON structure.
|
|
|
|
This ensures JSON is properly closed even if merge failed or iterations stopped early.
|
|
Works generically for ANY structure type - recursively processes all nested structures.
|
|
|
|
Returns sections with completed structures.
|
|
"""
|
|
completed_sections = []
|
|
for section in allSections:
|
|
completed_section = JsonResponseHandler._completeStructure(section)
|
|
completed_sections.append(completed_section)
|
|
return completed_sections
|
|
|
|
@staticmethod
|
|
def _completeStructure(structure: Any) -> Any:
|
|
"""
|
|
Recursively complete incomplete structures by ensuring arrays/objects are properly structured.
|
|
Works generically for ANY JSON structure - no specific logic for content types.
|
|
"""
|
|
if isinstance(structure, dict):
|
|
completed = {}
|
|
for key, value in structure.items():
|
|
completed[key] = JsonResponseHandler._completeStructure(value)
|
|
return completed
|
|
elif isinstance(structure, list):
|
|
completed = []
|
|
for item in structure:
|
|
completed.append(JsonResponseHandler._completeStructure(item))
|
|
return completed
|
|
else:
|
|
# Primitive value - return as is
|
|
return structure
|
|
|
|
@staticmethod
|
|
def getContentTypeForFragment(fragment_type: str) -> str:
|
|
"""Map fragment type to content type."""
|
|
mapping = {
|
|
"table_rows": "table",
|
|
"table_element": "table",
|
|
"code_lines": "code_block",
|
|
"code_element": "code_block",
|
|
"list_items": "bullet_list"
|
|
}
|
|
return mapping.get(fragment_type, "paragraph")
|
|
|
|
@staticmethod
|
|
def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
|
|
"""
|
|
Deep recursive comparison of two JSON-serializable objects.
|
|
Handles nested structures of any depth and complexity.
|
|
|
|
Args:
|
|
obj1: First object to compare
|
|
obj2: Second object to compare
|
|
max_depth: Maximum recursion depth to prevent infinite loops
|
|
|
|
Returns:
|
|
True if objects are deeply equal, False otherwise
|
|
"""
|
|
if max_depth <= 0:
|
|
return False
|
|
|
|
# Type check
|
|
if type(obj1) != type(obj2):
|
|
return False
|
|
|
|
# Primitive types
|
|
if isinstance(obj1, (str, int, float, bool, type(None))):
|
|
return obj1 == obj2
|
|
|
|
# Lists/arrays - compare element by element
|
|
if isinstance(obj1, list):
|
|
if len(obj1) != len(obj2):
|
|
return False
|
|
return all(JsonResponseHandler.deepCompare(item1, item2, max_depth - 1)
|
|
for item1, item2 in zip(obj1, obj2))
|
|
|
|
# Dicts/objects - compare key by key
|
|
if isinstance(obj1, dict):
|
|
if set(obj1.keys()) != set(obj2.keys()):
|
|
return False
|
|
return all(JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
|
|
for key in obj1.keys())
|
|
|
|
# Fallback for other types
|
|
return obj1 == obj2
|
|
|
|
@staticmethod
|
|
def findLongestCommonSuffix(
|
|
existing_list: List[Any],
|
|
new_list: List[Any],
|
|
min_overlap: int = 1
|
|
) -> int:
|
|
"""
|
|
Find the longest common suffix of existing_list that matches a prefix of new_list.
|
|
|
|
This handles cases where multiple elements overlap:
|
|
- existing: [A, B, C, D]
|
|
- new: [C, D, E, F]
|
|
- overlap: [C, D] (length 2)
|
|
|
|
Returns the length of the overlap (0 if no overlap found).
|
|
"""
|
|
if not existing_list or not new_list:
|
|
return 0
|
|
|
|
max_overlap = min(len(existing_list), len(new_list))
|
|
|
|
# Try all possible overlap lengths (from longest to shortest)
|
|
for overlap_len in range(max_overlap, min_overlap - 1, -1):
|
|
existing_suffix = existing_list[-overlap_len:]
|
|
new_prefix = new_list[:overlap_len]
|
|
|
|
# Deep compare suffix and prefix
|
|
if all(JsonResponseHandler.deepCompare(existing_suffix[i], new_prefix[i])
|
|
for i in range(overlap_len)):
|
|
return overlap_len
|
|
|
|
return 0
|
|
|
|
@staticmethod
|
|
def findPartialOverlap(
|
|
existing_item: Any,
|
|
new_item: Any
|
|
) -> Tuple[bool, Optional[Any]]:
|
|
"""
|
|
Detect if new_item completes an incomplete existing_item.
|
|
|
|
Handles cases like:
|
|
- existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"]
|
|
- new: ["37643", "37649", ...]
|
|
|
|
Returns (is_partial_overlap, merged_item) if partial overlap detected, else (False, None).
|
|
"""
|
|
# Check if both are lists
|
|
if isinstance(existing_item, list) and isinstance(new_item, list):
|
|
if not existing_item or not new_item:
|
|
return False, None
|
|
|
|
# Check if last element of existing is incomplete and matches first of new
|
|
last_existing = existing_item[-1]
|
|
first_new = new_item[0]
|
|
|
|
# If last existing is a string and first new is a string
|
|
if isinstance(last_existing, str) and isinstance(first_new, str):
|
|
# Check if last existing is incomplete (very short, ends with number, etc.)
|
|
if len(last_existing) < 10 and first_new.startswith(last_existing):
|
|
# Partial overlap - merge them
|
|
merged_last = last_existing + first_new[len(last_existing):]
|
|
merged_item = existing_item[:-1] + [merged_last] + new_item[1:]
|
|
return True, merged_item
|
|
|
|
# Check if last existing is incomplete list and first new completes it
|
|
if isinstance(last_existing, list) and isinstance(first_new, list):
|
|
if len(last_existing) < len(first_new):
|
|
# Check if last existing is prefix of first new
|
|
if first_new[:len(last_existing)] == last_existing:
|
|
# Merge: replace incomplete last with complete first
|
|
merged_item = existing_item[:-1] + [first_new] + new_item[1:]
|
|
return True, merged_item
|
|
|
|
# Check if existing is incomplete string and new completes it
|
|
if isinstance(existing_item, str) and isinstance(new_item, str):
|
|
if len(existing_item) < 50 and new_item.startswith(existing_item):
|
|
# Partial overlap
|
|
merged = existing_item + new_item[len(existing_item):]
|
|
return True, merged
|
|
|
|
return False, None
|
|
|
|
@staticmethod
|
|
def mergeRowsWithOverlap(
|
|
existing_rows: List[List[str]],
|
|
new_rows: List[List[str]],
|
|
iteration: int
|
|
) -> List[List[str]]:
|
|
"""
|
|
Merge table rows with sophisticated overlap detection.
|
|
Handles multiple overlapping rows and partial overlaps.
|
|
"""
|
|
if not new_rows:
|
|
return existing_rows
|
|
if not existing_rows:
|
|
return new_rows
|
|
|
|
# Strategy 1: Find longest common suffix/prefix overlap
|
|
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
|
|
if overlap_len > 0:
|
|
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
|
|
return existing_rows + new_rows[overlap_len:]
|
|
|
|
# Strategy 2: Check for partial overlap in last row
|
|
if len(existing_rows) > 0 and len(new_rows) > 0:
|
|
last_existing = existing_rows[-1]
|
|
first_new = new_rows[0]
|
|
|
|
is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
|
|
if is_partial:
|
|
logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
|
|
return existing_rows[:-1] + [merged_row] + new_rows[1:]
|
|
|
|
# Strategy 3: Simple first/last comparison (fallback)
|
|
if isinstance(existing_rows[-1], list) and isinstance(new_rows[0], list):
|
|
if list(existing_rows[-1]) == list(new_rows[0]):
|
|
logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
|
|
return existing_rows + new_rows[1:]
|
|
|
|
# No overlap detected - append all new rows
|
|
return existing_rows + new_rows
|
|
|
|
@staticmethod
|
|
def mergeItemsWithOverlap(
|
|
existing_items: List[str],
|
|
new_items: List[str],
|
|
iteration: int
|
|
) -> List[str]:
|
|
"""
|
|
Merge list items with sophisticated overlap detection.
|
|
Handles multiple overlapping items and partial overlaps.
|
|
"""
|
|
if not new_items:
|
|
return existing_items
|
|
if not existing_items:
|
|
return new_items
|
|
|
|
# Strategy 1: Find longest common suffix/prefix overlap
|
|
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
|
|
if overlap_len > 0:
|
|
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
|
|
return existing_items + new_items[overlap_len:]
|
|
|
|
# Strategy 2: Check for partial overlap in last item
|
|
if len(existing_items) > 0 and len(new_items) > 0:
|
|
is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
|
|
if is_partial:
|
|
logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
|
|
return existing_items[:-1] + [merged_item] + new_items[1:]
|
|
|
|
# Strategy 3: Simple first/last comparison (fallback)
|
|
if existing_items[-1] == new_items[0]:
|
|
logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
|
|
return existing_items + new_items[1:]
|
|
|
|
# No overlap detected - append all new items
|
|
return existing_items + new_items
|
|
|
|
    @staticmethod
    def mergeDeepStructures(
        existing: Any,
        new: Any,
        iteration: int,
        path: str = "root"
    ) -> Any:
        """
        FULLY GENERIC recursive merge for ANY JSON structure of arbitrary depth/complexity.

        Handles ALL cases generically:
        1. Arrays/Lists: Overlap detection (suffix/prefix), partial overlap, no overlap (continuation)
        2. Objects/Dicts: Key-by-key merge with overlap detection for nested structures
        3. Primitives: Equality check, replacement if different
        4. Nested structures: Recursively handles any depth/complexity

        Overlap detection strategies (all generic):
        - Array overlap: Finds longest common suffix/prefix, handles partial overlaps
        - Object overlap: Detected recursively through key matching and deep comparison
        - No overlap: Appends/merges continuation content after cut-off point

        CRITICAL: Fully generic - no specific logic for content types.
        Works for ANY JSON structure: arrays, objects, nested, primitives, any combination.

        Args:
            existing: Value accumulated so far.
            new: Value from the current iteration to merge in.
            iteration: Current iteration number (used in log messages only).
            path: Dotted path of the current position, for log messages.

        Returns:
            The merged value. NOTE: strategy order is load-bearing - suffix/prefix
            overlap is tried before partial overlap, which is tried before plain
            concatenation.
        """
        # Type check - mismatched types cannot be merged, the newer value wins
        if type(existing) != type(new):
            # Types don't match - return new (replacement)
            logger.debug(f"Iteration {iteration}: Types don't match at {path} ({type(existing).__name__} vs {type(new).__name__}), replacing")
            return new

        # Lists/arrays - GENERIC merge with overlap detection
        if isinstance(existing, list) and isinstance(new, list):
            if not new:
                return existing
            if not existing:
                return new

            # Strategy 1: Find longest common suffix/prefix overlap (handles multiple overlapping elements)
            overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
            if overlap_len > 0:
                logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
                return existing + new[overlap_len:]

            # Strategy 2: Check for partial overlap in last element (incomplete element completion)
            if len(existing) > 0 and len(new) > 0:
                is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
                if is_partial:
                    logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging incomplete element")
                    return existing[:-1] + [merged_item] + new[1:]

            # Strategy 3: No overlap detected - continuation after cut-off point
            # This handles the case where new data starts exactly after the cut-off
            logger.debug(f"Iteration {iteration}: No overlap at {path}, appending continuation content ({len(new)} items)")
            return existing + new

        # Dicts/objects - GENERIC merge with recursive overlap detection
        if isinstance(existing, dict) and isinstance(new, dict):
            merged = existing.copy()

            # Check for object-level overlap: if new object is subset/superset of existing
            # This handles cases where same object structure appears in both
            existing_keys = set(existing.keys())
            new_keys = set(new.keys())

            # If new is subset of existing and values match, it's overlap (skip)
            if new_keys.issubset(existing_keys):
                all_match = True
                for key in new_keys:
                    if not JsonResponseHandler.deepCompare(existing[key], new[key]):
                        all_match = False
                        break
                if all_match:
                    logger.debug(f"Iteration {iteration}: Object at {path} is subset overlap, skipping")
                    return existing

            # Merge key-by-key with recursive overlap detection
            for key, new_value in new.items():
                if key in merged:
                    # Key exists - merge recursively (handles nested overlap detection)
                    merged[key] = JsonResponseHandler.mergeDeepStructures(
                        merged[key],
                        new_value,
                        iteration,
                        f"{path}.{key}"
                    )
                else:
                    # New key - add it (continuation content)
                    merged[key] = new_value
                    logger.debug(f"Iteration {iteration}: Added new key '{key}' at {path} (continuation)")

            return merged

        # Primitives - equality check
        if existing == new:
            return existing

        # Different primitive values - return new (continuation/replacement)
        logger.debug(f"Iteration {iteration}: Primitive at {path} differs, using new value")
        return new
|
|
|
|
    @staticmethod
    def _mergeFragmentIntoElement(
        last_element: Dict[str, Any],
        fragment_data: Any,
        target_section: Dict[str, Any],
        iteration: int,
        path: str
    ) -> Dict[str, Any]:
        """
        GENERIC fragment merging for ALL structure types.

        Automatically detects the structure type and merges fragments accordingly.
        Works for: tables, lists, code blocks, paragraphs, images, and any nested structures.

        Strategy:
        1. Analyze last_element structure to determine content location (content.rows, content.items, etc.)
        2. Detect fragment type (array, object, primitive)
        3. Merge fragment into appropriate location using mergeDeepStructures

        Args:
            last_element: The existing element to merge into. NOTE: mutated in
                place when a specific content path is found.
            fragment_data: The fragment data to merge (can be any JSON structure)
            target_section: The target section (for content_type detection)
            iteration: Current iteration number
            path: Path for logging

        Returns:
            Merged element (last_element itself for path-based merges, a new
            merged dict for the generic fallback).
        """
        contentType = target_section.get("content_type", "")
        elementType = last_element.get("type", "")

        # Determine the content structure path based on element type and content type
        # This handles both nested (content.rows) and flat (rows) structures
        contentPath = None
        fragmentIsArray = isinstance(fragment_data, list) and len(fragment_data) > 0

        # Detect structure type and determine merge path
        if contentType == "table" or elementType == "table":
            # Tables: merge into content.rows or rows
            if "content" in last_element and isinstance(last_element["content"], dict):
                contentPath = "content.rows"
            else:
                contentPath = "rows"
        elif contentType in ["bullet_list", "numbered_list", "list"] or elementType in ["bullet_list", "numbered_list", "list"]:
            # Lists: merge into content.items or items
            if "content" in last_element and isinstance(last_element["content"], dict):
                contentPath = "content.items"
            else:
                contentPath = "items"
        elif contentType == "code_block" or elementType == "code_block":
            # Code blocks: merge into content.code or code
            if "content" in last_element and isinstance(last_element["content"], dict):
                contentPath = "content.code"
            else:
                contentPath = "code"
        elif contentType in ["paragraph", "heading"] or elementType in ["paragraph", "heading"]:
            # Text: merge into content.text or text
            if "content" in last_element and isinstance(last_element["content"], dict):
                contentPath = "content.text"
            else:
                contentPath = "text"
        elif contentType == "image" or elementType == "image":
            # Images: merge into base64Data
            contentPath = "base64Data"

        # If we have a specific content path, merge into that location
        if contentPath:
            # Split path (e.g., "content.rows" -> ["content", "rows"])
            pathParts = contentPath.split(".")

            # Ensure nested structure exists; non-dict intermediates are replaced
            current = last_element
            for i, part in enumerate(pathParts[:-1]):
                if part not in current:
                    current[part] = {}
                elif not isinstance(current[part], dict):
                    current[part] = {}
                current = current[part]

            # Get existing content at target path
            targetKey = pathParts[-1]
            existingContent = current.get(targetKey, [])

            # Merge fragment into existing content
            # CRITICAL: Handle both array fragments and object fragments generically
            if fragmentIsArray:
                # Fragment is an array - merge arrays
                if isinstance(existingContent, list):
                    # Check if fragment is array of arrays (e.g., table rows) or array of primitives
                    if len(fragment_data) > 0 and isinstance(fragment_data[0], list):
                        # Array of arrays - use rows merge for tables, generic merge for others
                        if contentPath.endswith(".rows"):
                            mergedContent = JsonResponseHandler.mergeRowsWithOverlap(existingContent, fragment_data, iteration)
                        else:
                            # Generic array-of-arrays merge
                            mergedContent = JsonResponseHandler.mergeDeepStructures(
                                existingContent,
                                fragment_data,
                                iteration,
                                f"{path}.{targetKey}"
                            )
                    else:
                        # Array of primitives - use items merge for lists, generic merge for others
                        if contentPath.endswith(".items"):
                            mergedContent = JsonResponseHandler.mergeItemsWithOverlap(existingContent, fragment_data, iteration)
                        else:
                            # Generic array merge using mergeDeepStructures
                            mergedContent = JsonResponseHandler.mergeDeepStructures(
                                existingContent,
                                fragment_data,
                                iteration,
                                f"{path}.{targetKey}"
                            )
                else:
                    # Existing content is not a list - replace with fragment
                    mergedContent = fragment_data
            elif isinstance(fragment_data, dict):
                # Fragment is an object - check if it contains nested content (e.g., {"content": {"rows": [...]}})
                # If fragment has same structure as target, merge nested content
                if "content" in fragment_data and isinstance(fragment_data["content"], dict):
                    fragmentNested = fragment_data["content"]
                    # Check if fragment has the same key as our target (e.g., fragment.content.rows)
                    if targetKey in fragmentNested:
                        # Fragment has nested content matching our target - merge that content
                        fragmentNestedContent = fragmentNested[targetKey]
                        if isinstance(existingContent, list) and isinstance(fragmentNestedContent, list):
                            # Both are lists - merge them
                            if contentPath.endswith(".rows"):
                                mergedContent = JsonResponseHandler.mergeRowsWithOverlap(existingContent, fragmentNestedContent, iteration)
                            elif contentPath.endswith(".items"):
                                mergedContent = JsonResponseHandler.mergeItemsWithOverlap(existingContent, fragmentNestedContent, iteration)
                            else:
                                mergedContent = JsonResponseHandler.mergeDeepStructures(
                                    existingContent,
                                    fragmentNestedContent,
                                    iteration,
                                    f"{path}.{targetKey}"
                                )
                        else:
                            # Use deep merge for nested content
                            mergedContent = JsonResponseHandler.mergeDeepStructures(
                                existingContent if existingContent else {},
                                fragmentNestedContent,
                                iteration,
                                f"{path}.{targetKey}"
                            )
                    else:
                        # Fragment has different structure - merge entire fragment object
                        mergedContent = JsonResponseHandler.mergeDeepStructures(
                            existingContent if existingContent else {},
                            fragment_data,
                            iteration,
                            f"{path}.{targetKey}"
                        )
                else:
                    # Fragment is a simple object - use deep merge
                    mergedContent = JsonResponseHandler.mergeDeepStructures(
                        existingContent if existingContent else {},
                        fragment_data,
                        iteration,
                        f"{path}.{targetKey}"
                    )
            else:
                # Fragment is a primitive or unknown type - use deep merge
                mergedContent = JsonResponseHandler.mergeDeepStructures(
                    existingContent if existingContent else {},
                    fragment_data,
                    iteration,
                    f"{path}.{targetKey}"
                )

            # Update the merged content
            current[targetKey] = mergedContent

            # Ensure type is set
            # NOTE(review): elementType is read from last_element["type"], so the first
            # branch can only fire if elementType is truthy yet "type" is absent - which
            # cannot happen; effectively only the contentType branch takes effect. Confirm.
            if elementType and "type" not in last_element:
                last_element["type"] = elementType
            elif contentType and "type" not in last_element:
                last_element["type"] = contentType

            logger.info(f"Iteration {iteration}: ✅ Merged fragment into {contentPath} for section '{target_section.get('id')}'")
            return last_element

        # No specific content path - use generic deep merge
        # This handles any structure type generically
        merged_element = JsonResponseHandler.mergeDeepStructures(
            last_element,
            fragment_data,
            iteration,
            path
        )

        logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{target_section.get('id')}'")
        return merged_element
|
|
|
|
@staticmethod
|
|
def cleanEncodingIssues(jsonString: str) -> str:
|
|
"""
|
|
GENERIC function to remove problematic encoding parts from JSON string.
|
|
|
|
Works for ANY JSON structure - removes problematic characters/bytes.
|
|
|
|
Args:
|
|
jsonString: JSON string that may have encoding issues
|
|
|
|
Returns:
|
|
Cleaned JSON string
|
|
"""
|
|
try:
|
|
# Try to decode/encode to detect issues
|
|
jsonString.encode('utf-8').decode('utf-8')
|
|
return jsonString
|
|
except UnicodeError:
|
|
# Remove problematic parts
|
|
cleaned = jsonString.encode('utf-8', errors='ignore').decode('utf-8', errors='ignore')
|
|
logger.warning("Removed encoding issues from JSON string")
|
|
return cleaned
|
|
|
|
    @staticmethod
    def mergeJsonStringsWithOverlap(
        accumulated: str,
        newFragment: str
    ) -> Tuple[str, bool]:
        """
        Merge JSON fragments intelligently using modular parser.

        Uses the new ModularJsonMerger for clean, robust merging.
        Falls back to legacy code only if new merger fails completely.

        Args:
            accumulated: Previously accumulated JSON string (may be incomplete/fragmented)
            newFragment: New fragment string to append (may be incomplete/fragmented)

        Returns:
            Tuple of (merged_json_string, has_overlap):
            - merged_json_string: Combined JSON string with fragments properly merged
            - has_overlap: True if overlap was found (iterations should continue), False if no overlap (iterations should stop)

        Note:
            Every fallback path returns has_overlap=False, i.e. only the
            ModularJsonMerger can signal that iterations should continue.
        """
        # Trivial cases: one side empty - nothing to merge, no overlap possible
        if not accumulated:
            result = newFragment if newFragment else "{}"
            return (result, False)  # No overlap if no accumulated data
        if not newFragment:
            return (accumulated, False)  # No overlap if no new fragment

        # Use new modular merger
        try:
            from modules.services.serviceAi.subJsonMerger import ModularJsonMerger
            result, hasOverlap = ModularJsonMerger.merge(accumulated, newFragment)
            # IMPORTANT: ModularJsonMerger returns unclosed JSON if overlap found (with incomplete element at end)
            # If no overlap, returns closed JSON (iterations should stop)
            if result and result.strip() and result.strip() != "{}":
                # Return result with overlap flag
                return (result, hasOverlap)
        except Exception as e:
            logger.debug(f"Modular merger failed, using fallback: {e}")

        # Fallback to legacy merger (simplified)
        from modules.shared.jsonUtils import normalizeJsonText, stripCodeFences, closeJsonStructures, tryParseJson

        # Normalize both sides (strip markdown fences etc.) before parsing
        accumulatedExtracted = stripCodeFences(normalizeJsonText(accumulated)).strip()
        newFragmentExtracted = stripCodeFences(normalizeJsonText(newFragment)).strip()

        # Try simple string merge with repair
        try:
            # Close structures
            accClosed = closeJsonStructures(accumulatedExtracted) if accumulatedExtracted else "{}"
            fragClosed = closeJsonStructures(newFragmentExtracted) if newFragmentExtracted else "{}"

            # Try to parse both
            accParsed, accErr, _ = tryParseJson(accClosed)
            fragParsed, fragErr, _ = tryParseJson(fragClosed)

            # If both parse, merge structurally
            if accErr is None and fragErr is None:
                merged = JsonResponseHandler._mergeParsedJson(accParsed, fragParsed)
                if merged:
                    result = json.dumps(merged, indent=2, ensure_ascii=False)
                    return (result, False)  # No overlap in fallback - close and stop

            # If only accumulated parses, return it
            if accErr is None and accParsed:
                result = json.dumps(accParsed, indent=2, ensure_ascii=False)
                return (result, False)  # No overlap - close and stop
        except Exception:
            # Best-effort fallback: any failure here drops through to the last resort
            pass

        # Last resort: return accumulated (at least we have that) - close it
        if accumulatedExtracted:
            try:
                closed = closeJsonStructures(accumulatedExtracted)
                return (closed, False)  # No overlap - close and stop
            except Exception:
                return (accumulatedExtracted, False)  # No overlap - return as-is

        result = accumulated if accumulated else "{}"
        return (result, False)  # No overlap - return as-is
|
|
|
|
@staticmethod
|
|
def _mergeParsedJson(accParsed: Any, fragParsed: Any) -> Optional[Dict[str, Any]]:
|
|
"""Simple merge of two parsed JSON objects."""
|
|
if isinstance(accParsed, dict) and isinstance(fragParsed, dict):
|
|
# Merge dicts
|
|
merged = accParsed.copy()
|
|
|
|
# Merge elements if both have them
|
|
if "elements" in accParsed and "elements" in fragParsed:
|
|
accElements = accParsed.get("elements", [])
|
|
fragElements = fragParsed.get("elements", [])
|
|
# Simple merge - append new elements
|
|
merged["elements"] = accElements + fragElements
|
|
elif "elements" in fragParsed:
|
|
merged["elements"] = fragParsed["elements"]
|
|
|
|
# Merge other keys
|
|
for key, value in fragParsed.items():
|
|
if key != "elements":
|
|
if key in merged and isinstance(merged[key], list) and isinstance(value, list):
|
|
merged[key] = merged[key] + value
|
|
else:
|
|
merged[key] = value
|
|
|
|
return merged
|
|
|
|
return None
|
|
|
|
    @staticmethod
    def _normalizeToElementsStructure(
        jsonString: str,
        originalString: str
    ) -> Optional[Dict[str, Any]]:
        """
        Normalize any JSON structure (Dict, List, None, or parse error) to {"elements": [...]} format.

        Handles:
        - Dict with "elements" → return as-is
        - Dict without "elements" but with "type" → wrap in elements array
        - List → wrap in elements structure
        - Parse error → try repairBrokenJson (document structures only),
          otherwise fall back to fragment detection
        - None → return None

        Args:
            jsonString: Extracted JSON string
            originalString: Original string (for context, passed on to fragment detection)

        Returns:
            Normalized Dict with "elements" array, or None if normalization fails
        """
        if not jsonString:
            return None

        # NOTE(review): tryParseJson and repairBrokenJson are imported together;
        # tryParseJson is unused in this method.
        from modules.shared.jsonUtils import tryParseJson, repairBrokenJson, closeJsonStructures

        # Try to parse directly first
        try:
            parsed = json.loads(jsonString)
            parseErr = None
        except Exception as e:
            parseErr = e
            parsed = None

        # If parsing failed, try closing structures first (for incomplete fragments)
        if parseErr is not None:
            try:
                closed = closeJsonStructures(jsonString)
                parsed = json.loads(closed)
                parseErr = None
            except Exception:
                pass

        # If still failed, try repairBrokenJson ONLY if it looks like document structure
        # For other structures (like section_content), use fragment detection instead
        if parseErr is not None:
            # Check if this looks like a document structure (has "documents" or "sections")
            isDocumentStructure = '"documents"' in jsonString or '"sections"' in jsonString

            if isDocumentStructure:
                # Use repairBrokenJson for document structures
                repaired = repairBrokenJson(jsonString)
                if repaired:
                    parsed = repaired
                    parseErr = None
                else:
                    # Still can't parse - try to detect fragment structure
                    return JsonResponseHandler._detectAndNormalizeFragment(jsonString, originalString)
            else:
                # For non-document structures, skip repairBrokenJson and go straight to fragment detection
                # repairBrokenJson tries to extract "sections" which doesn't work for other structures
                return JsonResponseHandler._detectAndNormalizeFragment(jsonString, originalString)

        # Normalize based on type
        if parsed is None:
            return None
        elif isinstance(parsed, dict):
            # Already a dict
            if "elements" in parsed:
                return parsed
            elif "type" in parsed:
                # Single element - wrap in elements array
                return {"elements": [parsed]}
            else:
                # Unknown dict structure - try to extract elements
                return JsonResponseHandler._extractElementsFromDict(parsed)
        elif isinstance(parsed, list):
            # List - check if it's a list of elements or a fragment
            if parsed and isinstance(parsed[0], dict) and "type" in parsed[0]:
                # List of elements
                return {"elements": parsed}
            else:
                # Fragment list (e.g., array of rows) - detect structure
                return JsonResponseHandler._detectAndNormalizeFragment(jsonString, originalString)
        else:
            # Primitive type - can't normalize
            return None
|
|
|
|
    @staticmethod
    def _detectAndNormalizeFragment(
        jsonString: str,
        originalString: str
    ) -> Optional[Dict[str, Any]]:
        """
        Detect fragment structure and normalize it to {"elements": [...]} format.

        Fragments can be:
        - Array of arrays (table rows): `[["row1"], ["row2"]]` or `["1947", "16883"], ["1948", "16889"]`
        - Array of strings (list items): `["item1", "item2"]`
        - Incomplete structure: `["item1", "item2", ` (ends with comma)
        - Partial object: `{"type": "table", "content": {"rows": [["1947"...` (cut mid-string)

        Strategies are tried in order: array fragment, partial object, then a
        raw regex scan for table-row patterns anywhere in the text.

        Returns normalized structure or None if detection fails.
        """
        jsonStripped = jsonString.strip()

        # Strategy 1: Check if it's an array fragment
        if jsonStripped.startswith('['):
            # Try to parse as array
            from modules.shared.jsonUtils import tryParseJson, closeJsonStructures

            # Close incomplete structures
            closed = closeJsonStructures(jsonStripped)
            parsed, parseErr, _ = tryParseJson(closed)

            if parseErr is None and isinstance(parsed, list):
                # Check structure: array of arrays (table rows) or array of strings (list items)
                if parsed and isinstance(parsed[0], list):
                    # Array of arrays - likely table rows fragment
                    return {
                        "elements": [{
                            "type": "table",
                            "content": {
                                "rows": parsed
                            }
                        }]
                    }
                elif parsed and isinstance(parsed[0], str):
                    # Array of strings - likely list items fragment
                    return {
                        "elements": [{
                            "type": "bullet_list",
                            "content": {
                                "items": parsed
                            }
                        }]
                    }
            elif parseErr is not None:
                # Can't parse - try regex extraction for table rows
                rows = JsonResponseHandler._extractRowsFromFragment(jsonStripped)
                if rows:
                    return {
                        "elements": [{
                            "type": "table",
                            "content": {
                                "rows": rows
                            }
                        }]
                    }

        # Strategy 2: Check if it's a partial object (cut mid-structure)
        # Look for patterns like: {"elements": [...] or {"type": "table"...
        if jsonStripped.startswith('{'):
            from modules.shared.jsonUtils import tryParseJson, closeJsonStructures

            # Try to close and parse
            closed = closeJsonStructures(jsonStripped)
            parsed, parseErr, _ = tryParseJson(closed)

            if parseErr is None and isinstance(parsed, dict):
                # Successfully parsed - normalize it
                return JsonResponseHandler._normalizeToElementsStructure(closed, originalString)
            elif parseErr is not None:
                # Can't parse - try to extract table rows from the raw string
                # This handles cases like: {"elements": [{"type": "table", "content": {"rows": [["1947"...
                rows = JsonResponseHandler._extractRowsFromFragment(jsonStripped)
                if rows:
                    return {
                        "elements": [{
                            "type": "table",
                            "content": {
                                "rows": rows
                            }
                        }]
                    }

                # Try to extract any array patterns that might be table rows
                # Look for patterns like: ["1947", "10000"], ["1948", "10100"]
                import re
                # Pattern: ["value1", "value2"], ["value3", "value4"]
                # NOTE: only matches exactly-two-column rows.
                rowPattern = r'\["([^"]*)",\s*"([^"]*)"\]'
                matches = re.findall(rowPattern, jsonStripped)
                if matches and len(matches) >= 2:
                    # Found multiple row patterns - likely table rows
                    rows = [[match[0], match[1]] for match in matches]
                    return {
                        "elements": [{
                            "type": "table",
                            "content": {
                                "rows": rows
                            }
                        }]
                    }

        # Strategy 3: Try to extract rows from any text (even if not starting with [ or {)
        rows = JsonResponseHandler._extractRowsFromFragment(jsonStripped)
        if rows:
            return {
                "elements": [{
                    "type": "table",
                    "content": {
                        "rows": rows
                    }
                }]
            }

        return None
|
|
|
|
@staticmethod
|
|
def _extractElementsFromDict(d: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Try to extract elements from unknown dict structure.
|
|
Returns normalized structure or empty elements array.
|
|
"""
|
|
# Check common patterns
|
|
if "sections" in d:
|
|
# Document structure with sections
|
|
sections = d.get("sections", [])
|
|
elements = []
|
|
for section in sections:
|
|
if isinstance(section, dict) and "elements" in section:
|
|
elements.extend(section.get("elements", []))
|
|
return {"elements": elements}
|
|
|
|
# Unknown structure - return empty
|
|
return {"elements": []}
|
|
|
|
@staticmethod
|
|
def _mergeJsonStructuresGeneric(
|
|
accumulatedObj: Dict[str, Any],
|
|
newFragmentObj: Dict[str, Any],
|
|
accumulatedRaw: str,
|
|
newFragmentRaw: str,
|
|
overlapElements: Optional[List[Dict[str, Any]]] = None
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
GENERIC merge of two JSON structures, handling overlaps and missing parts.
|
|
|
|
Strategy:
|
|
1. Extract elements from both structures (both are normalized to {"elements": [...]})
|
|
2. Use overlap elements if provided to identify merge point
|
|
3. Detect if both have same structure (same content type)
|
|
4. Group elements by type
|
|
5. Merge elements of same type using content-type-specific logic with overlap detection
|
|
6. Handle overlaps and missing parts intelligently
|
|
|
|
Args:
|
|
accumulatedObj: Normalized accumulated JSON object (guaranteed to have "elements")
|
|
newFragmentObj: Normalized new fragment JSON object (guaranteed to have "elements")
|
|
accumulatedRaw: Raw accumulated string (for fragment detection)
|
|
newFragmentRaw: Raw new fragment string (for fragment detection)
|
|
overlapElements: Optional list of overlap elements from continuation response
|
|
|
|
Returns:
|
|
Merged JSON object or None if merging fails
|
|
"""
|
|
try:
|
|
# Step 1: Extract elements (both are normalized, so this should always work)
|
|
accumulatedElements = accumulatedObj.get("elements", []) if isinstance(accumulatedObj, dict) else []
|
|
newFragmentElements = newFragmentObj.get("elements", []) if isinstance(newFragmentObj, dict) else []
|
|
|
|
if not accumulatedElements and not newFragmentElements:
|
|
# No elements found - try to extract from raw strings
|
|
# Try to extract any valid JSON structure from raw strings
|
|
from modules.shared.jsonUtils import tryParseJson, closeJsonStructures
|
|
|
|
# Try accumulated first
|
|
if accumulatedRaw:
|
|
try:
|
|
closedAccumulated = closeJsonStructures(accumulatedRaw)
|
|
parsed, parseErr, _ = tryParseJson(closedAccumulated)
|
|
if parseErr is None and parsed:
|
|
normalized = JsonResponseHandler._normalizeToElementsStructure(closedAccumulated, accumulatedRaw)
|
|
if normalized:
|
|
return normalized
|
|
except Exception:
|
|
pass
|
|
|
|
# Try new fragment
|
|
if newFragmentRaw:
|
|
try:
|
|
closedFragment = closeJsonStructures(newFragmentRaw)
|
|
parsed, parseErr, _ = tryParseJson(closedFragment)
|
|
if parseErr is None and parsed:
|
|
normalized = JsonResponseHandler._normalizeToElementsStructure(closedFragment, newFragmentRaw)
|
|
if normalized:
|
|
return normalized
|
|
except Exception:
|
|
pass
|
|
|
|
# If still nothing, return empty structure (never None)
|
|
return {"elements": []}
|
|
|
|
# Step 2: Use overlap elements to identify merge point
|
|
# If overlap elements are provided, use them to find where to merge
|
|
if overlapElements and isinstance(overlapElements, list) and len(overlapElements) > 0:
|
|
# Find overlap in accumulated elements
|
|
overlapStartIndex = JsonResponseHandler._findOverlapStartIndex(accumulatedElements, overlapElements)
|
|
if overlapStartIndex >= 0:
|
|
# Remove overlapping elements from accumulated (they'll be replaced by continuation)
|
|
accumulatedElements = accumulatedElements[:overlapStartIndex]
|
|
logger.debug(f"Found overlap at index {overlapStartIndex}, removed {len(accumulatedElements) - overlapStartIndex} overlapping elements")
|
|
|
|
# Step 3: Detect if newFragment is a continuation fragment
|
|
# Check if newFragment starts with array elements (fragment, not full JSON)
|
|
isFragment = JsonResponseHandler._isFragment(newFragmentRaw, newFragmentElements)
|
|
|
|
# Step 4: Group elements by type for intelligent merging
|
|
accumulatedByType = {}
|
|
for elem in accumulatedElements:
|
|
if isinstance(elem, dict):
|
|
elemType = elem.get("type", "unknown")
|
|
if elemType not in accumulatedByType:
|
|
accumulatedByType[elemType] = []
|
|
accumulatedByType[elemType].append(elem)
|
|
|
|
newFragmentByType = {}
|
|
for elem in newFragmentElements:
|
|
if isinstance(elem, dict):
|
|
elemType = elem.get("type", "unknown")
|
|
if elemType not in newFragmentByType:
|
|
newFragmentByType[elemType] = []
|
|
newFragmentByType[elemType].append(elem)
|
|
|
|
# Step 5: Merge elements intelligently
|
|
mergedElements = []
|
|
allTypes = set(accumulatedByType.keys()) | set(newFragmentByType.keys())
|
|
|
|
for elemType in allTypes:
|
|
accElems = accumulatedByType.get(elemType, [])
|
|
fragElems = newFragmentByType.get(elemType, [])
|
|
|
|
if not accElems:
|
|
# Only in fragment - add all
|
|
mergedElements.extend(fragElems)
|
|
elif not fragElems:
|
|
# Only in accumulated - add all
|
|
mergedElements.extend(accElems)
|
|
else:
|
|
# Both have elements of this type - merge them using content-type-specific logic
|
|
mergedElem = JsonResponseHandler._mergeElementsOfSameTypeGeneric(
|
|
accElems[0], fragElems[0], elemType, accumulatedRaw, newFragmentRaw, isFragment
|
|
)
|
|
if mergedElem:
|
|
mergedElements.append(mergedElem)
|
|
|
|
# Step 6: Reconstruct base structure
|
|
if mergedElements:
|
|
return {"elements": mergedElements}
|
|
else:
|
|
# No merged elements - return accumulated if available (NEVER return None)
|
|
if accumulatedElements:
|
|
return {"elements": accumulatedElements}
|
|
# If no accumulated, return new fragment if available
|
|
if newFragmentElements:
|
|
return {"elements": newFragmentElements}
|
|
# Last resort: return empty structure (never None)
|
|
return {"elements": []}
|
|
|
|
except Exception as e:
|
|
logger.debug(f"Structure-based merge failed: {e}")
|
|
import traceback
|
|
logger.debug(traceback.format_exc())
|
|
return None
|
|
|
|
@staticmethod
|
|
def _isFragment(jsonString: str, elements: List[Dict[str, Any]]) -> bool:
|
|
"""
|
|
Detect if JSON string is a fragment (not a complete JSON object).
|
|
|
|
Fragments:
|
|
- Start with `[` but not `[{"` (array fragment, not full elements array)
|
|
- Start with array elements like `["cell1", "cell2"],` (table rows fragment)
|
|
- Don't have full structure (missing outer object with "elements")
|
|
- Are continuations of previous structure
|
|
"""
|
|
jsonStripped = jsonString.strip()
|
|
|
|
# Check if it starts with array (fragment)
|
|
if jsonStripped.startswith('['):
|
|
# Check if it's a full elements array `[{"type": ...}]` or a fragment `["cell1", "cell2"]`
|
|
if jsonStripped.startswith('[{"') or jsonStripped.startswith('[{'):
|
|
# Could be full structure - check if it has "type" field
|
|
if elements and isinstance(elements[0], dict) and "type" in elements[0]:
|
|
return False # Full structure
|
|
# Otherwise it's a fragment (array of primitives or incomplete)
|
|
return True
|
|
|
|
# Check if it starts with object but missing "elements" wrapper
|
|
if jsonStripped.startswith('{'):
|
|
# Check if it has "elements" field
|
|
if '"elements"' not in jsonStripped[:200]: # Check first 200 chars
|
|
# Might be a single element fragment
|
|
return True
|
|
|
|
# Check if elements are incomplete (no full structure)
|
|
if elements and isinstance(elements[0], dict):
|
|
# Check if first element is missing required fields
|
|
firstElem = elements[0]
|
|
if "type" not in firstElem and "content" not in firstElem:
|
|
return True
|
|
|
|
return False
|
|
|
|
@staticmethod
|
|
def _mergeElementsOfSameTypeGeneric(
|
|
accumulatedElem: Dict[str, Any],
|
|
newFragmentElem: Dict[str, Any],
|
|
elemType: str,
|
|
accumulatedRaw: str,
|
|
newFragmentRaw: str,
|
|
isFragment: bool
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
GENERIC merge of two elements of the same type, with content-type-specific optimizations.
|
|
|
|
Content-type-specific merging:
|
|
- table: Merge rows arrays with overlap detection
|
|
- paragraph: Merge text content
|
|
- code_block: Merge code strings
|
|
- bullet_list/numbered_list: Merge items arrays
|
|
- heading: Use new fragment (usually complete)
|
|
- image: Use new fragment (usually complete)
|
|
- Other: Generic deep merge
|
|
|
|
Args:
|
|
accumulatedElem: Accumulated element
|
|
newFragmentElem: New fragment element
|
|
elemType: Content type (table, paragraph, etc.)
|
|
accumulatedRaw: Raw accumulated string
|
|
newFragmentRaw: Raw new fragment string
|
|
isFragment: Whether newFragment is a fragment (continuation)
|
|
|
|
Returns:
|
|
Merged element or None if merging fails
|
|
"""
|
|
if elemType == "table":
|
|
return JsonResponseHandler._mergeTableElementsGeneric(
|
|
accumulatedElem, newFragmentElem, accumulatedRaw, newFragmentRaw, isFragment
|
|
)
|
|
elif elemType == "paragraph":
|
|
return JsonResponseHandler._mergeParagraphElements(
|
|
accumulatedElem, newFragmentElem, isFragment
|
|
)
|
|
elif elemType == "code_block":
|
|
return JsonResponseHandler._mergeCodeBlockElements(
|
|
accumulatedElem, newFragmentElem, isFragment
|
|
)
|
|
elif elemType in ["bullet_list", "numbered_list"]:
|
|
return JsonResponseHandler._mergeListElements(
|
|
accumulatedElem, newFragmentElem, isFragment
|
|
)
|
|
elif elemType in ["heading", "image"]:
|
|
# Usually complete - use new fragment if it exists, otherwise accumulated
|
|
return newFragmentElem if newFragmentElem else accumulatedElem
|
|
else:
|
|
# Generic merge: use mergeDeepStructures
|
|
return JsonResponseHandler.mergeDeepStructures(
|
|
accumulatedElem, newFragmentElem, 0, f"element_merge.{elemType}"
|
|
)
|
|
|
|
@staticmethod
|
|
def _mergeTableElementsGeneric(
|
|
accumulatedElem: Dict[str, Any],
|
|
newFragmentElem: Dict[str, Any],
|
|
accumulatedRaw: str,
|
|
newFragmentRaw: str,
|
|
isFragment: bool
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
GENERIC merge of two table elements with content-type-specific optimizations.
|
|
|
|
Handles:
|
|
- Overlapping rows (detect duplicates by comparing row content)
|
|
- Missing headers (complete with existing headers)
|
|
- Incomplete rows (complete with null values if needed)
|
|
- Fragment rows (if newFragment is a fragment, extract rows from raw string)
|
|
|
|
Args:
|
|
accumulatedElem: Accumulated table element
|
|
newFragmentElem: New fragment table element
|
|
accumulatedRaw: Raw accumulated string (for fragment detection)
|
|
newFragmentRaw: Raw new fragment string (for fragment extraction)
|
|
isFragment: Whether newFragment is a fragment
|
|
|
|
Returns:
|
|
Merged table element
|
|
"""
|
|
# Extract content (handle both nested and flat structures)
|
|
accContent = accumulatedElem.get("content", {})
|
|
if not accContent and "rows" in accumulatedElem:
|
|
accContent = accumulatedElem
|
|
|
|
fragContent = newFragmentElem.get("content", {})
|
|
if not fragContent and "rows" in newFragmentElem:
|
|
fragContent = newFragmentElem
|
|
|
|
# Extract rows
|
|
accRows = accContent.get("rows", []) if isinstance(accContent, dict) else []
|
|
|
|
# If fragment, try to extract rows from raw string
|
|
fragRows = fragContent.get("rows", []) if isinstance(fragContent, dict) else []
|
|
if isFragment and not fragRows:
|
|
fragRows = JsonResponseHandler._extractRowsFromFragment(newFragmentRaw)
|
|
|
|
# Extract headers (complete missing with existing)
|
|
accHeaders = accContent.get("headers", []) if isinstance(accContent, dict) else []
|
|
fragHeaders = fragContent.get("headers", []) if isinstance(fragContent, dict) else []
|
|
mergedHeaders = accHeaders if accHeaders else fragHeaders
|
|
|
|
# Merge rows with overlap detection
|
|
mergedRows = JsonResponseHandler._mergeRowsWithOverlapDetection(accRows, fragRows)
|
|
|
|
# Reconstruct table element
|
|
mergedContent = {
|
|
"headers": mergedHeaders,
|
|
"rows": mergedRows
|
|
}
|
|
|
|
# Preserve other fields (caption, etc.)
|
|
if isinstance(accContent, dict) and "caption" in accContent:
|
|
mergedContent["caption"] = accContent["caption"]
|
|
elif isinstance(fragContent, dict) and "caption" in fragContent:
|
|
mergedContent["caption"] = fragContent["caption"]
|
|
|
|
return {
|
|
"type": "table",
|
|
"content": mergedContent
|
|
}
|
|
|
|
@staticmethod
|
|
def _extractRowsFromFragment(fragmentRaw: str) -> List[List[str]]:
|
|
"""
|
|
Extract table rows from fragment string.
|
|
|
|
Handles fragments like:
|
|
- `["1947", "16883"], ["1948", "16889"], ...`
|
|
- `"rows": [["1947", "10000"], ["1948", "10100"]...`
|
|
- Incomplete fragments cut mid-string
|
|
Also handles fragments with more than 2 columns.
|
|
"""
|
|
import re
|
|
rows = []
|
|
|
|
# Pattern 1: Array of arrays with 2 columns `["cell1", "cell2"], ["cell3", "cell4"]`
|
|
# This pattern matches complete arrays: ["value1", "value2"]
|
|
pattern2Col = r'\["([^"]*)",\s*"([^"]*)"\]'
|
|
matches2Col = re.findall(pattern2Col, fragmentRaw)
|
|
|
|
if matches2Col and len(matches2Col) >= 2: # Need at least 2 rows to be confident
|
|
for match in matches2Col:
|
|
if len(match) == 2:
|
|
rows.append([match[0], match[1]])
|
|
if rows:
|
|
return rows
|
|
|
|
# Pattern 2: Array of arrays with variable columns (more robust)
|
|
# Find all array patterns: ["...", "...", ...]
|
|
# Use non-greedy matching but ensure we get complete arrays
|
|
arrayPattern = r'\[(.*?)\]'
|
|
arrayMatches = re.findall(arrayPattern, fragmentRaw)
|
|
|
|
# Filter to only arrays that look like table rows (have multiple quoted values)
|
|
validArrays = []
|
|
for arrayContent in arrayMatches:
|
|
# Extract quoted strings from array content
|
|
cellPattern = r'"([^"]*)"'
|
|
cells = re.findall(cellPattern, arrayContent)
|
|
# Only consider arrays with 2+ cells (likely table rows)
|
|
if len(cells) >= 2:
|
|
validArrays.append(cells)
|
|
|
|
if validArrays and len(validArrays) >= 2: # Need at least 2 rows
|
|
return validArrays
|
|
|
|
# Pattern 3: Look for "rows": [...] pattern in incomplete JSON
|
|
# This handles cases like: "rows": [["1947", "10000"], ["1948", "10100"]...
|
|
rowsPattern = r'"rows"\s*:\s*\[(.*?)(?:\]|$)'
|
|
rowsMatch = re.search(rowsPattern, fragmentRaw, re.DOTALL)
|
|
if rowsMatch:
|
|
rowsContent = rowsMatch.group(1)
|
|
# Extract all array patterns from rows content
|
|
arrayPattern = r'\[(.*?)\]'
|
|
arrayMatches = re.findall(arrayPattern, rowsContent)
|
|
for arrayContent in arrayMatches:
|
|
cellPattern = r'"([^"]*)"'
|
|
cells = re.findall(cellPattern, arrayContent)
|
|
if len(cells) >= 2: # At least 2 columns
|
|
rows.append(cells)
|
|
if rows:
|
|
return rows
|
|
|
|
# Pattern 4: Try to parse as JSON array (handles complete arrays)
|
|
from modules.shared.jsonUtils import tryParseJson, closeJsonStructures
|
|
|
|
# Try to close incomplete structures
|
|
closed = closeJsonStructures(fragmentRaw.strip())
|
|
parsed, parseErr, _ = tryParseJson(closed)
|
|
|
|
if parseErr is None and isinstance(parsed, list):
|
|
if parsed and isinstance(parsed[0], list):
|
|
# Array of arrays - table rows
|
|
return parsed
|
|
elif parsed and isinstance(parsed[0], str):
|
|
# Array of strings - might be single column table
|
|
return [[item] for item in parsed]
|
|
|
|
# Pattern 5: Last resort - extract any array patterns we can find
|
|
# Even if incomplete, try to extract what we can
|
|
if not rows:
|
|
# Find all patterns like ["value1", "value2"] even if incomplete
|
|
# Use a more lenient pattern that handles incomplete strings
|
|
incompletePattern = r'\["([^"]*)"(?:,\s*"([^"]*)")?'
|
|
incompleteMatches = re.findall(incompletePattern, fragmentRaw)
|
|
for match in incompleteMatches:
|
|
if match[0]: # First value exists
|
|
if match[1]: # Second value exists
|
|
rows.append([match[0], match[1]])
|
|
else:
|
|
# Only one value - might be incomplete, skip for now
|
|
pass
|
|
|
|
return rows
|
|
|
|
@staticmethod
|
|
def _mergeParagraphElements(
|
|
accumulatedElem: Dict[str, Any],
|
|
newFragmentElem: Dict[str, Any],
|
|
isFragment: bool
|
|
) -> Dict[str, Any]:
|
|
"""Merge two paragraph elements."""
|
|
accContent = accumulatedElem.get("content", {})
|
|
fragContent = newFragmentElem.get("content", {})
|
|
|
|
accText = accContent.get("text", "") if isinstance(accContent, dict) else ""
|
|
fragText = fragContent.get("text", "") if isinstance(fragContent, dict) else ""
|
|
|
|
# Merge text (remove overlap if fragment)
|
|
mergedText = accText + fragText if not isFragment else (accText.rstrip() + " " + fragText.lstrip())
|
|
|
|
return {
|
|
"type": "paragraph",
|
|
"content": {"text": mergedText}
|
|
}
|
|
|
|
@staticmethod
|
|
def _mergeCodeBlockElements(
|
|
accumulatedElem: Dict[str, Any],
|
|
newFragmentElem: Dict[str, Any],
|
|
isFragment: bool
|
|
) -> Dict[str, Any]:
|
|
"""Merge two code block elements."""
|
|
accContent = accumulatedElem.get("content", {})
|
|
fragContent = newFragmentElem.get("content", {})
|
|
|
|
accCode = accContent.get("code", "") if isinstance(accContent, dict) else ""
|
|
fragCode = fragContent.get("code", "") if isinstance(fragContent, dict) else ""
|
|
|
|
accLanguage = accContent.get("language") if isinstance(accContent, dict) else None
|
|
fragLanguage = fragContent.get("language") if isinstance(fragContent, dict) else None
|
|
|
|
mergedCode = accCode + "\n" + fragCode if fragCode else accCode
|
|
mergedLanguage = accLanguage or fragLanguage
|
|
|
|
result = {
|
|
"type": "code_block",
|
|
"content": {"code": mergedCode}
|
|
}
|
|
if mergedLanguage:
|
|
result["content"]["language"] = mergedLanguage
|
|
|
|
return result
|
|
|
|
@staticmethod
|
|
def _mergeListElements(
|
|
accumulatedElem: Dict[str, Any],
|
|
newFragmentElem: Dict[str, Any],
|
|
isFragment: bool
|
|
) -> Dict[str, Any]:
|
|
"""Merge two list elements (bullet_list or numbered_list)."""
|
|
accContent = accumulatedElem.get("content", {})
|
|
fragContent = newFragmentElem.get("content", {})
|
|
|
|
accItems = accContent.get("items", []) if isinstance(accContent, dict) else []
|
|
fragItems = fragContent.get("items", []) if isinstance(fragContent, dict) else []
|
|
|
|
# Merge items with overlap detection
|
|
mergedItems = JsonResponseHandler._mergeItemsWithOverlapDetection(accItems, fragItems)
|
|
|
|
elemType = accumulatedElem.get("type") or newFragmentElem.get("type")
|
|
|
|
return {
|
|
"type": elemType,
|
|
"content": {"items": mergedItems}
|
|
}
|
|
|
|
    @staticmethod
    def _findOverlapStartIndex(
        accumulatedElements: List[Dict[str, Any]],
        overlapElements: List[Dict[str, Any]]
    ) -> int:
        """
        Find the start index in accumulatedElements where overlapElements begin.

        This helps identify where to merge continuation elements by matching
        the overlap elements with the end of accumulated elements.

        Matching is structural: element types must agree, and for tables /
        lists the trailing rows / items of the accumulated element must equal
        the overlap element's rows / items. Non-dict elements never match.

        Args:
            accumulatedElements: List of accumulated elements
            overlapElements: List of overlap elements from continuation response

        Returns:
            Index where overlap starts, or -1 if not found
        """
        if not overlapElements or not accumulatedElements:
            return -1

        # Try to find overlap by matching element structures
        # Start from the end of accumulatedElements and work backwards
        overlapLen = len(overlapElements)
        accLen = len(accumulatedElements)

        # More overlap elements than accumulated elements cannot match
        if overlapLen > accLen:
            return -1

        # Try matching from different start positions; only the last
        # overlapLen positions can host the overlap.
        for startIdx in range(max(0, accLen - overlapLen), accLen):
            # Check if elements from startIdx match overlapElements
            matches = True
            # Near the list end fewer than overlapLen elements remain;
            # compare only the available pairs (allows a partial overlap).
            for i in range(min(overlapLen, accLen - startIdx)):
                accElem = accumulatedElements[startIdx + i]
                overlapElem = overlapElements[i]

                # Compare element types
                if isinstance(accElem, dict) and isinstance(overlapElem, dict):
                    accType = accElem.get("type")
                    overlapType = overlapElem.get("type")
                    if accType != overlapType:
                        matches = False
                        break

                    # For tables, compare row counts or last rows
                    # (rows may live flat on the element or under "content")
                    if accType == "table":
                        accRows = accElem.get("rows", []) or (accElem.get("content", {}).get("rows", []) if isinstance(accElem.get("content"), dict) else [])
                        overlapRows = overlapElem.get("rows", []) or (overlapElem.get("content", {}).get("rows", []) if isinstance(overlapElem.get("content"), dict) else [])
                        if accRows and overlapRows:
                            # Check if last rows match
                            if len(accRows) >= len(overlapRows):
                                lastAccRows = accRows[-len(overlapRows):]
                                if lastAccRows != overlapRows:
                                    matches = False
                                    break
                    # For lists, compare items (same flat/nested tolerance)
                    elif accType in ["bullet_list", "numbered_list"]:
                        accItems = accElem.get("items", []) or (accElem.get("content", {}).get("items", []) if isinstance(accElem.get("content"), dict) else [])
                        overlapItems = overlapElem.get("items", []) or (overlapElem.get("content", {}).get("items", []) if isinstance(overlapElem.get("content"), dict) else [])
                        if accItems and overlapItems:
                            if len(accItems) >= len(overlapItems):
                                lastAccItems = accItems[-len(overlapItems):]
                                if lastAccItems != overlapItems:
                                    matches = False
                                    break
                else:
                    # Non-dict elements cannot be compared structurally
                    matches = False
                    break

            if matches:
                return startIdx

        return -1
|
|
|
|
@staticmethod
|
|
def _mergeRowsWithOverlapDetection(
|
|
accRows: List[List[str]],
|
|
fragRows: List[List[str]]
|
|
) -> List[List[str]]:
|
|
"""
|
|
Merge two row arrays, detecting and removing overlaps.
|
|
|
|
Overlap detection: Compare rows to find duplicates.
|
|
Missing parts: Complete with null values if needed.
|
|
"""
|
|
if not accRows:
|
|
return fragRows
|
|
if not fragRows:
|
|
return accRows
|
|
|
|
# Find overlap by comparing last rows of accRows with first rows of fragRows
|
|
overlapStart = 0
|
|
maxOverlap = min(len(accRows), len(fragRows))
|
|
|
|
# Find the longest overlap
|
|
for overlapLen in range(maxOverlap, 0, -1):
|
|
accSuffix = accRows[-overlapLen:]
|
|
fragPrefix = fragRows[:overlapLen]
|
|
|
|
# Compare rows (exact match)
|
|
if accSuffix == fragPrefix:
|
|
overlapStart = overlapLen
|
|
break
|
|
|
|
# Merge: accumulated rows + non-overlapping fragment rows
|
|
merged = accRows + fragRows[overlapStart:]
|
|
|
|
return merged
|
|
|
|
@staticmethod
|
|
def _mergeItemsWithOverlapDetection(
|
|
accItems: List[str],
|
|
fragItems: List[str]
|
|
) -> List[str]:
|
|
"""
|
|
Merge two item arrays (for lists), detecting and removing overlaps.
|
|
|
|
Overlap detection: Compare items to find duplicates.
|
|
"""
|
|
if not accItems:
|
|
return fragItems
|
|
if not fragItems:
|
|
return accItems
|
|
|
|
# Find overlap by comparing last items of accItems with first items of fragItems
|
|
overlapStart = 0
|
|
maxOverlap = min(len(accItems), len(fragItems))
|
|
|
|
# Find the longest overlap
|
|
for overlapLen in range(maxOverlap, 0, -1):
|
|
accSuffix = accItems[-overlapLen:]
|
|
fragPrefix = fragItems[:overlapLen]
|
|
|
|
# Compare items (exact match)
|
|
if accSuffix == fragPrefix:
|
|
overlapStart = overlapLen
|
|
break
|
|
|
|
# Merge: accumulated items + non-overlapping fragment items
|
|
merged = accItems + fragItems[overlapStart:]
|
|
|
|
return merged
|
|
|
|
@staticmethod
|
|
def _extractOverlapAndContinuation(jsonString: str) -> Tuple[Optional[List[Dict[str, Any]]], Optional[str]]:
|
|
"""
|
|
Extract overlap and continuation sections from AI response with explicit overlap structure.
|
|
|
|
Expected format:
|
|
{
|
|
"overlap": [...], // Elements to repeat for merging
|
|
"continuation": [...] // New elements to add
|
|
}
|
|
|
|
Or alternative format:
|
|
{
|
|
"overlap": "...", // Overlap as string
|
|
"continuation": "..." // Continuation as string
|
|
}
|
|
|
|
Args:
|
|
jsonString: JSON string that may contain overlap/continuation structure
|
|
|
|
Returns:
|
|
Tuple of (overlap_elements, continuation_json_string) or (None, None) if not found
|
|
"""
|
|
if not jsonString:
|
|
return None, None
|
|
|
|
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText, tryParseJson, closeJsonStructures
|
|
|
|
# Extract and normalize JSON
|
|
extracted = stripCodeFences(normalizeJsonText(jsonString)).strip()
|
|
if not extracted:
|
|
return None, None
|
|
|
|
# Try to parse
|
|
try:
|
|
closed = closeJsonStructures(extracted)
|
|
parsed, parseErr, _ = tryParseJson(closed)
|
|
|
|
if parseErr is None and isinstance(parsed, dict):
|
|
# Check for overlap/continuation structure
|
|
overlap = parsed.get("overlap")
|
|
continuation = parsed.get("continuation")
|
|
|
|
if overlap is not None and continuation is not None:
|
|
# Found explicit overlap structure
|
|
overlapElements = None
|
|
continuationJson = None
|
|
|
|
# Extract overlap elements
|
|
if isinstance(overlap, list):
|
|
overlapElements = overlap
|
|
elif isinstance(overlap, str):
|
|
# Overlap is a string - try to parse it
|
|
try:
|
|
overlapParsed, _, _ = tryParseJson(closeJsonStructures(overlap))
|
|
if isinstance(overlapParsed, list):
|
|
overlapElements = overlapParsed
|
|
except Exception:
|
|
pass
|
|
|
|
# Extract continuation JSON
|
|
if isinstance(continuation, (dict, list)):
|
|
continuationJson = json.dumps(continuation, indent=2, ensure_ascii=False)
|
|
elif isinstance(continuation, str):
|
|
continuationJson = continuation
|
|
|
|
if overlapElements is not None and continuationJson:
|
|
return overlapElements, continuationJson
|
|
except Exception:
|
|
pass
|
|
|
|
return None, None
|
|
|
|
    @staticmethod
    def _mergeWithExplicitOverlap(
        accumulated: str,
        continuationJson: str,
        overlapElements: List[Dict[str, Any]]
    ) -> str:
        """
        Merge accumulated JSON with continuation JSON using explicit overlap information.

        Strategy:
        1. Find overlap in accumulated using overlapElements
        2. Remove overlapping elements from accumulated
        3. Append continuation JSON

        Args:
            accumulated: Previously accumulated JSON string
            continuationJson: Continuation JSON string (new content)
            overlapElements: List of overlap elements from AI response

        Returns:
            Merged JSON string
        """
        # Trivial cases: one side empty - return the other unchanged.
        if not accumulated:
            return continuationJson
        if not continuationJson:
            return accumulated

        # NOTE: tryParseJson / closeJsonStructures are imported for parity with
        # sibling methods but not used directly here.
        from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText, tryParseJson, closeJsonStructures

        # Normalize accumulated to the canonical {"elements": [...]} shape
        accumulatedExtracted = stripCodeFences(normalizeJsonText(accumulated)).strip()
        accumulatedNormalized = JsonResponseHandler._normalizeToElementsStructure(
            accumulatedExtracted, accumulated
        )

        # Normalize continuation the same way
        continuationExtracted = stripCodeFences(normalizeJsonText(continuationJson)).strip()
        continuationNormalized = JsonResponseHandler._normalizeToElementsStructure(
            continuationExtracted, continuationJson
        )

        # Preferred path: both normalized - structure-based merge with overlap info
        if accumulatedNormalized and continuationNormalized:
            merged = JsonResponseHandler._mergeJsonStructuresGeneric(
                accumulatedNormalized, continuationNormalized, accumulatedExtracted, continuationExtracted,
                overlapElements=overlapElements
            )
            if merged:
                return json.dumps(merged, indent=2, ensure_ascii=False)

        # Fallback: locate the overlap manually in the accumulated elements,
        # drop the duplicated tail, then append the continuation elements.
        if accumulatedNormalized and overlapElements:
            accumulatedElements = accumulatedNormalized.get("elements", [])
            overlapStartIndex = JsonResponseHandler._findOverlapStartIndex(accumulatedElements, overlapElements)

            if overlapStartIndex >= 0:
                # Remove overlapping elements
                accumulatedElements = accumulatedElements[:overlapStartIndex]
                accumulatedNormalized["elements"] = accumulatedElements

                # Merge continuation; if it failed to normalize, fall through
                # to the string-level fallback below instead.
                if continuationNormalized:
                    continuationElements = continuationNormalized.get("elements", [])
                    accumulatedElements.extend(continuationElements)
                    accumulatedNormalized["elements"] = accumulatedElements
                    return json.dumps(accumulatedNormalized, indent=2, ensure_ascii=False)

        # Last resort: simple concatenation with overlap fallback handling
        return JsonResponseHandler._mergeJsonStringsWithOverlapFallback(accumulated, continuationJson)
|
|
|
|
    @staticmethod
    def _extractValidJsonPrefix(jsonString: str) -> str:
        """
        Extract the longest valid JSON prefix from a string that may be cut randomly.

        Strategy:
        1. Try to find the longest prefix that can be closed and parsed
        2. Handle random cuts (mid-string, mid-number, etc.)
        3. Return the longest valid prefix found

        Args:
            jsonString: JSON string that may be cut randomly

        Returns:
            Longest valid JSON prefix (already closed via closeJsonStructures),
            or empty string if none found
        """
        if not jsonString or not jsonString.strip():
            return ""

        from modules.shared.jsonUtils import tryParseJson, closeJsonStructures

        # Strategy 1: progressive truncation to find the longest valid JSON.
        # Candidate lengths are generated coarsely (5% steps) plus a few
        # fine-grained points near the end, then tried longest-first.
        bestValid = ""
        bestLength = 0
        maxLen = len(jsonString)

        # Generate test lengths: full, 95%, 90%, ..., 10%
        testLengths = []
        for percent in range(100, 9, -5):
            testLen = int(maxLen * percent / 100)
            if testLen > bestLength:
                testLengths.append(testLen)

        # Also test specific points near the end (common cut points)
        for offset in [200, 100, 50, 20, 10, 5, 2, 1]:
            if maxLen > offset:
                testLen = maxLen - offset
                if testLen > bestLength:
                    testLengths.append(testLen)

        # Sort and deduplicate; longest candidates first
        testLengths = sorted(set(testLengths), reverse=True)

        for testLen in testLengths:
            if testLen <= bestLength:
                continue  # Already found better

            testStr = jsonString[:testLen]
            if not testStr.strip():
                continue

            # Try to close the truncated prefix and parse it
            try:
                closed = closeJsonStructures(testStr)
                parsed, parseErr, _ = tryParseJson(closed)

                if parseErr is None and parsed is not None:
                    # Valid JSON found - keep the closed form of the longest prefix
                    if testLen > bestLength:
                        bestValid = closed
                        bestLength = testLen
            except Exception:
                continue

        # Strategy 2: if we found valid JSON, return it
        if bestValid:
            return bestValid

        # Strategy 3: try to extract balanced JSON (first complete structure)
        jsonStripped = jsonString.strip()

        if jsonStripped.startswith('{') or jsonStripped.startswith('['):
            # Try to extract balanced JSON
            from modules.shared.jsonUtils import extractFirstBalancedJson
            balanced = extractFirstBalancedJson(jsonStripped)
            if balanced and balanced != jsonStripped:
                try:
                    closed = closeJsonStructures(balanced)
                    parsed, parseErr, _ = tryParseJson(closed)
                    if parseErr is None:
                        return closed
                except Exception:
                    pass

        # Strategy 4: try to repair by removing incomplete trailing structures.
        # Truncate at the last comma (the last complete element / key-value
        # pair is assumed to end there), then close and re-parse.
        try:
            # For arrays: find last complete element
            if jsonStripped.startswith('['):
                lastComma = jsonStripped.rfind(',')
                if lastComma > 0:
                    # Try prefix up to last comma
                    prefix = jsonStripped[:lastComma].strip()
                    if prefix.endswith(','):
                        prefix = prefix[:-1].strip()
                    if prefix:
                        closed = closeJsonStructures(prefix + ']')
                        parsed, parseErr, _ = tryParseJson(closed)
                        if parseErr is None:
                            return closed

            # For objects: find last complete key-value pair
            elif jsonStripped.startswith('{'):
                lastComma = jsonStripped.rfind(',')
                if lastComma > 0:
                    # Try prefix up to last comma
                    prefix = jsonStripped[:lastComma].strip()
                    if prefix.endswith(','):
                        prefix = prefix[:-1].strip()
                    if prefix:
                        closed = closeJsonStructures(prefix + '}')
                        parsed, parseErr, _ = tryParseJson(closed)
                        if parseErr is None:
                            return closed
        except Exception:
            pass

        # Last resort: return empty (caller will handle)
        return ""
|
|
|
|
    @staticmethod
    def _smartConcatenate(accumulated: str, newFragment: str) -> str:
        """
        Smart concatenation that tries to merge JSON fragments intelligently.

        Strategy:
        1. Extract valid JSON from both fragments
        2. Parse both as JSON objects/arrays
        3. Merge them structurally
        4. Return valid JSON

        Args:
            accumulated: Accumulated JSON string
            newFragment: New fragment to append

        Returns:
            Merged string with valid JSON, or empty string if merging was not
            possible (callers treat "" as a merge failure signal).
        """
        # Nothing to merge if either side is missing - signal failure with "".
        if not accumulated or not newFragment:
            return ""

        from modules.shared.jsonUtils import closeJsonStructures, tryParseJson

        # Extract the longest valid JSON prefix from each side; if no valid
        # prefix can be isolated, fall back to the raw string.
        accumulatedValid = JsonResponseHandler._extractValidJsonPrefix(accumulated)
        newFragmentValid = JsonResponseHandler._extractValidJsonPrefix(newFragment)

        if not accumulatedValid:
            accumulatedValid = accumulated
        if not newFragmentValid:
            newFragmentValid = newFragment

        # Primary path: close open brackets/braces on both sides, parse them,
        # and merge structurally when both parse cleanly.
        try:
            closedAccumulated = closeJsonStructures(accumulatedValid)
            parsedAccumulated, parseErr1, _ = tryParseJson(closedAccumulated)

            closedNewFragment = closeJsonStructures(newFragmentValid)
            parsedNewFragment, parseErr2, _ = tryParseJson(closedNewFragment)

            # If both parse successfully, merge structurally
            if parseErr1 is None and parseErr2 is None:
                # Normalize both to the common "elements" document structure
                # before handing off to the generic structural merge.
                accNormalized = JsonResponseHandler._normalizeToElementsStructure(closedAccumulated, accumulated)
                newNormalized = JsonResponseHandler._normalizeToElementsStructure(closedNewFragment, newFragment)

                if accNormalized and newNormalized:
                    merged = JsonResponseHandler._mergeJsonStructuresGeneric(
                        accNormalized, newNormalized, closedAccumulated, closedNewFragment
                    )
                    if merged:
                        return json.dumps(merged, indent=2, ensure_ascii=False)

            # If only accumulated parses, return it (keep what we have).
            if parseErr1 is None and parsedAccumulated:
                return json.dumps(parsedAccumulated, indent=2, ensure_ascii=False)

            # If only the new fragment parses, return that instead.
            if parseErr2 is None and parsedNewFragment:
                return json.dumps(parsedNewFragment, indent=2, ensure_ascii=False)
        except Exception:
            # Structural merge is best-effort; fall through to string repair.
            pass

        # Fallback: try simple string concatenation with repair.
        accumulatedStripped = accumulated.strip()
        newFragmentStripped = newFragment.strip()

        # If accumulated doesn't end with } or ], it was likely cut mid-structure.
        if accumulatedStripped and not accumulatedStripped.endswith(('}', ']')):
            try:
                closedAccumulated = closeJsonStructures(accumulatedStripped)

                # Choose a join style based on how the new fragment starts.
                if newFragmentStripped.startswith(','):
                    # Fragment continues a list/object: drop the leading comma
                    # before appending.
                    merged = closedAccumulated.rstrip() + newFragmentStripped.lstrip(',').strip()
                elif newFragmentStripped.startswith(('}', ']')):
                    # Fragment starts with closing - might be completing accumulated
                    merged = closedAccumulated.rstrip() + newFragmentStripped
                else:
                    # Plain continuation: insert a comma separator unless the
                    # accumulated side already ends in one (or in an opener).
                    if not closedAccumulated.rstrip().endswith((',', '[', '{')):
                        merged = closedAccumulated.rstrip() + ',' + newFragmentStripped
                    else:
                        merged = closedAccumulated.rstrip() + newFragmentStripped

                # Accept the concatenation only if it repairs into valid JSON.
                repaired = closeJsonStructures(merged)
                parsed, parseErr, _ = tryParseJson(repaired)
                if parseErr is None:
                    return json.dumps(parsed, indent=2, ensure_ascii=False)
            except Exception:
                pass

        # If smart concatenation failed, return empty (caller will handle).
        return ""
|
|
|
|
    @staticmethod
    def _mergeJsonStringsWithOverlapFallback(
        accumulated: str,
        newFragment: str
    ) -> str:
        """
        Fallback overlap detection using string comparison.
        Used when both strings are complete JSON structures or fragments.

        CRITICAL: Never returns empty JSON - always returns at least accumulated.

        Args:
            accumulated: JSON text accumulated from earlier iterations.
            newFragment: Newly received JSON text to merge in.

        Returns:
            Merged JSON string. Falls back to ``accumulated`` (or "{}") rather
            than ever returning an empty string.
        """
        # Trivial cases: one side empty -> return the other (never "").
        if not accumulated:
            return newFragment if newFragment else "{}"
        if not newFragment:
            return accumulated

        from modules.shared.jsonUtils import tryParseJson, closeJsonStructures

        # Strategy 1: Try to extract valid JSON parts from both fragments.
        # This handles random cuts better by finding the longest valid prefix.

        # Extract valid JSON from accumulated (find longest valid prefix)
        accumulatedValid = JsonResponseHandler._extractValidJsonPrefix(accumulated)

        # Extract valid JSON from newFragment (find longest valid prefix)
        newFragmentValid = JsonResponseHandler._extractValidJsonPrefix(newFragment)

        # If we have valid JSON from both, try a structure-based merge.
        if accumulatedValid and newFragmentValid:
            try:
                parsedAccumulated, parseErr1, _ = tryParseJson(closeJsonStructures(accumulatedValid))
                parsedNewFragment, parseErr2, _ = tryParseJson(closeJsonStructures(newFragmentValid))

                if parseErr1 is None and parseErr2 is None:
                    # Both are valid JSON - normalize and merge structurally.
                    accNormalized = JsonResponseHandler._normalizeToElementsStructure(accumulatedValid, accumulated)
                    newNormalized = JsonResponseHandler._normalizeToElementsStructure(newFragmentValid, newFragment)

                    if accNormalized and newNormalized:
                        merged = JsonResponseHandler._mergeJsonStructuresGeneric(
                            accNormalized, newNormalized, accumulatedValid, newFragmentValid
                        )
                        if merged:
                            return json.dumps(merged, indent=2, ensure_ascii=False)
            except Exception:
                # Best-effort; fall through to character-level strategies.
                pass

        # Strategy 2: Find longest common suffix/prefix match (character-level overlap)
        maxOverlapLen = min(len(accumulated), len(newFragment))

        # Start from maximum possible overlap down to 1 character,
        # but limit to a reasonable overlap (max 50% of the shorter string).
        maxReasonableOverlap = min(maxOverlapLen, min(len(accumulated), len(newFragment)) // 2)

        for overlapLen in range(maxReasonableOverlap, 0, -1):
            accumulatedSuffix = accumulated[-overlapLen:]
            newFragmentPrefix = newFragment[:overlapLen]

            if accumulatedSuffix == newFragmentPrefix:
                # Found overlap - drop the duplicated region from the fragment.
                logger.debug(f"Found overlap of {overlapLen} characters, removing duplicate")
                merged = accumulated + newFragment[overlapLen:]
                # Ensure result is not empty
                if merged and merged.strip():
                    return merged

        # Strategy 3: No overlap found - try smart concatenation, which checks
        # whether newFragment can be appended without breaking JSON structure.
        merged = JsonResponseHandler._smartConcatenate(accumulated, newFragment)
        if merged and merged.strip():
            return merged

        # Strategy 4: Last resort - simple concatenation (but ensure the result
        # is non-empty and not a degenerate empty structure).
        result = accumulated + newFragment
        if not result or result.strip() in ['{}', '[]', '']:
            # Return accumulated as fallback (at least we have that)
            return accumulated if accumulated else "{}"

        # CRITICAL: Try to repair and validate the merged result.
        try:
            repaired = closeJsonStructures(result)
            parsed, parseErr, _ = tryParseJson(repaired)
            if parseErr is None:
                # Valid JSON - return it normalized.
                return json.dumps(parsed, indent=2, ensure_ascii=False)
            else:
                # Still invalid - try to salvage the longest valid prefix.
                validPrefix = JsonResponseHandler._extractValidJsonPrefix(result)
                if validPrefix:
                    parsedPrefix, parseErr2, _ = tryParseJson(validPrefix)
                    if parseErr2 is None:
                        return json.dumps(parsedPrefix, indent=2, ensure_ascii=False)
        except Exception:
            pass

        # If repair failed, return accumulated (at least we have that),
        # normalized if it can itself be repaired into valid JSON.
        if accumulated:
            try:
                repairedAccumulated = closeJsonStructures(accumulated)
                parsedAcc, parseErrAcc, _ = tryParseJson(repairedAccumulated)
                if parseErrAcc is None:
                    return json.dumps(parsedAcc, indent=2, ensure_ascii=False)
            except Exception:
                pass
            return accumulated

        # Last resort: return empty structure
        return "{}"
|
|
|
|
@staticmethod
|
|
def isJsonComplete(parsedJson: Dict[str, Any]) -> bool:
|
|
"""
|
|
GENERIC function to check if parsed JSON structure is complete.
|
|
|
|
Works for ANY JSON structure - no specific logic for content types.
|
|
|
|
Completeness checks (all generic):
|
|
- All arrays are properly closed
|
|
- All objects are properly closed
|
|
- No incomplete structures
|
|
- Recursive validation of nested structures
|
|
|
|
Args:
|
|
parsedJson: Parsed JSON object
|
|
|
|
Returns:
|
|
True if JSON is complete, False otherwise
|
|
"""
|
|
def _checkStructureComplete(obj: Any, depth: int = 0) -> bool:
|
|
"""Recursively check if structure is complete."""
|
|
if depth > 50: # Prevent infinite recursion
|
|
return True
|
|
|
|
if isinstance(obj, dict):
|
|
# Check all values recursively
|
|
for value in obj.values():
|
|
if not _checkStructureComplete(value, depth + 1):
|
|
return False
|
|
return True
|
|
elif isinstance(obj, list):
|
|
# Check all items recursively
|
|
for item in obj:
|
|
if not _checkStructureComplete(item, depth + 1):
|
|
return False
|
|
return True
|
|
else:
|
|
# Primitive value - always complete
|
|
return True
|
|
|
|
try:
|
|
return _checkStructureComplete(parsedJson)
|
|
except Exception as e:
|
|
logger.debug(f"Error checking JSON completeness: {e}")
|
|
return False
|
|
|
|
@staticmethod
|
|
def finalizeJson(parsedJson: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
GENERIC function to finalize complete JSON by adding missing closing elements and repairing corruption.
|
|
|
|
Works for ANY JSON structure - no specific logic for content types.
|
|
|
|
Steps (all generic):
|
|
1. Analyze structure for missing closing elements (recursively)
|
|
2. Add closing brackets/braces where needed
|
|
3. Repair any remaining corruption
|
|
4. Validate final structure
|
|
|
|
Args:
|
|
parsedJson: Parsed JSON object that needs finalization
|
|
|
|
Returns:
|
|
Finalized JSON object
|
|
"""
|
|
# For now, just return as-is since parsing succeeded
|
|
# If needed, can add logic to check for incomplete structures
|
|
# and add closing elements
|
|
return parsedJson
|
|
|
|
@staticmethod
|
|
def extractKpiValuesFromJson(
|
|
parsedJson: Dict[str, Any],
|
|
kpis: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Extract current KPI values from parsed JSON and update KPI objects.
|
|
|
|
Args:
|
|
parsedJson: Parsed JSON object
|
|
kpis: List of KPI objects (will be updated with currentValue)
|
|
|
|
Returns:
|
|
Updated list of KPI objects with currentValue set
|
|
"""
|
|
updatedKpis = []
|
|
|
|
for kpi in kpis:
|
|
kpiId = kpi.get("id")
|
|
jsonPath = kpi.get("jsonPath")
|
|
|
|
if not kpiId or not jsonPath:
|
|
continue
|
|
|
|
# Create copy of KPI object
|
|
updatedKpi = kpi.copy()
|
|
|
|
try:
|
|
# Extract value using JSON path
|
|
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
|
|
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
|
|
|
|
# Handle None (path doesn't exist - incomplete JSON)
|
|
if value is None:
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
|
|
# Count items/rows/elements based on type
|
|
elif isinstance(value, list):
|
|
updatedKpi["currentValue"] = len(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
|
|
elif isinstance(value, (int, float)):
|
|
updatedKpi["currentValue"] = int(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: numeric value {int(value)}")
|
|
else:
|
|
updatedKpi["currentValue"] = 0
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: non-list/non-numeric value, set to 0")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}")
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
|
|
updatedKpis.append(updatedKpi)
|
|
|
|
return updatedKpis
|
|
|
|
@staticmethod
|
|
def extractKpiValuesFromIncompleteJson(
|
|
jsonString: str,
|
|
kpis: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Extract KPI values from incomplete JSON string.
|
|
Uses existing JSON completion function to close incomplete structures, then extracts KPIs.
|
|
|
|
Args:
|
|
jsonString: Incomplete JSON string
|
|
kpis: List of KPI objects
|
|
|
|
Returns:
|
|
Updated list of KPI objects with currentValue set
|
|
"""
|
|
updatedKpis = []
|
|
|
|
for kpi in kpis:
|
|
kpiId = kpi.get("id")
|
|
jsonPath = kpi.get("jsonPath")
|
|
|
|
if not kpiId or not jsonPath:
|
|
continue
|
|
|
|
updatedKpi = kpi.copy()
|
|
|
|
try:
|
|
# Use existing JSON completion function to close incomplete structures
|
|
from modules.shared.jsonUtils import extractJsonString, closeJsonStructures
|
|
|
|
# Extract JSON string and complete it with missing closing elements
|
|
extracted = extractJsonString(jsonString)
|
|
completed = closeJsonStructures(extracted)
|
|
|
|
# Parse completed JSON
|
|
parsed = json.loads(completed)
|
|
|
|
# Extract value using path
|
|
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
|
|
|
|
# Handle None (path doesn't exist - incomplete JSON)
|
|
if value is None:
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
|
|
# Count items/rows/elements based on type
|
|
elif isinstance(value, list):
|
|
updatedKpi["currentValue"] = len(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
|
|
elif isinstance(value, (int, float)):
|
|
updatedKpi["currentValue"] = int(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: numeric value {int(value)}")
|
|
else:
|
|
updatedKpi["currentValue"] = 0
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: non-list/non-numeric value, set to 0")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error extracting KPI {kpiId} from incomplete JSON: {e}")
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
|
|
updatedKpis.append(updatedKpi)
|
|
|
|
return updatedKpis
|
|
|
|
@staticmethod
|
|
def _extractValueByPath(obj: Any, path: str) -> Any:
|
|
"""
|
|
Extract value from object using dot-notation path with array indices.
|
|
|
|
Example: "sections[0].elements[0].items"
|
|
Returns None if path doesn't exist (for incomplete JSON handling).
|
|
"""
|
|
parts = path.split('.')
|
|
current = obj
|
|
|
|
for part in parts:
|
|
if '[' in part and ']' in part:
|
|
# Handle array access: "sections[0]"
|
|
key = part[:part.index('[')]
|
|
index = int(part[part.index('[') + 1:part.index(']')])
|
|
|
|
if key:
|
|
if isinstance(current, dict):
|
|
current = current.get(key)
|
|
if current is None:
|
|
return None # Key doesn't exist
|
|
else:
|
|
return None # Can't access key on non-dict
|
|
|
|
if isinstance(current, list):
|
|
if 0 <= index < len(current):
|
|
current = current[index]
|
|
else:
|
|
# Index out of range - return None for incomplete JSON
|
|
return None
|
|
else:
|
|
# Not a list, can't index
|
|
return None
|
|
else:
|
|
# Handle dict access
|
|
if isinstance(current, dict):
|
|
current = current.get(part)
|
|
if current is None:
|
|
return None # Key doesn't exist
|
|
else:
|
|
return None # Can't access key on non-dict
|
|
|
|
return current
|
|
|
|
@staticmethod
|
|
def validateKpiProgression(
|
|
accumulationState: JsonAccumulationState,
|
|
updatedKpis: List[Dict[str, Any]]
|
|
) -> Tuple[bool, str]:
|
|
"""
|
|
Validate KPI progression from parsed JSON.
|
|
|
|
Validation rules:
|
|
- Proceed if: At least ONE KPI increased
|
|
- Stop if: Any KPI went backwards → return (False, "KPI went backwards")
|
|
- Stop if: No KPIs progressed → return (False, "No progress")
|
|
- Finish if: All KPIs completed OR JSON is complete → return (True, "Complete")
|
|
|
|
Args:
|
|
accumulationState: Current accumulation state (contains kpis)
|
|
updatedKpis: Updated KPI objects with currentValue set
|
|
|
|
Returns:
|
|
Tuple of (shouldProceed, reason)
|
|
"""
|
|
if not accumulationState.kpis:
|
|
# No KPIs defined - always proceed
|
|
return True, "No KPIs defined"
|
|
|
|
# Build dict of last values for comparison
|
|
lastValues = {kpi.get("id"): kpi.get("currentValue", 0) for kpi in accumulationState.kpis}
|
|
logger.debug(f"KPI validation: lastValues = {lastValues}")
|
|
logger.debug(f"KPI validation: updatedKpis = {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
|
|
|
|
# Check if any KPI went backwards
|
|
for updatedKpi in updatedKpis:
|
|
kpiId = updatedKpi.get("id")
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if kpiId in lastValues:
|
|
lastValue = lastValues[kpiId]
|
|
if currentValue < lastValue:
|
|
logger.warning(f"KPI {kpiId} went BACKWARDS: {lastValue} → {currentValue}")
|
|
return False, f"KPI {kpiId} went backwards"
|
|
|
|
# Check if all KPIs are completed
|
|
allCompleted = True
|
|
for updatedKpi in updatedKpis:
|
|
targetValue = updatedKpi.get("targetValue", 0)
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if currentValue < targetValue:
|
|
allCompleted = False
|
|
break
|
|
|
|
if allCompleted:
|
|
logger.info("All KPIs completed")
|
|
return True, "All KPIs completed"
|
|
|
|
# Check if at least one KPI progressed
|
|
atLeastOneProgressed = False
|
|
for updatedKpi in updatedKpis:
|
|
kpiId = updatedKpi.get("id")
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if kpiId in lastValues:
|
|
lastValue = lastValues[kpiId]
|
|
if currentValue > lastValue:
|
|
atLeastOneProgressed = True
|
|
logger.info(f"KPI {kpiId} progressed: {lastValue} → {currentValue}")
|
|
break
|
|
else:
|
|
# First time seeing this KPI - if it has a value, it's progress
|
|
if currentValue > 0:
|
|
atLeastOneProgressed = True
|
|
logger.info(f"KPI {kpiId} initialized: {currentValue}")
|
|
break
|
|
|
|
if not atLeastOneProgressed:
|
|
logger.warning(f"No KPIs progressed. Last values: {lastValues}, Current values: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
|
|
return False, "No progress"
|
|
|
|
return True, "Progress detected"
|
|
|
|
@staticmethod
|
|
def accumulateAndParseJsonFragments(
|
|
accumulatedJsonString: str,
|
|
newFragmentString: str,
|
|
allSections: List[Dict[str, Any]],
|
|
iteration: int
|
|
) -> Tuple[str, List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
|
|
"""
|
|
Accumulate JSON fragments and parse when complete.
|
|
|
|
GENERIC function that handles:
|
|
1. Concatenating JSON strings with overlap detection
|
|
2. Parsing the accumulated string
|
|
3. Extracting sections (partial if incomplete, final if complete)
|
|
4. Determining completion status
|
|
|
|
Args:
|
|
accumulatedJsonString: Previously accumulated JSON string
|
|
newFragmentString: New fragment string from current iteration
|
|
allSections: Sections extracted so far (for prompt context)
|
|
iteration: Current iteration number
|
|
|
|
Returns:
|
|
Tuple of:
|
|
- accumulatedJsonString: Updated accumulated string
|
|
- sections: Extracted sections (partial if incomplete, final if complete)
|
|
- isComplete: True if JSON is complete and valid
|
|
- parsedResult: Parsed JSON object (if parsing succeeded)
|
|
"""
|
|
|
|
# Step 1: Clean encoding issues from accumulated string (check end of first delivered part)
|
|
cleanedAccumulated = JsonResponseHandler.cleanEncodingIssues(accumulatedJsonString)
|
|
|
|
# Step 2: Clean encoding issues from new fragment
|
|
cleanedFragment = JsonResponseHandler.cleanEncodingIssues(newFragmentString)
|
|
|
|
# Step 3: Concatenate with overlap handling
|
|
combinedString, hasOverlap = JsonResponseHandler.mergeJsonStringsWithOverlap(
|
|
cleanedAccumulated,
|
|
cleanedFragment
|
|
)
|
|
# Note: hasOverlap indicates if iterations should continue, but this function
|
|
# doesn't control iterations, so we just use the merged string
|
|
|
|
# Step 4: Try to parse
|
|
try:
|
|
extracted = extractJsonString(combinedString)
|
|
parsedResult = json.loads(extracted)
|
|
|
|
# Step 5: Parsing succeeded - check completeness
|
|
isComplete = JsonResponseHandler.isJsonComplete(parsedResult)
|
|
|
|
if isComplete:
|
|
# Step 6: Complete JSON - finalize
|
|
finalizedJson = JsonResponseHandler.finalizeJson(parsedResult)
|
|
sections = extractSectionsFromDocument(finalizedJson)
|
|
logger.info(f"Iteration {iteration}: JSON accumulation complete, extracted {len(sections)} sections")
|
|
return combinedString, sections, True, finalizedJson
|
|
else:
|
|
# Step 7: Incomplete but parseable - extract partial sections
|
|
sections = extractSectionsFromDocument(parsedResult)
|
|
logger.info(f"Iteration {iteration}: JSON accumulation incomplete but parseable, extracted {len(sections)} partial sections")
|
|
return combinedString, sections, False, parsedResult
|
|
|
|
except json.JSONDecodeError:
|
|
# Step 8: Still broken - repair and extract partial sections
|
|
repaired = repairBrokenJson(combinedString)
|
|
if repaired:
|
|
sections = extractSectionsFromDocument(repaired)
|
|
logger.info(f"Iteration {iteration}: JSON accumulation repaired, extracted {len(sections)} sections")
|
|
return combinedString, sections, False, repaired
|
|
else:
|
|
# Repair failed - continue with data BEFORE merging the problematic piece
|
|
# Return previous accumulated string (before adding new fragment)
|
|
# This ensures we don't lose previously accumulated data
|
|
logger.warning(f"Iteration {iteration}: Repair failed, continuing with previous accumulated data")
|
|
return accumulatedJsonString, [], False, None
|
|
|