1507 lines
66 KiB
Python
1507 lines
66 KiB
Python
"""
|
|
JSON Response Handling Module
|
|
|
|
Handles merging of JSON responses from multiple AI iterations, including:
|
|
- Section merging with intelligent overlap detection
|
|
- JSON fragment detection and merging
|
|
- Deep recursive structure merging
|
|
- Overlap detection for complex nested structures
|
|
- String accumulation for iterative JSON generation
|
|
"""
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
|
|
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
|
|
from modules.datamodels.datamodelAi import JsonAccumulationState
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JsonResponseHandler:
|
|
"""Handles JSON response merging and fragment detection for iterative AI generation."""
|
|
|
|
    @staticmethod
    def mergeSectionsIntelligently(
        existingSections: List[Dict[str, Any]],
        newSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Intelligently merge sections from multiple iterations.

        This is a GENERIC merging strategy that handles broken JSON iterations.
        The break can occur anywhere - in any section, at any depth.

        Merging strategies (in order of priority; the first match wins for
        each new section, enforced via the `merged` flag and `continue`):
        1. Same Section ID: Merge sections with identical IDs
        2. Same Content-Type + Position: If last section is incomplete and new section continues it
        3. Same Order: Merge sections with same order value
        4. Structural Analysis: Detect continuation based on content structure

        Args:
            existingSections: Sections accumulated from previous iterations
            newSections: Sections extracted from current iteration
            iteration: Current iteration number (used for logging only)

        Returns:
            Merged list of sections
        """
        # Trivial cases: one side empty -> return the other unchanged.
        if not newSections:
            return existingSections

        if not existingSections:
            return newSections

        # Shallow copy: section dicts are shared with the caller's list and may
        # be replaced (not mutated) at their index by the strategies below.
        mergedSections = existingSections.copy()

        for newSection in newSections:
            merged = False

            # Strategy 1: Same Section ID - merge directly
            newSectionId = newSection.get("id")
            if newSectionId:
                for i, existingSection in enumerate(mergedSections):
                    if existingSection.get("id") == newSectionId:
                        # Merge sections with same ID
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
                        break

            if merged:
                continue

            # Strategy 2: Same Content-Type + Position (continuation detection)
            # Check if last section is incomplete and new section continues it.
            # NOTE(review): two sections both missing "content_type" compare as
            # None == None here and can fall into this branch - confirm intended.
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")

                if lastContentType == newContentType:
                    # Same content type - check if last section is incomplete
                    if JsonResponseHandler.isSectionIncomplete(lastSection):
                        # Last section is incomplete, merge with new section
                        mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                            lastSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
                        continue

            # Strategy 3: Same Order value
            newOrder = newSection.get("order")
            if newOrder is not None:
                for i, existingSection in enumerate(mergedSections):
                    existingOrder = existingSection.get("order")
                    if existingOrder is not None and existingOrder == newOrder:
                        # Merge sections with same order
                        mergedSections[i] = JsonResponseHandler.mergeSectionContent(
                            existingSection, newSection, iteration
                        )
                        merged = True
                        logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
                        break

            if merged:
                continue

            # Strategy 4: Structural Analysis - detect continuation
            # For code_block and table: if last section matches new section type,
            # merge them unconditionally (unlike Strategy 2, no incompleteness
            # check - these types commonly split across broken iterations).
            if mergedSections:
                lastSection = mergedSections[-1]
                lastContentType = lastSection.get("content_type")
                newContentType = newSection.get("content_type")

                # Both are code blocks - merge them
                if lastContentType == "code_block" and newContentType == "code_block":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
                    continue

                # Both are tables - merge them (common case for broken JSON iterations)
                if lastContentType == "table" and newContentType == "table":
                    mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                        lastSection, newSection, iteration
                    )
                    merged = True
                    logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
                    continue

            # No merge strategy matched - add as new section
            if not merged:
                mergedSections.append(newSection)
                logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")

        return mergedSections
|
|
|
|
@staticmethod
|
|
def isSectionIncomplete(section: Dict[str, Any]) -> bool:
|
|
"""
|
|
Check if a section is incomplete (broken at the end).
|
|
|
|
This detects incomplete sections based on content analysis:
|
|
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
|
|
- Text sections: ends mid-sentence, ends with incomplete structure
|
|
- Other types: check for incomplete elements
|
|
"""
|
|
contentType = section.get("content_type", "")
|
|
elements = section.get("elements", [])
|
|
|
|
if not elements:
|
|
return False
|
|
|
|
# Handle list of elements
|
|
if isinstance(elements, list) and len(elements) > 0:
|
|
lastElement = elements[-1]
|
|
else:
|
|
lastElement = elements
|
|
|
|
if not isinstance(lastElement, dict):
|
|
return False
|
|
|
|
# Check code_block for incomplete code
|
|
if contentType == "code_block":
|
|
code = lastElement.get("code", "")
|
|
if code:
|
|
# Check if code ends incompletely:
|
|
# - Ends with comma (incomplete CSV line)
|
|
# - Ends with number but no newline (incomplete line)
|
|
# - Ends mid-token (e.g., "23431,23" - incomplete number)
|
|
codeStripped = code.rstrip()
|
|
if codeStripped:
|
|
# Check for incomplete patterns
|
|
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
|
|
# Ends with comma or has comma but no final newline - likely incomplete
|
|
return True
|
|
# Check if last line is incomplete (doesn't end with newline and has partial content)
|
|
if not code.endswith('\n') and codeStripped:
|
|
# No final newline - might be incomplete
|
|
# More sophisticated: check if last number is complete
|
|
lastLine = codeStripped.split('\n')[-1]
|
|
if lastLine and ',' in lastLine:
|
|
# Has commas but might be incomplete
|
|
parts = lastLine.split(',')
|
|
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
|
|
return True
|
|
|
|
# Check table for incomplete rows
|
|
if contentType == "table":
|
|
rows = lastElement.get("rows", [])
|
|
if rows:
|
|
# Check if last row is incomplete (ends with incomplete data)
|
|
lastRow = rows[-1] if isinstance(rows, list) else []
|
|
if isinstance(lastRow, list) and lastRow:
|
|
# CRITICAL: Check if last row doesn't have expected number of columns (if headers exist)
|
|
# This is the PRIMARY indicator of incomplete table rows
|
|
headers = lastElement.get("headers", [])
|
|
if headers and isinstance(headers, list):
|
|
expectedCols = len(headers)
|
|
if len(lastRow) < expectedCols:
|
|
logger.debug(f"Table section incomplete: last row has {len(lastRow)} columns, expected {expectedCols}")
|
|
return True
|
|
# Also check if last row ends with incomplete data (e.g., incomplete string)
|
|
lastCell = lastRow[-1] if lastRow else ""
|
|
if isinstance(lastCell, str):
|
|
# If last cell is incomplete (ends with quote or is very short), section might be incomplete
|
|
if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
|
|
logger.debug(f"Table section incomplete: last cell appears incomplete: '{lastCell}'")
|
|
return True
|
|
# Additional check: if last row has fewer cells than previous rows, it's likely incomplete
|
|
if len(rows) > 1:
|
|
prevRow = rows[-2] if isinstance(rows, list) and len(rows) > 1 else []
|
|
if isinstance(prevRow, list) and len(prevRow) > len(lastRow):
|
|
logger.debug(f"Table section incomplete: last row has {len(lastRow)} cells, previous row has {len(prevRow)}")
|
|
return True
|
|
|
|
# Check paragraph/text for incomplete sentences
|
|
if contentType in ["paragraph", "heading"]:
|
|
text = lastElement.get("text", "")
|
|
if text:
|
|
# Simple heuristic: if doesn't end with sentence-ending punctuation
|
|
textStripped = text.rstrip()
|
|
if textStripped and not textStripped[-1] in '.!?':
|
|
# Might be incomplete, but this is less reliable
|
|
# Only mark as incomplete if very short (likely cut off)
|
|
if len(textStripped) < 20:
|
|
return True
|
|
|
|
# Check lists for incomplete items
|
|
if contentType in ["bullet_list", "numbered_list"]:
|
|
items = lastElement.get("items", [])
|
|
if items and isinstance(items, list):
|
|
# Check if last item is incomplete (very short or ends with incomplete string)
|
|
lastItem = items[-1] if items else None
|
|
if isinstance(lastItem, str) and len(lastItem) < 3:
|
|
return True
|
|
|
|
# Check image for incomplete base64 data
|
|
if contentType == "image":
|
|
imageData = lastElement.get("base64Data", "")
|
|
if imageData:
|
|
# Base64 strings should end with padding ('=' or '==')
|
|
# If it doesn't, it might be incomplete
|
|
stripped = imageData.rstrip()
|
|
if stripped and not stripped.endswith(('=', '==')):
|
|
# Check if it's a valid base64 character sequence that was cut off
|
|
if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
|
|
return True
|
|
# If length is not a multiple of 4 (base64 requirement), it might be incomplete
|
|
if len(stripped) % 4 != 0:
|
|
return True
|
|
|
|
# GENERIC CHECK: Recursively analyze structure for incompleteness
|
|
# This works for ANY structure: arrays, objects, nested, primitives
|
|
return JsonResponseHandler._isStructureIncomplete(lastElement)
|
|
|
|
@staticmethod
|
|
def _isStructureIncomplete(structure: Any, max_depth: int = 10) -> bool:
|
|
"""
|
|
GENERIC recursive check for incomplete structures.
|
|
|
|
Detects incompleteness by analyzing patterns:
|
|
- Arrays: Last item shorter than previous items, incomplete patterns
|
|
- Objects: Last object has fewer keys than pattern, incomplete values
|
|
- Strings: Very short, ends abruptly, incomplete patterns
|
|
- Nested: Recursively checks nested structures
|
|
|
|
Works for ANY JSON structure of any depth/complexity.
|
|
"""
|
|
if max_depth <= 0:
|
|
return False
|
|
|
|
# Arrays/Lists - check for incomplete patterns
|
|
if isinstance(structure, list):
|
|
if len(structure) == 0:
|
|
return False
|
|
|
|
# Check if last item is incomplete compared to previous items
|
|
last_item = structure[-1]
|
|
|
|
# If we have previous items, compare structure
|
|
if len(structure) > 1:
|
|
prev_item = structure[-2]
|
|
|
|
# If last item is a list and previous is a list, check length
|
|
if isinstance(last_item, list) and isinstance(prev_item, list):
|
|
if len(last_item) < len(prev_item):
|
|
return True # Last row/item has fewer elements - likely incomplete
|
|
|
|
# If last item is a dict and previous is a dict, check keys
|
|
if isinstance(last_item, dict) and isinstance(prev_item, dict):
|
|
if len(last_item) < len(prev_item):
|
|
return True # Last object has fewer keys - likely incomplete
|
|
|
|
# Recursively check last item for incompleteness
|
|
if JsonResponseHandler._isStructureIncomplete(last_item, max_depth - 1):
|
|
return True
|
|
|
|
# Objects/Dicts - check for incomplete values
|
|
elif isinstance(structure, dict):
|
|
for key, value in structure.items():
|
|
# Recursively check each value
|
|
if JsonResponseHandler._isStructureIncomplete(value, max_depth - 1):
|
|
return True
|
|
|
|
# Check for incomplete strings
|
|
if isinstance(value, str):
|
|
# Very short strings might be incomplete
|
|
if len(value) > 0 and len(value) < 3:
|
|
return True
|
|
# Strings ending with incomplete patterns (comma, quote, etc.)
|
|
stripped = value.rstrip()
|
|
if stripped and stripped.endswith((',', '"', '\\')):
|
|
return True
|
|
|
|
# Strings - check for incomplete patterns
|
|
elif isinstance(structure, str):
|
|
# Very short strings might be incomplete
|
|
if len(structure) > 0 and len(structure) < 3:
|
|
return True
|
|
# Strings ending with incomplete patterns
|
|
stripped = structure.rstrip()
|
|
if stripped and stripped.endswith((',', '"', '\\')):
|
|
return True
|
|
|
|
return False
|
|
|
|
@staticmethod
|
|
def mergeSectionContent(
|
|
existingSection: Dict[str, Any],
|
|
newSection: Dict[str, Any],
|
|
iteration: int
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Merge content from two sections.
|
|
|
|
Handles different content types:
|
|
- code_block: Append code, handle overlaps, merge incomplete lines
|
|
- paragraph/heading: Append text
|
|
- table: Merge rows
|
|
- list: Merge items
|
|
- Other: Merge elements
|
|
"""
|
|
contentType = existingSection.get("content_type", "")
|
|
existingElements = existingSection.get("elements", [])
|
|
newElements = newSection.get("elements", [])
|
|
|
|
if not newElements:
|
|
return existingSection
|
|
|
|
# Handle list of elements
|
|
if isinstance(existingElements, list):
|
|
existingElem = existingElements[-1] if existingElements else {}
|
|
else:
|
|
existingElem = existingElements
|
|
|
|
if isinstance(newElements, list):
|
|
newElem = newElements[0] if newElements else {}
|
|
else:
|
|
newElem = newElements
|
|
|
|
if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
|
|
return existingSection
|
|
|
|
# Merge based on content type
|
|
if contentType == "code_block":
|
|
existingCode = existingElem.get("code", "")
|
|
newCode = newElem.get("code", "")
|
|
|
|
if existingCode and newCode:
|
|
mergedCode = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
|
|
existingElem["code"] = mergedCode
|
|
# Preserve language from existing or new
|
|
if "language" not in existingElem and "language" in newElem:
|
|
existingElem["language"] = newElem["language"]
|
|
|
|
elif contentType in ["paragraph", "heading"]:
|
|
existingText = existingElem.get("text", "")
|
|
newText = newElem.get("text", "")
|
|
|
|
if existingText and newText:
|
|
# Append text with space if needed
|
|
if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
|
|
mergedText = existingText.rstrip() + " " + newText.lstrip()
|
|
else:
|
|
mergedText = existingText.rstrip() + "\n" + newText.lstrip()
|
|
existingElem["text"] = mergedText
|
|
|
|
elif contentType == "table":
|
|
# Merge table rows with sophisticated overlap detection
|
|
existingRows = existingElem.get("rows", [])
|
|
newRows = newElem.get("rows", [])
|
|
if existingRows and newRows:
|
|
# Use sophisticated overlap detection that handles multiple overlapping rows
|
|
mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
|
|
existingElem["rows"] = mergedRows
|
|
logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
|
|
elif newRows:
|
|
# If existing has no rows but new does, use new rows
|
|
existingElem["rows"] = newRows
|
|
# Preserve headers from existing (or use new if existing has none)
|
|
if not existingElem.get("headers") and newElem.get("headers"):
|
|
existingElem["headers"] = newElem["headers"]
|
|
# Preserve caption from existing (or use new if existing has none)
|
|
if not existingElem.get("caption") and newElem.get("caption"):
|
|
existingElem["caption"] = newElem.get("caption")
|
|
|
|
elif contentType in ["bullet_list", "numbered_list"]:
|
|
# Merge list items with sophisticated overlap detection
|
|
existingItems = existingElem.get("items", [])
|
|
newItems = newElem.get("items", [])
|
|
if existingItems and newItems:
|
|
mergedItems = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
|
|
existingElem["items"] = mergedItems
|
|
elif newItems:
|
|
existingElem["items"] = newItems
|
|
|
|
elif contentType == "image":
|
|
# Images are typically complete - if new image is provided, replace existing
|
|
# But check if existing image data is incomplete (e.g., base64 string cut off)
|
|
existingImageData = existingElem.get("base64Data", "")
|
|
newImageData = newElem.get("base64Data", "")
|
|
if existingImageData and newImageData:
|
|
# If existing image data doesn't end with valid base64 padding, it might be incomplete
|
|
# Base64 padding is '=' or '==' at the end
|
|
if not existingImageData.rstrip().endswith(('=', '==')):
|
|
# Existing image might be incomplete - merge by appending new data
|
|
# This handles cases where base64 string was cut off
|
|
existingElem["base64Data"] = existingImageData + newImageData
|
|
logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
|
|
else:
|
|
# Existing image is complete - replace with new (or keep existing if new is empty)
|
|
if newImageData:
|
|
existingElem["base64Data"] = newImageData
|
|
elif newImageData:
|
|
existingElem["base64Data"] = newImageData
|
|
# Preserve other image metadata
|
|
if not existingElem.get("altText") and newElem.get("altText"):
|
|
existingElem["altText"] = newElem["altText"]
|
|
if not existingElem.get("caption") and newElem.get("caption"):
|
|
existingElem["caption"] = newElem["caption"]
|
|
|
|
else:
|
|
# GENERIC FALLBACK: Use deep recursive merging for complex nested structures
|
|
# This handles any content type with arbitrary depth and complexity
|
|
merged_element = JsonResponseHandler.mergeDeepStructures(
|
|
existingElem,
|
|
newElem,
|
|
iteration,
|
|
f"section.{contentType}"
|
|
)
|
|
existingElem = merged_element
|
|
|
|
# Update section with merged content
|
|
mergedSection = existingSection.copy()
|
|
if isinstance(existingElements, list):
|
|
# Update the last element in the list with merged content
|
|
if existingElements:
|
|
existingElements[-1] = existingElem
|
|
mergedSection["elements"] = existingElements
|
|
else:
|
|
mergedSection["elements"] = existingElem
|
|
|
|
# Preserve metadata from new section if missing in existing
|
|
if "order" not in mergedSection and "order" in newSection:
|
|
mergedSection["order"] = newSection["order"]
|
|
|
|
return mergedSection
|
|
|
|
@staticmethod
|
|
def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
|
|
"""
|
|
Merge two code blocks intelligently, handling overlaps and incomplete lines.
|
|
"""
|
|
if not existingCode:
|
|
return newCode
|
|
if not newCode:
|
|
return existingCode
|
|
|
|
existingLines = existingCode.rstrip().split('\n')
|
|
newLines = newCode.strip().split('\n')
|
|
|
|
if not existingLines or not newLines:
|
|
return existingCode + "\n" + newCode
|
|
|
|
lastExistingLine = existingLines[-1].strip()
|
|
firstNewLine = newLines[0].strip()
|
|
|
|
# Strategy 1: Exact overlap - remove duplicate line
|
|
if lastExistingLine == firstNewLine:
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
|
|
|
|
# Strategy 2: Incomplete line merge
|
|
# If last existing line ends with comma or is incomplete, merge with first new line
|
|
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
|
|
# Last line is incomplete - merge with first new line
|
|
# Remove trailing comma from existing line
|
|
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
|
|
existingLines[-1] = mergedLine
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
|
|
|
|
# Strategy 3: Partial overlap detection
|
|
# Check if first new line starts with the end of last existing line
|
|
elif ',' in lastExistingLine and ',' in firstNewLine:
|
|
lastExistingParts = lastExistingLine.split(',')
|
|
firstNewParts = firstNewLine.split(',')
|
|
|
|
# Check for overlap: if last part of existing matches first part of new
|
|
if lastExistingParts and firstNewParts:
|
|
lastExistingPart = lastExistingParts[-1].strip()
|
|
firstNewPart = firstNewParts[0].strip()
|
|
|
|
# If they match, there's overlap
|
|
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
|
|
# Remove overlapping part from new line
|
|
newLines[0] = ','.join(firstNewParts[1:])
|
|
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
|
|
|
|
# Reconstruct merged code
|
|
mergedCode = '\n'.join(existingLines)
|
|
if newLines:
|
|
if mergedCode and not mergedCode.endswith('\n'):
|
|
mergedCode += '\n'
|
|
mergedCode += '\n'.join(newLines)
|
|
|
|
return mergedCode
|
|
|
|
@staticmethod
|
|
def detectAndParseJsonFragment(
|
|
result: str,
|
|
allSections: List[Dict[str, Any]]
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
GENERIC fragment detection for ANY JSON structure.
|
|
|
|
Detects if response is a JSON fragment (continuation content) rather than full document structure.
|
|
Works for ANY JSON type: arrays, objects, primitives, nested structures of any depth/complexity.
|
|
|
|
Fragment = Any JSON that:
|
|
1. Does NOT have "documents" or "sections" keys (not full document structure)
|
|
2. Can be ANY structure: array, object, nested, primitive, etc.
|
|
3. Is continuation content that needs to be merged into existing sections
|
|
|
|
Examples (all handled generically):
|
|
- Array: [["37643", ...], ...] (table rows, list items, any array)
|
|
- Object: {"rows": [...], "headers": [...]} (partial element)
|
|
- Primitive: "continuation text" (rare but possible)
|
|
- Nested: {"data": {"items": [...]}} (any nested structure)
|
|
|
|
Returns fragment info dict with:
|
|
- fragment_data: The parsed fragment content (ANY type)
|
|
- target_section_id: ID of last incomplete section (generic, not type-specific)
|
|
|
|
CRITICAL: Fully generic - no specific logic for tables, paragraphs, etc.
|
|
"""
|
|
try:
|
|
extracted = extractJsonString(result)
|
|
parsed = json.loads(extracted)
|
|
|
|
# GENERIC fragment detection: Check if it's NOT a full document structure
|
|
is_full_document = False
|
|
if isinstance(parsed, dict):
|
|
# Full document structure has "documents" or "sections" keys
|
|
if "documents" in parsed or "sections" in parsed:
|
|
is_full_document = True
|
|
|
|
# If it's a full document structure, it's not a fragment
|
|
if is_full_document:
|
|
return None
|
|
|
|
# Otherwise, it's a fragment (can be ANY structure: array, object, primitive, nested)
|
|
# Find target: last incomplete section (generic, regardless of content type)
|
|
target_section_id = JsonResponseHandler.findLastIncompleteSectionId(allSections)
|
|
|
|
logger.info(f"Detected GENERIC JSON fragment (type: {type(parsed).__name__}), target: {target_section_id}")
|
|
|
|
return {
|
|
"fragment_data": parsed, # Can be ANY JSON structure
|
|
"target_section_id": target_section_id
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error detecting JSON fragment: {e}")
|
|
logger.debug(f"Fragment detection failed for result: {result[:500]}...")
|
|
|
|
return None
|
|
|
|
@staticmethod
|
|
def findLastIncompleteSectionId(
|
|
allSections: List[Dict[str, Any]]
|
|
) -> Optional[str]:
|
|
"""
|
|
GENERIC: Find the last incomplete section (regardless of content type).
|
|
|
|
This is fully generic - works for ANY content type, ANY structure.
|
|
Returns the ID of the last section that is incomplete, or None if all are complete.
|
|
"""
|
|
# Find the last incomplete section (generic, not type-specific)
|
|
for section in reversed(allSections):
|
|
if JsonResponseHandler.isSectionIncomplete(section):
|
|
return section.get("id")
|
|
# If no incomplete section found, return last section as fallback
|
|
if allSections:
|
|
return allSections[-1].get("id")
|
|
return None
|
|
|
|
    @staticmethod
    def mergeFragmentIntoSection(
        fragment: Dict[str, Any],
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> Optional[List[Dict[str, Any]]]:
        """
        GENERIC fragment merging for ANY JSON structure.

        Merges a JSON fragment (any structure: array, object, nested,
        primitive) into the last element of the target section, using ONLY
        deep recursive merging (mergeDeepStructures) - no content-type logic.

        Target selection: first by fragment's "target_section_id", then by
        scanning for the last incomplete section. NO FALLBACKS beyond that:
        returns None when no target can be found or the fragment carries no
        data.

        Args:
            fragment: Dict with "fragment_data" and "target_section_id" keys
                (as produced by detectAndParseJsonFragment).
            allSections: Accumulated sections; NOT mutated as a list (a copy
                is returned), but see the in-place note below.
            iteration: Current iteration number (logging/debug-file naming).

        Returns:
            New list of sections with the fragment merged in, or None on failure.
        """
        fragment_data = fragment.get("fragment_data")
        target_section_id = fragment.get("target_section_id")

        if fragment_data is None:
            logger.error(f"Iteration {iteration}: ❌ Fragment has no fragment_data - merge FAILED")
            return None

        # Find the target section (last incomplete section, generic)
        target_section = None
        target_index = -1

        # First: exact id match.
        if target_section_id:
            for i, section in enumerate(allSections):
                if section.get("id") == target_section_id:
                    target_section = section
                    target_index = i
                    break

        # Second: last incomplete section (scan from the end).
        if not target_section:
            for i, section in enumerate(reversed(allSections)):
                if JsonResponseHandler.isSectionIncomplete(section):
                    target_section = section
                    # Convert reversed index back to forward index.
                    target_index = len(allSections) - 1 - i
                    break

        # NO FALLBACKS: If no target found, merge FAILS
        if not target_section:
            logger.error(f"Iteration {iteration}: ❌ MERGE FAILED - No target section found for fragment!")
            logger.error(f"Iteration {iteration}: Available sections: {[s.get('id') + ' (' + s.get('content_type', 'unknown') + ')' for s in allSections]}")
            return None

        # NOTE(review): .copy() is shallow - "elements" below is usually the
        # SAME list object as in target_section, so in-place updates leak back
        # into allSections' section dict. Confirm this aliasing is intended.
        merged_section = target_section.copy()
        elements = merged_section.get("elements", [])

        # Normalize "elements" to a list.
        if not isinstance(elements, list):
            elements = [elements] if elements else []

        if not elements:
            elements = [{}]

        last_element = elements[-1] if elements else {}
        if not isinstance(last_element, dict):
            # NOTE(review): the non-dict trailing element stays in the list and
            # a fresh {} is appended, which elements[-1] = ... then overwrites
            # below - verify this is the intended recovery behavior.
            last_element = {}
            elements.append(last_element)

        # CRITICAL: ONLY deep recursive merging is used, for ALL fragment
        # types - handles overlap detection and plain continuation generically.
        merged_element = JsonResponseHandler.mergeDeepStructures(
            last_element,
            fragment_data,
            iteration,
            f"section.{target_section_id}.fragment"
        )

        # Update elements with merged content
        elements[-1] = merged_element
        merged_section["elements"] = elements

        # Shallow copy of the list so the caller's list object is untouched;
        # merged data is carried forward for accumulative merging.
        merged_sections = allSections.copy()
        merged_sections[target_index] = merged_section

        logger.info(f"Iteration {iteration}: ✅ Merged GENERIC fragment (type: {type(fragment_data).__name__}) into section '{target_section_id}'")

        # Best-effort debug dump of the merged state; failures are non-fatal.
        try:
            from modules.shared.debugLogger import writeDebugFile
            merged_json_str = json.dumps(merged_sections, indent=2, ensure_ascii=False)
            writeDebugFile(merged_json_str, f"merged_json_iteration_{iteration}.json")
        except Exception as e:
            logger.debug(f"Iteration {iteration}: Failed to write merged JSON debug file: {e}")

        return merged_sections
|
|
|
|
@staticmethod
|
|
def completeIncompleteStructures(allSections: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
"""
|
|
Complete any incomplete structures in sections by ensuring proper JSON structure.
|
|
|
|
This ensures JSON is properly closed even if merge failed or iterations stopped early.
|
|
Works generically for ANY structure type - recursively processes all nested structures.
|
|
|
|
Returns sections with completed structures.
|
|
"""
|
|
completed_sections = []
|
|
for section in allSections:
|
|
completed_section = JsonResponseHandler._completeStructure(section)
|
|
completed_sections.append(completed_section)
|
|
return completed_sections
|
|
|
|
@staticmethod
|
|
def _completeStructure(structure: Any) -> Any:
|
|
"""
|
|
Recursively complete incomplete structures by ensuring arrays/objects are properly structured.
|
|
Works generically for ANY JSON structure - no specific logic for content types.
|
|
"""
|
|
if isinstance(structure, dict):
|
|
completed = {}
|
|
for key, value in structure.items():
|
|
completed[key] = JsonResponseHandler._completeStructure(value)
|
|
return completed
|
|
elif isinstance(structure, list):
|
|
completed = []
|
|
for item in structure:
|
|
completed.append(JsonResponseHandler._completeStructure(item))
|
|
return completed
|
|
else:
|
|
# Primitive value - return as is
|
|
return structure
|
|
|
|
@staticmethod
|
|
def getContentTypeForFragment(fragment_type: str) -> str:
|
|
"""Map fragment type to content type."""
|
|
mapping = {
|
|
"table_rows": "table",
|
|
"table_element": "table",
|
|
"code_lines": "code_block",
|
|
"code_element": "code_block",
|
|
"list_items": "bullet_list"
|
|
}
|
|
return mapping.get(fragment_type, "paragraph")
|
|
|
|
@staticmethod
|
|
def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
|
|
"""
|
|
Deep recursive comparison of two JSON-serializable objects.
|
|
Handles nested structures of any depth and complexity.
|
|
|
|
Args:
|
|
obj1: First object to compare
|
|
obj2: Second object to compare
|
|
max_depth: Maximum recursion depth to prevent infinite loops
|
|
|
|
Returns:
|
|
True if objects are deeply equal, False otherwise
|
|
"""
|
|
if max_depth <= 0:
|
|
return False
|
|
|
|
# Type check
|
|
if type(obj1) != type(obj2):
|
|
return False
|
|
|
|
# Primitive types
|
|
if isinstance(obj1, (str, int, float, bool, type(None))):
|
|
return obj1 == obj2
|
|
|
|
# Lists/arrays - compare element by element
|
|
if isinstance(obj1, list):
|
|
if len(obj1) != len(obj2):
|
|
return False
|
|
return all(JsonResponseHandler.deepCompare(item1, item2, max_depth - 1)
|
|
for item1, item2 in zip(obj1, obj2))
|
|
|
|
# Dicts/objects - compare key by key
|
|
if isinstance(obj1, dict):
|
|
if set(obj1.keys()) != set(obj2.keys()):
|
|
return False
|
|
return all(JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
|
|
for key in obj1.keys())
|
|
|
|
# Fallback for other types
|
|
return obj1 == obj2
|
|
|
|
@staticmethod
|
|
def findLongestCommonSuffix(
|
|
existing_list: List[Any],
|
|
new_list: List[Any],
|
|
min_overlap: int = 1
|
|
) -> int:
|
|
"""
|
|
Find the longest common suffix of existing_list that matches a prefix of new_list.
|
|
|
|
This handles cases where multiple elements overlap:
|
|
- existing: [A, B, C, D]
|
|
- new: [C, D, E, F]
|
|
- overlap: [C, D] (length 2)
|
|
|
|
Returns the length of the overlap (0 if no overlap found).
|
|
"""
|
|
if not existing_list or not new_list:
|
|
return 0
|
|
|
|
max_overlap = min(len(existing_list), len(new_list))
|
|
|
|
# Try all possible overlap lengths (from longest to shortest)
|
|
for overlap_len in range(max_overlap, min_overlap - 1, -1):
|
|
existing_suffix = existing_list[-overlap_len:]
|
|
new_prefix = new_list[:overlap_len]
|
|
|
|
# Deep compare suffix and prefix
|
|
if all(JsonResponseHandler.deepCompare(existing_suffix[i], new_prefix[i])
|
|
for i in range(overlap_len)):
|
|
return overlap_len
|
|
|
|
return 0
|
|
|
|
@staticmethod
|
|
def findPartialOverlap(
|
|
existing_item: Any,
|
|
new_item: Any
|
|
) -> Tuple[bool, Optional[Any]]:
|
|
"""
|
|
Detect if new_item completes an incomplete existing_item.
|
|
|
|
Handles cases like:
|
|
- existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"]
|
|
- new: ["37643", "37649", ...]
|
|
|
|
Returns (is_partial_overlap, merged_item) if partial overlap detected, else (False, None).
|
|
"""
|
|
# Check if both are lists
|
|
if isinstance(existing_item, list) and isinstance(new_item, list):
|
|
if not existing_item or not new_item:
|
|
return False, None
|
|
|
|
# Check if last element of existing is incomplete and matches first of new
|
|
last_existing = existing_item[-1]
|
|
first_new = new_item[0]
|
|
|
|
# If last existing is a string and first new is a string
|
|
if isinstance(last_existing, str) and isinstance(first_new, str):
|
|
# Check if last existing is incomplete (very short, ends with number, etc.)
|
|
if len(last_existing) < 10 and first_new.startswith(last_existing):
|
|
# Partial overlap - merge them
|
|
merged_last = last_existing + first_new[len(last_existing):]
|
|
merged_item = existing_item[:-1] + [merged_last] + new_item[1:]
|
|
return True, merged_item
|
|
|
|
# Check if last existing is incomplete list and first new completes it
|
|
if isinstance(last_existing, list) and isinstance(first_new, list):
|
|
if len(last_existing) < len(first_new):
|
|
# Check if last existing is prefix of first new
|
|
if first_new[:len(last_existing)] == last_existing:
|
|
# Merge: replace incomplete last with complete first
|
|
merged_item = existing_item[:-1] + [first_new] + new_item[1:]
|
|
return True, merged_item
|
|
|
|
# Check if existing is incomplete string and new completes it
|
|
if isinstance(existing_item, str) and isinstance(new_item, str):
|
|
if len(existing_item) < 50 and new_item.startswith(existing_item):
|
|
# Partial overlap
|
|
merged = existing_item + new_item[len(existing_item):]
|
|
return True, merged
|
|
|
|
return False, None
|
|
|
|
@staticmethod
|
|
def mergeRowsWithOverlap(
|
|
existing_rows: List[List[str]],
|
|
new_rows: List[List[str]],
|
|
iteration: int
|
|
) -> List[List[str]]:
|
|
"""
|
|
Merge table rows with sophisticated overlap detection.
|
|
Handles multiple overlapping rows and partial overlaps.
|
|
"""
|
|
if not new_rows:
|
|
return existing_rows
|
|
if not existing_rows:
|
|
return new_rows
|
|
|
|
# Strategy 1: Find longest common suffix/prefix overlap
|
|
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
|
|
if overlap_len > 0:
|
|
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
|
|
return existing_rows + new_rows[overlap_len:]
|
|
|
|
# Strategy 2: Check for partial overlap in last row
|
|
if len(existing_rows) > 0 and len(new_rows) > 0:
|
|
last_existing = existing_rows[-1]
|
|
first_new = new_rows[0]
|
|
|
|
is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
|
|
if is_partial:
|
|
logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
|
|
return existing_rows[:-1] + [merged_row] + new_rows[1:]
|
|
|
|
# Strategy 3: Simple first/last comparison (fallback)
|
|
if isinstance(existing_rows[-1], list) and isinstance(new_rows[0], list):
|
|
if list(existing_rows[-1]) == list(new_rows[0]):
|
|
logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
|
|
return existing_rows + new_rows[1:]
|
|
|
|
# No overlap detected - append all new rows
|
|
return existing_rows + new_rows
|
|
|
|
@staticmethod
|
|
def mergeItemsWithOverlap(
|
|
existing_items: List[str],
|
|
new_items: List[str],
|
|
iteration: int
|
|
) -> List[str]:
|
|
"""
|
|
Merge list items with sophisticated overlap detection.
|
|
Handles multiple overlapping items and partial overlaps.
|
|
"""
|
|
if not new_items:
|
|
return existing_items
|
|
if not existing_items:
|
|
return new_items
|
|
|
|
# Strategy 1: Find longest common suffix/prefix overlap
|
|
overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
|
|
if overlap_len > 0:
|
|
logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
|
|
return existing_items + new_items[overlap_len:]
|
|
|
|
# Strategy 2: Check for partial overlap in last item
|
|
if len(existing_items) > 0 and len(new_items) > 0:
|
|
is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
|
|
if is_partial:
|
|
logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
|
|
return existing_items[:-1] + [merged_item] + new_items[1:]
|
|
|
|
# Strategy 3: Simple first/last comparison (fallback)
|
|
if existing_items[-1] == new_items[0]:
|
|
logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
|
|
return existing_items + new_items[1:]
|
|
|
|
# No overlap detected - append all new items
|
|
return existing_items + new_items
|
|
|
|
    @staticmethod
    def mergeDeepStructures(
        existing: Any,
        new: Any,
        iteration: int,
        path: str = "root"
    ) -> Any:
        """
        FULLY GENERIC recursive merge for ANY JSON structure of arbitrary depth/complexity.

        Handles ALL cases generically:
        1. Arrays/Lists: Overlap detection (suffix/prefix), partial overlap, no overlap (continuation)
        2. Objects/Dicts: Key-by-key merge with overlap detection for nested structures
        3. Primitives: Equality check, replacement if different
        4. Nested structures: Recursively handles any depth/complexity

        Overlap detection strategies (all generic):
        - Array overlap: Finds longest common suffix/prefix, handles partial overlaps
        - Object overlap: Detected recursively through key matching and deep comparison
        - No overlap: Appends/merges continuation content after cut-off point

        CRITICAL: Fully generic - no specific logic for content types.
        Works for ANY JSON structure: arrays, objects, nested, primitives, any combination.

        Args:
            existing: Structure accumulated from previous iterations
            new: Structure from the current iteration
            iteration: Current iteration number (used only for log messages)
            path: Dotted location within the document, for log messages

        Returns:
            The merged structure (may be ``existing`` or ``new`` unchanged,
            or a newly built list/dict).
        """
        # Type check
        # NOTE(review): strict type equality - int vs float (both valid JSON
        # numbers) or bool vs int count as a mismatch, so the new value wins.
        if type(existing) != type(new):
            # Types don't match - return new (replacement)
            logger.debug(f"Iteration {iteration}: Types don't match at {path} ({type(existing).__name__} vs {type(new).__name__}), replacing")
            return new

        # Lists/arrays - GENERIC merge with overlap detection
        if isinstance(existing, list) and isinstance(new, list):
            if not new:
                return existing
            if not existing:
                return new

            # Strategy 1: Find longest common suffix/prefix overlap (handles multiple overlapping elements)
            overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
            if overlap_len > 0:
                logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
                return existing + new[overlap_len:]

            # Strategy 2: Check for partial overlap in last element (incomplete element completion)
            if len(existing) > 0 and len(new) > 0:
                is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
                if is_partial:
                    logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging incomplete element")
                    return existing[:-1] + [merged_item] + new[1:]

            # Strategy 3: No overlap detected - continuation after cut-off point
            # This handles the case where new data starts exactly after the cut-off
            logger.debug(f"Iteration {iteration}: No overlap at {path}, appending continuation content ({len(new)} items)")
            return existing + new

        # Dicts/objects - GENERIC merge with recursive overlap detection
        if isinstance(existing, dict) and isinstance(new, dict):
            # Shallow copy is sufficient: nested values are replaced by the
            # recursive merge below, never mutated in place.
            merged = existing.copy()

            # Check for object-level overlap: if new object is subset/superset of existing
            # This handles cases where same object structure appears in both
            existing_keys = set(existing.keys())
            new_keys = set(new.keys())

            # If new is subset of existing and values match, it's overlap (skip)
            if new_keys.issubset(existing_keys):
                all_match = True
                for key in new_keys:
                    if not JsonResponseHandler.deepCompare(existing[key], new[key]):
                        all_match = False
                        break
                if all_match:
                    logger.debug(f"Iteration {iteration}: Object at {path} is subset overlap, skipping")
                    return existing

            # Merge key-by-key with recursive overlap detection
            for key, new_value in new.items():
                if key in merged:
                    # Key exists - merge recursively (handles nested overlap detection)
                    merged[key] = JsonResponseHandler.mergeDeepStructures(
                        merged[key],
                        new_value,
                        iteration,
                        f"{path}.{key}"
                    )
                else:
                    # New key - add it (continuation content)
                    merged[key] = new_value
                    logger.debug(f"Iteration {iteration}: Added new key '{key}' at {path} (continuation)")

            return merged

        # Primitives - equality check
        if existing == new:
            return existing
        # Different primitive values - return new (continuation/replacement)
        logger.debug(f"Iteration {iteration}: Primitive at {path} differs, using new value")
        return new
|
|
|
|
@staticmethod
|
|
def cleanEncodingIssues(jsonString: str) -> str:
|
|
"""
|
|
GENERIC function to remove problematic encoding parts from JSON string.
|
|
|
|
Works for ANY JSON structure - removes problematic characters/bytes.
|
|
|
|
Args:
|
|
jsonString: JSON string that may have encoding issues
|
|
|
|
Returns:
|
|
Cleaned JSON string
|
|
"""
|
|
try:
|
|
# Try to decode/encode to detect issues
|
|
jsonString.encode('utf-8').decode('utf-8')
|
|
return jsonString
|
|
except UnicodeError:
|
|
# Remove problematic parts
|
|
cleaned = jsonString.encode('utf-8', errors='ignore').decode('utf-8', errors='ignore')
|
|
logger.warning("Removed encoding issues from JSON string")
|
|
return cleaned
|
|
|
|
@staticmethod
|
|
def mergeJsonStringsWithOverlap(
|
|
accumulated: str,
|
|
newFragment: str
|
|
) -> str:
|
|
"""
|
|
GENERIC function to merge two JSON strings, handling overlaps intelligently.
|
|
|
|
Works for ANY JSON structure - no specific logic for content types.
|
|
|
|
Overlap scenarios (all handled generically):
|
|
- Exact continuation: newFragment starts exactly where accumulated ends
|
|
- Partial overlap: newFragment overlaps with end of accumulated
|
|
- Full overlap: newFragment is subset of accumulated
|
|
|
|
Strategy:
|
|
1. Find longest common suffix/prefix match (string-based comparison)
|
|
2. Remove duplicate content
|
|
3. Concatenate remaining parts
|
|
|
|
Args:
|
|
accumulated: Previously accumulated JSON string
|
|
newFragment: New fragment string to append
|
|
|
|
Returns:
|
|
Combined JSON string with overlaps removed
|
|
"""
|
|
if not accumulated:
|
|
return newFragment
|
|
if not newFragment:
|
|
return accumulated
|
|
|
|
# Find longest common suffix/prefix match
|
|
# Try different overlap lengths (from longest to shortest)
|
|
# Overlaps can be as small as 1 character, so we check all possible lengths
|
|
maxOverlapLen = min(len(accumulated), len(newFragment))
|
|
|
|
# Start from maximum possible overlap down to 1 character
|
|
# This ensures we find the longest overlap, even if it's just 1 character
|
|
for overlapLen in range(maxOverlapLen, 0, -1):
|
|
accumulatedSuffix = accumulated[-overlapLen:]
|
|
newFragmentPrefix = newFragment[:overlapLen]
|
|
|
|
if accumulatedSuffix == newFragmentPrefix:
|
|
# Found overlap - remove duplicate part
|
|
logger.debug(f"Found overlap of {overlapLen} characters, removing duplicate")
|
|
return accumulated + newFragment[overlapLen:]
|
|
|
|
# No overlap found - simple concatenation
|
|
return accumulated + newFragment
|
|
|
|
    @staticmethod
    def isJsonComplete(parsedJson: Dict[str, Any]) -> bool:
        """
        GENERIC function to check if parsed JSON structure is complete.

        Works for ANY JSON structure - no specific logic for content types.

        Completeness checks (all generic):
        - All arrays are properly closed
        - All objects are properly closed
        - No incomplete structures
        - Recursive validation of nested structures

        NOTE(review): since ``parsedJson`` was produced by ``json.loads``,
        brackets/braces are already balanced; the recursive walk below has no
        condition that yields False for such data, so in practice it returns
        True for every successfully parsed document (False only on an
        unexpected exception). Kept as a hook for future, marker-based
        completeness checks - confirm before relying on a False result.

        Args:
            parsedJson: Parsed JSON object

        Returns:
            True if JSON is complete, False otherwise
        """
        def _checkStructureComplete(obj: Any, depth: int = 0) -> bool:
            """Recursively check if structure is complete."""
            if depth > 50: # Prevent infinite recursion
                return True

            if isinstance(obj, dict):
                # Check all values recursively
                for value in obj.values():
                    if not _checkStructureComplete(value, depth + 1):
                        return False
                return True
            elif isinstance(obj, list):
                # Check all items recursively
                for item in obj:
                    if not _checkStructureComplete(item, depth + 1):
                        return False
                return True
            else:
                # Primitive value - always complete
                return True

        try:
            return _checkStructureComplete(parsedJson)
        except Exception as e:
            logger.debug(f"Error checking JSON completeness: {e}")
            return False
|
|
|
|
    @staticmethod
    def finalizeJson(parsedJson: Dict[str, Any]) -> Dict[str, Any]:
        """
        GENERIC function to finalize complete JSON by adding missing closing elements and repairing corruption.

        Works for ANY JSON structure - no specific logic for content types.

        Steps (all generic):
        1. Analyze structure for missing closing elements (recursively)
        2. Add closing brackets/braces where needed
        3. Repair any remaining corruption
        4. Validate final structure

        NOTE(review): currently an intentional no-op - parsing already
        succeeded before this is called, so the document is returned as-is.
        Callers treat the return value as the canonical finalized document.

        Args:
            parsedJson: Parsed JSON object that needs finalization

        Returns:
            Finalized JSON object
        """
        # For now, just return as-is since parsing succeeded
        # If needed, can add logic to check for incomplete structures
        # and add closing elements
        return parsedJson
|
|
|
|
@staticmethod
|
|
def extractKpiValuesFromJson(
|
|
parsedJson: Dict[str, Any],
|
|
kpis: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Extract current KPI values from parsed JSON and update KPI objects.
|
|
|
|
Args:
|
|
parsedJson: Parsed JSON object
|
|
kpis: List of KPI objects (will be updated with currentValue)
|
|
|
|
Returns:
|
|
Updated list of KPI objects with currentValue set
|
|
"""
|
|
updatedKpis = []
|
|
|
|
for kpi in kpis:
|
|
kpiId = kpi.get("id")
|
|
jsonPath = kpi.get("jsonPath")
|
|
|
|
if not kpiId or not jsonPath:
|
|
continue
|
|
|
|
# Create copy of KPI object
|
|
updatedKpi = kpi.copy()
|
|
|
|
try:
|
|
# Extract value using JSON path
|
|
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
|
|
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
|
|
|
|
# Count items/rows/elements based on type
|
|
if isinstance(value, list):
|
|
updatedKpi["currentValue"] = len(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
|
|
elif isinstance(value, (int, float)):
|
|
updatedKpi["currentValue"] = int(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: numeric value {int(value)}")
|
|
else:
|
|
updatedKpi["currentValue"] = 0
|
|
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: non-list/non-numeric value, set to 0")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error extracting KPI {kpiId} from path {jsonPath}: {e}")
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
|
|
updatedKpis.append(updatedKpi)
|
|
|
|
return updatedKpis
|
|
|
|
@staticmethod
|
|
def extractKpiValuesFromIncompleteJson(
|
|
jsonString: str,
|
|
kpis: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Extract KPI values from incomplete JSON string.
|
|
Uses existing JSON completion function to close incomplete structures, then extracts KPIs.
|
|
|
|
Args:
|
|
jsonString: Incomplete JSON string
|
|
kpis: List of KPI objects
|
|
|
|
Returns:
|
|
Updated list of KPI objects with currentValue set
|
|
"""
|
|
updatedKpis = []
|
|
|
|
for kpi in kpis:
|
|
kpiId = kpi.get("id")
|
|
jsonPath = kpi.get("jsonPath")
|
|
|
|
if not kpiId or not jsonPath:
|
|
continue
|
|
|
|
updatedKpi = kpi.copy()
|
|
|
|
try:
|
|
# Use existing JSON completion function to close incomplete structures
|
|
from modules.shared.jsonUtils import extractJsonString, closeJsonStructures
|
|
|
|
# Extract JSON string and complete it with missing closing elements
|
|
extracted = extractJsonString(jsonString)
|
|
completed = closeJsonStructures(extracted)
|
|
|
|
# Parse completed JSON
|
|
parsed = json.loads(completed)
|
|
|
|
# Extract value using path
|
|
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
|
|
|
|
# Count items/rows/elements based on type
|
|
if isinstance(value, list):
|
|
updatedKpi["currentValue"] = len(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
|
|
elif isinstance(value, (int, float)):
|
|
updatedKpi["currentValue"] = int(value)
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: numeric value {int(value)}")
|
|
else:
|
|
updatedKpi["currentValue"] = 0
|
|
logger.debug(f"Extracted KPI {kpiId} from completed JSON: non-list/non-numeric value, set to 0")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error extracting KPI {kpiId} from incomplete JSON: {e}")
|
|
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
|
|
|
|
updatedKpis.append(updatedKpi)
|
|
|
|
return updatedKpis
|
|
|
|
@staticmethod
|
|
def _extractValueByPath(obj: Any, path: str) -> Any:
|
|
"""
|
|
Extract value from object using dot-notation path with array indices.
|
|
|
|
Example: "sections[0].elements[0].items"
|
|
"""
|
|
parts = path.split('.')
|
|
current = obj
|
|
|
|
for part in parts:
|
|
if '[' in part and ']' in part:
|
|
# Handle array access: "sections[0]"
|
|
key = part[:part.index('[')]
|
|
index = int(part[part.index('[') + 1:part.index(']')])
|
|
|
|
if key:
|
|
current = current.get(key, [])
|
|
if isinstance(current, list) and 0 <= index < len(current):
|
|
current = current[index]
|
|
else:
|
|
raise KeyError(f"Invalid index {index} for {key}")
|
|
else:
|
|
# Handle dict access
|
|
if isinstance(current, dict):
|
|
current = current.get(part)
|
|
else:
|
|
raise KeyError(f"Cannot access {part} on {type(current)}")
|
|
|
|
if current is None:
|
|
raise KeyError(f"Path {path} returned None at {part}")
|
|
|
|
return current
|
|
|
|
@staticmethod
|
|
def validateKpiProgression(
|
|
accumulationState: JsonAccumulationState,
|
|
updatedKpis: List[Dict[str, Any]]
|
|
) -> Tuple[bool, str]:
|
|
"""
|
|
Validate KPI progression from parsed JSON.
|
|
|
|
Validation rules:
|
|
- Proceed if: At least ONE KPI increased
|
|
- Stop if: Any KPI went backwards → return (False, "KPI went backwards")
|
|
- Stop if: No KPIs progressed → return (False, "No progress")
|
|
- Finish if: All KPIs completed OR JSON is complete → return (True, "Complete")
|
|
|
|
Args:
|
|
accumulationState: Current accumulation state (contains kpis)
|
|
updatedKpis: Updated KPI objects with currentValue set
|
|
|
|
Returns:
|
|
Tuple of (shouldProceed, reason)
|
|
"""
|
|
if not accumulationState.kpis:
|
|
# No KPIs defined - always proceed
|
|
return True, "No KPIs defined"
|
|
|
|
# Build dict of last values for comparison
|
|
lastValues = {kpi.get("id"): kpi.get("currentValue", 0) for kpi in accumulationState.kpis}
|
|
logger.debug(f"KPI validation: lastValues = {lastValues}")
|
|
logger.debug(f"KPI validation: updatedKpis = {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
|
|
|
|
# Check if any KPI went backwards
|
|
for updatedKpi in updatedKpis:
|
|
kpiId = updatedKpi.get("id")
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if kpiId in lastValues:
|
|
lastValue = lastValues[kpiId]
|
|
if currentValue < lastValue:
|
|
logger.warning(f"KPI {kpiId} went BACKWARDS: {lastValue} → {currentValue}")
|
|
return False, f"KPI {kpiId} went backwards"
|
|
|
|
# Check if all KPIs are completed
|
|
allCompleted = True
|
|
for updatedKpi in updatedKpis:
|
|
targetValue = updatedKpi.get("targetValue", 0)
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if currentValue < targetValue:
|
|
allCompleted = False
|
|
break
|
|
|
|
if allCompleted:
|
|
logger.info("All KPIs completed")
|
|
return True, "All KPIs completed"
|
|
|
|
# Check if at least one KPI progressed
|
|
atLeastOneProgressed = False
|
|
for updatedKpi in updatedKpis:
|
|
kpiId = updatedKpi.get("id")
|
|
currentValue = updatedKpi.get("currentValue", 0)
|
|
|
|
if kpiId in lastValues:
|
|
lastValue = lastValues[kpiId]
|
|
if currentValue > lastValue:
|
|
atLeastOneProgressed = True
|
|
logger.info(f"KPI {kpiId} progressed: {lastValue} → {currentValue}")
|
|
break
|
|
else:
|
|
# First time seeing this KPI - if it has a value, it's progress
|
|
if currentValue > 0:
|
|
atLeastOneProgressed = True
|
|
logger.info(f"KPI {kpiId} initialized: {currentValue}")
|
|
break
|
|
|
|
if not atLeastOneProgressed:
|
|
logger.warning(f"No KPIs progressed. Last values: {lastValues}, Current values: {[(kpi.get('id'), kpi.get('currentValue')) for kpi in updatedKpis]}")
|
|
return False, "No progress"
|
|
|
|
return True, "Progress detected"
|
|
|
|
    @staticmethod
    def accumulateAndParseJsonFragments(
        accumulatedJsonString: str,
        newFragmentString: str,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> Tuple[str, List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
        """
        Accumulate JSON fragments and parse when complete.

        GENERIC function that handles:
        1. Concatenating JSON strings with overlap detection
        2. Parsing the accumulated string
        3. Extracting sections (partial if incomplete, final if complete)
        4. Determining completion status

        Args:
            accumulatedJsonString: Previously accumulated JSON string
            newFragmentString: New fragment string from current iteration
            allSections: Sections extracted so far (for prompt context).
                NOTE(review): currently unused in this body - kept for
                interface compatibility; confirm callers before removing.
            iteration: Current iteration number

        Returns:
            Tuple of:
            - accumulatedJsonString: Updated accumulated string (the
              pre-merge string when repair fails, so no data is lost)
            - sections: Extracted sections (partial if incomplete, final if complete)
            - isComplete: True if JSON is complete and valid
            - parsedResult: Parsed JSON object (if parsing succeeded)
        """

        # Step 1: Clean encoding issues from accumulated string (check end of first delivered part)
        cleanedAccumulated = JsonResponseHandler.cleanEncodingIssues(accumulatedJsonString)

        # Step 2: Clean encoding issues from new fragment
        cleanedFragment = JsonResponseHandler.cleanEncodingIssues(newFragmentString)

        # Step 3: Concatenate with overlap handling
        combinedString = JsonResponseHandler.mergeJsonStringsWithOverlap(
            cleanedAccumulated,
            cleanedFragment
        )

        # Step 4: Try to parse
        try:
            extracted = extractJsonString(combinedString)
            parsedResult = json.loads(extracted)

            # Step 5: Parsing succeeded - check completeness
            isComplete = JsonResponseHandler.isJsonComplete(parsedResult)

            if isComplete:
                # Step 6: Complete JSON - finalize
                finalizedJson = JsonResponseHandler.finalizeJson(parsedResult)
                sections = extractSectionsFromDocument(finalizedJson)
                logger.info(f"Iteration {iteration}: JSON accumulation complete, extracted {len(sections)} sections")
                return combinedString, sections, True, finalizedJson
            else:
                # Step 7: Incomplete but parseable - extract partial sections
                sections = extractSectionsFromDocument(parsedResult)
                logger.info(f"Iteration {iteration}: JSON accumulation incomplete but parseable, extracted {len(sections)} partial sections")
                return combinedString, sections, False, parsedResult

        except json.JSONDecodeError:
            # Step 8: Still broken - repair and extract partial sections
            # NOTE(review): repairBrokenJson is assumed to return a parsed
            # object here (it is fed to extractSectionsFromDocument and
            # returned in the parsedResult slot) - confirm against jsonUtils.
            repaired = repairBrokenJson(combinedString)
            if repaired:
                sections = extractSectionsFromDocument(repaired)
                logger.info(f"Iteration {iteration}: JSON accumulation repaired, extracted {len(sections)} sections")
                return combinedString, sections, False, repaired
            else:
                # Repair failed - continue with data BEFORE merging the problematic piece
                # Return previous accumulated string (before adding new fragment)
                # This ensures we don't lose previously accumulated data
                logger.warning(f"Iteration {iteration}: Repair failed, continuing with previous accumulated data")
                return accumulatedJsonString, [], False, None
|
|
|