"""
JSON Response Handling Module

Handles merging of JSON responses from multiple AI iterations, including:
- Section merging with intelligent overlap detection
- JSON fragment detection and merging
- Deep recursive structure merging
- Overlap detection for complex nested structures
"""

import json
import logging
from typing import Dict, Any, List, Optional, Tuple

from modules.shared.jsonUtils import extractJsonString

logger = logging.getLogger(__name__)


class JsonResponseHandler:
    """Handles JSON response merging and fragment detection for iterative AI generation.

    Every method is a static method; the class only serves as a namespace.
    Sections are plain dicts that may carry the keys "id", "content_type",
    "order" and "elements" (a list of element dicts, or a single element dict).
    """

    @staticmethod
    def mergeSectionsIntelligently(
        existingSections: List[Dict[str, Any]],
        newSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Intelligently merge sections from multiple iterations.

        This is a GENERIC merging strategy that handles broken JSON iterations.
        The break can occur anywhere - in any section, at any depth.

        Merging strategies (in order of priority):
        1. Same Section ID: Merge sections with identical IDs
        2. Same Content-Type + Position: If last section is incomplete and new section continues it
        3. Same Order: Merge sections with same order value
        4. Structural Analysis: Two consecutive code_block or table sections are merged

        Args:
            existingSections: Sections accumulated from previous iterations
            newSections: Sections extracted from current iteration
            iteration: Current iteration number (used for log messages only)

        Returns:
            Merged list of sections. When one of the inputs is empty the other
            list object is returned as-is (no copy).
        """
        if not newSections:
            return existingSections
        if not existingSections:
            return newSections

        # Shallow copy: the list is ours, replaced entries are new dicts
        # produced by mergeSectionContent.
        mergedSections = existingSections.copy()

        for newSection in newSections:
            # Strategy 1: Same Section ID - merge directly
            newSectionId = newSection.get("id")
            if newSectionId:
                idIndex = next(
                    (i for i, candidate in enumerate(mergedSections)
                     if candidate.get("id") == newSectionId),
                    None,
                )
                if idIndex is not None:
                    mergedSections[idIndex] = JsonResponseHandler.mergeSectionContent(
                        mergedSections[idIndex], newSection, iteration
                    )
                    logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
                    continue

            # mergedSections is never empty here: it started as a copy of a
            # non-empty list and only grows.
            lastSection = mergedSections[-1]
            lastContentType = lastSection.get("content_type")
            newContentType = newSection.get("content_type")
            sameTypeAsLast = lastContentType == newContentType

            # Strategy 2: Same Content-Type + Position (continuation detection)
            if sameTypeAsLast and JsonResponseHandler.isSectionIncomplete(lastSection):
                mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                    lastSection, newSection, iteration
                )
                logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
                continue

            # Strategy 3: Same Order value
            newOrder = newSection.get("order")
            if newOrder is not None:
                orderIndex = next(
                    (i for i, candidate in enumerate(mergedSections)
                     if candidate.get("order") is not None and candidate.get("order") == newOrder),
                    None,
                )
                if orderIndex is not None:
                    mergedSections[orderIndex] = JsonResponseHandler.mergeSectionContent(
                        mergedSections[orderIndex], newSection, iteration
                    )
                    logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
                    continue

            # Strategy 4: Structural Analysis - consecutive code blocks / tables
            # are treated as one broken section (common for broken JSON iterations).
            if sameTypeAsLast and newContentType in ("code_block", "table"):
                mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
                    lastSection, newSection, iteration
                )
                logger.debug(f"Iteration {iteration}: Merged {newContentType} sections by structural analysis")
                continue

            # No merge strategy matched - add as new section
            mergedSections.append(newSection)
            logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")

        return mergedSections

    @staticmethod
    def isSectionIncomplete(section: Dict[str, Any]) -> bool:
        """
        Check if a section is incomplete (broken at the end).

        Only the LAST element of the section is inspected. The checks are
        heuristics:
        - code_block: code ends with a comma, or contains a comma and has no
          final newline
        - table: last cell ends with a quote or is shorter than 3 characters,
          or the last row has fewer cells than there are headers
        - paragraph/heading: text shorter than 20 characters that does not end
          with '.', '!' or '?'
        - bullet_list/numbered_list: last item is a string shorter than 3 characters
        - image: base64 data without trailing '=' that ends in a non-base64
          character or whose length is not a multiple of 4
        - any type: generic check over every field of the element (see below)

        Args:
            section: Section dict; reads "content_type" and "elements".

        Returns:
            True if any heuristic flags the section as truncated, else False.
            Sections without elements, or whose last element is not a dict,
            are reported as complete.
        """
        contentType = section.get("content_type", "")
        elements = section.get("elements", [])

        if not elements:
            return False

        # "elements" may be a list of elements or a single element.
        lastElement = elements[-1] if isinstance(elements, list) else elements
        if not isinstance(lastElement, dict):
            return False

        # Check code_block for incomplete code
        if contentType == "code_block":
            code = lastElement.get("code", "")
            if isinstance(code, str) and code.rstrip():
                codeStripped = code.rstrip()
                if codeStripped.endswith(','):
                    # Trailing comma - incomplete CSV-like line
                    return True
                # The raw string must be tested for the final newline: the
                # stripped string can never end with '\n', which previously
                # flagged every code block containing a comma.
                if ',' in codeStripped and not code.endswith('\n'):
                    return True

        # Check table for incomplete rows
        if contentType == "table":
            rows = lastElement.get("rows", [])
            if rows:
                lastRow = rows[-1] if isinstance(rows, list) else []
                if isinstance(lastRow, list) and lastRow:
                    lastCell = lastRow[-1]
                    if isinstance(lastCell, str):
                        # Dangling quote or a very short (but non-empty) cell
                        if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
                            return True
                    # Last row has fewer cells than the header row
                    headers = lastElement.get("headers", [])
                    if headers and isinstance(headers, list):
                        if len(lastRow) < len(headers):
                            return True

        # Check paragraph/text for incomplete sentences
        if contentType in ["paragraph", "heading"]:
            text = lastElement.get("text", "")
            if isinstance(text, str) and text:
                textStripped = text.rstrip()
                # Missing sentence punctuation alone is unreliable, so only
                # very short texts are treated as cut off.
                if textStripped and textStripped[-1] not in '.!?' and len(textStripped) < 20:
                    return True

        # Check lists for incomplete items
        if contentType in ["bullet_list", "numbered_list"]:
            items = lastElement.get("items", [])
            if items and isinstance(items, list):
                lastItem = items[-1]
                if isinstance(lastItem, str) and len(lastItem) < 3:
                    return True

        # Check image for incomplete base64 data
        if contentType == "image":
            imageData = lastElement.get("base64Data", "")
            if isinstance(imageData, str) and imageData:
                stripped = imageData.rstrip()
                if stripped and not stripped.endswith('='):
                    base64Alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
                    if stripped[-1] not in base64Alphabet:
                        return True
                    # Base64 text comes in groups of 4 characters
                    if len(stripped) % 4 != 0:
                        return True

        # GENERIC CHECK: look for incomplete structures in any field of the element.
        # NOTE(review): this also inspects short metadata strings (for example a
        # "language" value), which therefore flag the section as incomplete -
        # confirm this is intended.
        for value in lastElement.values():
            if isinstance(value, list) and len(value) > 0:
                lastItem = value[-1]
                if isinstance(lastItem, str):
                    # Very short trailing string
                    if len(lastItem) < 3:
                        return True
                elif isinstance(lastItem, dict):
                    # Trailing dict with fewer than two keys
                    if len(lastItem) < 2:
                        return True
            elif isinstance(value, str):
                # Short string without closing punctuation / newline
                if 0 < len(value) < 10 and value[-1] not in '.!?\n':
                    return True

        return False

    @staticmethod
    def mergeSectionContent(
        existingSection: Dict[str, Any],
        newSection: Dict[str, Any],
        iteration: int
    ) -> Dict[str, Any]:
        """
        Merge content from two sections.

        The LAST element of the existing section is merged with the FIRST
        element of the new section, depending on the existing section's
        content type:
        - code_block: Append code, handle overlaps, merge incomplete lines
        - paragraph/heading: Append text
        - table: Merge rows (headers/caption are taken from new only if missing)
        - bullet_list/numbered_list: Merge items
        - image: Append or replace base64 data
        - Other: Deep recursive merge of the two elements

        NOTE(review): elements of the new section after the first one are not
        carried over - confirm callers never send multi-element continuations.

        Args:
            existingSection: Section accumulated so far (not modified).
            newSection: Section from the current iteration (not modified).
            iteration: Current iteration number (used for log messages only)

        Returns:
            A new section dict with the merged elements. existingSection itself
            is returned unchanged when the new section has no elements or when
            either element to merge is not a dict.
        """
        contentType = existingSection.get("content_type", "")
        existingElements = existingSection.get("elements", [])
        newElements = newSection.get("elements", [])

        if not newElements:
            return existingSection

        # "elements" may be a list of elements or a single element.
        existingIsList = isinstance(existingElements, list)
        if existingIsList:
            existingElem = existingElements[-1] if existingElements else {}
        else:
            existingElem = existingElements

        newElem = newElements[0] if isinstance(newElements, list) else newElements

        if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
            return existingSection

        # Work on a copy so the caller's element dict is never mutated
        # (existingSection.copy() below is only a shallow copy).
        mergedElem = dict(existingElem)

        # Merge based on content type
        if contentType == "code_block":
            existingCode = mergedElem.get("code", "")
            newCode = newElem.get("code", "")
            if existingCode and newCode:
                mergedElem["code"] = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
            elif newCode:
                # Existing block has no code yet - adopt the new code
                mergedElem["code"] = newCode
            # Preserve language from existing or new
            if "language" not in mergedElem and "language" in newElem:
                mergedElem["language"] = newElem["language"]

        elif contentType in ["paragraph", "heading"]:
            existingText = mergedElem.get("text", "")
            newText = newElem.get("text", "")
            if existingText and newText:
                head = existingText.rstrip()
                # Unfinished sentence: continue on the same line; otherwise
                # start the continuation on a new line.
                separator = " " if head and head[-1] not in '.!?\n' else "\n"
                mergedElem["text"] = head + separator + newText.lstrip()
            elif newText:
                # Existing element has no text yet - adopt the new text
                mergedElem["text"] = newText

        elif contentType == "table":
            existingRows = mergedElem.get("rows", [])
            newRows = newElem.get("rows", [])
            if existingRows and newRows:
                mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
                mergedElem["rows"] = mergedRows
                logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
            elif newRows:
                # If existing has no rows but new does, use new rows
                mergedElem["rows"] = newRows
            # Preserve headers/caption from existing (or use new if existing has none)
            if not mergedElem.get("headers") and newElem.get("headers"):
                mergedElem["headers"] = newElem["headers"]
            if not mergedElem.get("caption") and newElem.get("caption"):
                mergedElem["caption"] = newElem.get("caption")

        elif contentType in ["bullet_list", "numbered_list"]:
            existingItems = mergedElem.get("items", [])
            newItems = newElem.get("items", [])
            if existingItems and newItems:
                mergedElem["items"] = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
            elif newItems:
                mergedElem["items"] = newItems

        elif contentType == "image":
            existingImageData = mergedElem.get("base64Data", "")
            newImageData = newElem.get("base64Data", "")
            if existingImageData and newImageData:
                # NOTE(review): missing '=' padding is used as the "truncated"
                # signal; complete base64 whose length needs no padding is
                # treated as truncated too.
                if not existingImageData.rstrip().endswith('='):
                    mergedElem["base64Data"] = existingImageData + newImageData
                    logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
                else:
                    # Existing image looks complete - replace with new
                    mergedElem["base64Data"] = newImageData
            elif newImageData:
                mergedElem["base64Data"] = newImageData
            # Preserve other image metadata
            if not mergedElem.get("altText") and newElem.get("altText"):
                mergedElem["altText"] = newElem["altText"]
            if not mergedElem.get("caption") and newElem.get("caption"):
                mergedElem["caption"] = newElem["caption"]

        else:
            # GENERIC FALLBACK: deep recursive merging for any other content type
            mergedElem = JsonResponseHandler.mergeDeepStructures(
                mergedElem, newElem, iteration, f"section.{contentType}"
            )

        # Build the merged section without touching the caller's list.
        mergedSection = existingSection.copy()
        if existingIsList:
            # Replaces the last element; for an empty list the merged element
            # becomes the only element (it was previously discarded).
            mergedSection["elements"] = existingElements[:-1] + [mergedElem]
        else:
            mergedSection["elements"] = mergedElem

        # Preserve metadata from new section if missing in existing
        if "order" not in mergedSection and "order" in newSection:
            mergedSection["order"] = newSection["order"]

        return mergedSection

    @staticmethod
    def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
        """
        Merge two code blocks intelligently, handling overlaps and incomplete lines.

        The last line of existingCode is compared with the first line of
        newCode (both stripped of surrounding whitespace):
        1. Identical lines: the duplicate first new line is dropped.
        2. Last existing line ends with a comma, or its last comma-separated
           part is shorter than 5 characters: the first new line is joined
           onto it with a single comma.
        3. Both contain commas and the last existing part equals the first new
           part: that part is removed from the first new line.

        Args:
            existingCode: Code accumulated so far.
            newCode: Continuation code from the current iteration.
            iteration: Current iteration number (used for log messages only)

        Returns:
            The merged code. If one argument is empty the other is returned
            unchanged. Trailing whitespace of existingCode and surrounding
            whitespace of newCode are removed by the merge.
        """
        if not existingCode:
            return newCode
        if not newCode:
            return existingCode

        # str.split always yields at least one element, so both lists are non-empty.
        existingLines = existingCode.rstrip().split('\n')
        newLines = newCode.strip().split('\n')

        rawLastLine = existingLines[-1]
        lastExistingLine = rawLastLine.strip()
        firstNewLine = newLines[0].strip()

        # Strategy 1: Exact overlap - remove duplicate line
        if lastExistingLine == firstNewLine:
            newLines = newLines[1:]
            logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")

        # Strategy 2: Incomplete line merge
        elif lastExistingLine.endswith(',') or (
            ',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5
        ):
            # Keep the original indentation of the line being completed.
            indent = rawLastLine[:len(rawLastLine) - len(rawLastLine.lstrip())]
            # Normalise to exactly one comma between the two halves.
            existingLines[-1] = indent + lastExistingLine.rstrip(',') + ',' + firstNewLine
            newLines = newLines[1:]
            logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")

        # Strategy 3: Partial overlap detection
        elif ',' in lastExistingLine and ',' in firstNewLine:
            lastExistingParts = lastExistingLine.split(',')
            firstNewParts = firstNewLine.split(',')
            # Overlap: last part of existing equals first part of new
            if (
                len(lastExistingParts) > 1
                and lastExistingParts[-1].strip() == firstNewParts[0].strip()
            ):
                newLines[0] = ','.join(firstNewParts[1:])
                logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")

        # Reconstruct merged code
        mergedCode = '\n'.join(existingLines)
        if newLines:
            if mergedCode and not mergedCode.endswith('\n'):
                mergedCode += '\n'
            mergedCode += '\n'.join(newLines)

        return mergedCode

    @staticmethod
    def detectAndParseJsonFragment(
        result: str,
        allSections: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Detect if response is a JSON fragment (continuation content) rather than
        full document structure.

        Fragments are continuation content that needs to be merged into existing
        sections. Examples:
        - Array of table rows: [["37643", "37649", ...], ...]
        - Array of code lines: ["line1", "line2", ...]
        - Array of list items: ["item1", "item2", ...]
        - Object with "rows" or "code" but without "documents"/"sections"

        Args:
            result: Raw response text; passed through extractJsonString and
                json.loads.
            allSections: Sections accumulated so far, used to pick a target.

        Returns:
            None if the response is not recognised as a fragment (including any
            error while extracting/parsing, which is logged at debug level),
            otherwise a dict with:
            - fragment_type: "table_rows", "code_lines", "list_items",
              "table_element" or "code_element"
            - fragment_data: The parsed fragment content
            - target_section_id: ID of section to merge into (may be None)
        """
        try:
            extracted = extractJsonString(result)
            parsed = json.loads(extracted)

            if isinstance(parsed, list):
                if not parsed:
                    return None
                first_item = parsed[0]

                # Array of arrays -> table rows
                if isinstance(first_item, list):
                    logger.debug("Detected JSON fragment: table rows array")
                    return {
                        "fragment_type": "table_rows",
                        "fragment_data": parsed,
                        "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "table")
                    }

                # Array of strings -> code lines or list items, decided by
                # which kind of section exists (code blocks take precedence).
                if isinstance(first_item, str):
                    code_target_id = JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                    if code_target_id:
                        logger.debug("Detected JSON fragment: code lines array")
                        return {
                            "fragment_type": "code_lines",
                            "fragment_data": parsed,
                            "target_section_id": code_target_id
                        }

                    list_target_id = JsonResponseHandler.findTargetSectionId(allSections, "bullet_list")
                    if list_target_id:
                        logger.debug("Detected JSON fragment: list items array")
                        return {
                            "fragment_type": "list_items",
                            "fragment_data": parsed,
                            "target_section_id": list_target_id
                        }

                    # Default to code lines if no context
                    logger.debug("Detected JSON fragment: string array (assuming code lines)")
                    return {
                        "fragment_type": "code_lines",
                        "fragment_data": parsed,
                        "target_section_id": code_target_id
                    }

            elif isinstance(parsed, dict):
                # A full document carries "documents" or "sections"; anything
                # else with "rows"/"code" is treated as a bare element.
                is_bare_element = "documents" not in parsed and "sections" not in parsed

                if "rows" in parsed and is_bare_element:
                    logger.debug("Detected JSON fragment: table element with rows")
                    return {
                        "fragment_type": "table_element",
                        "fragment_data": parsed,
                        "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "table")
                    }

                if "code" in parsed and is_bare_element:
                    logger.debug("Detected JSON fragment: code element")
                    return {
                        "fragment_type": "code_element",
                        "fragment_data": parsed,
                        "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                    }

        except Exception as e:
            # Deliberate best effort: an unparsable response is simply "not a fragment".
            logger.debug(f"Error detecting JSON fragment: {e}")

        return None

    @staticmethod
    def findTargetSectionId(
        allSections: List[Dict[str, Any]],
        contentType: str
    ) -> Optional[str]:
        """
        Find the ID of the last section of the given content type.

        The last matching section is returned whether or not it is incomplete.

        Args:
            allSections: Sections accumulated so far.
            contentType: Value to match against each section's "content_type".

        Returns:
            The "id" of the last matching section (None if that section has no
            "id"), or None if no section matches.
        """
        for section in reversed(allSections):
            if section.get("content_type") == contentType:
                return section.get("id")
        return None

    @staticmethod
    def mergeFragmentIntoSection(
        fragment: Dict[str, Any],
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> List[Dict[str, Any]]:
        """
        Merge a JSON fragment into the appropriate section.

        This handles the special case where iteration N returns continuation
        content that needs to be merged into the existing structure at the
        overlapping point.

        Target selection, in order:
        1. The section whose "id" equals the fragment's target_section_id
           (skipped when that id is None).
        2. The last incomplete section of the content type matching the
           fragment type.
        3. The last section of that content type.

        Args:
            fragment: Dict with "fragment_type", "fragment_data" and optionally
                "target_section_id" (as produced by detectAndParseJsonFragment).
            allSections: Sections accumulated so far (not modified).
            iteration: Current iteration number (used for log messages only)

        Returns:
            A new list in which the target section is replaced by its merged
            copy. allSections itself is returned when the fragment is empty or
            no target section can be found.
        """
        fragment_type = fragment.get("fragment_type")
        fragment_data = fragment.get("fragment_data")
        target_section_id = fragment.get("target_section_id")

        if not fragment_type or not fragment_data:
            return allSections

        # 1. Look up by ID. Skipped for a None id: otherwise any section
        #    lacking an "id" key would match, regardless of its content type.
        target_index = -1
        if target_section_id is not None:
            for i, section in enumerate(allSections):
                if section.get("id") == target_section_id:
                    target_index = i
                    break

        # 2./3. Fall back to the last incomplete section of the matching type,
        #       then to the last section of the matching type.
        if target_index < 0:
            expected_type = JsonResponseHandler.getContentTypeForFragment(fragment_type)
            last_of_type = -1
            for i in range(len(allSections) - 1, -1, -1):
                section = allSections[i]
                if section.get("content_type") != expected_type:
                    continue
                if last_of_type < 0:
                    last_of_type = i
                if JsonResponseHandler.isSectionIncomplete(section):
                    target_index = i
                    break
            if target_index < 0:
                target_index = last_of_type

        if target_index < 0:
            logger.warning(f"Iteration {iteration}: No target section found for fragment type {fragment_type}")
            return allSections

        # Copy section, element list and last element so the input is not mutated.
        merged_section = allSections[target_index].copy()
        elements = merged_section.get("elements", [])
        if isinstance(elements, list):
            elements = list(elements)
        else:
            elements = [elements] if elements else []

        if not elements:
            # Create new element if none exists
            elements = [{}]

        if isinstance(elements[-1], dict):
            last_element = dict(elements[-1])
        else:
            # Last element is not mergeable - add a fresh element after it
            last_element = {}
            elements.append(last_element)

        # Merge based on fragment type
        if fragment_type == "table_rows":
            existing_rows = last_element.get("rows", [])
            if not isinstance(existing_rows, list):
                existing_rows = []

            merged_rows = JsonResponseHandler.mergeRowsWithOverlap(existing_rows, fragment_data, iteration)
            last_element["rows"] = merged_rows

            # Header inference: only for a table that has neither headers nor
            # rows yet. With existing rows, merged_rows[0] is an existing data
            # row and must not be removed.
            if (
                not existing_rows
                and not last_element.get("headers")
                and isinstance(fragment_data, list)
                and len(fragment_data) > 0
            ):
                first_row = fragment_data[0]
                if (
                    isinstance(first_row, list)
                    and len(first_row) > 0
                    and all(isinstance(cell, str) for cell in first_row)
                ):
                    last_element["headers"] = first_row
                    last_element["rows"] = merged_rows[1:]  # Remove header row

        elif fragment_type == "code_lines":
            existing_code = last_element.get("code", "")
            # Convert array of strings to code block
            if isinstance(fragment_data, list):
                new_code = "\n".join(str(line) for line in fragment_data)
            else:
                new_code = str(fragment_data)
            last_element["code"] = JsonResponseHandler.mergeCodeBlocks(existing_code, new_code, iteration)

        elif fragment_type == "list_items":
            existing_items = last_element.get("items", [])
            if not isinstance(existing_items, list):
                existing_items = []
            new_items = fragment_data if isinstance(fragment_data, list) else [fragment_data]
            last_element["items"] = JsonResponseHandler.mergeItemsWithOverlap(existing_items, new_items, iteration)

        else:
            # "table_element", "code_element" and any other fragment type:
            # deep recursive merge. The fragment type only appears in the
            # debug path.
            last_element = JsonResponseHandler.mergeDeepStructures(
                last_element,
                fragment_data,
                iteration,
                f"section.{target_section_id}.{fragment_type}"
            )

        # Update elements
        elements[-1] = last_element
        merged_section["elements"] = elements

        # Update allSections
        merged_sections = allSections.copy()
        merged_sections[target_index] = merged_section

        # Log the section actually used (may differ from the requested id
        # when a fallback was taken).
        logger.info(f"Iteration {iteration}: Merged {fragment_type} fragment into section '{merged_section.get('id')}'")

        return merged_sections

    @staticmethod
    def getContentTypeForFragment(fragment_type: str) -> str:
        """Map fragment type to content type ("paragraph" for unknown types)."""
        mapping = {
            "table_rows": "table",
            "table_element": "table",
            "code_lines": "code_block",
            "code_element": "code_block",
            "list_items": "bullet_list"
        }
        return mapping.get(fragment_type, "paragraph")

    @staticmethod
    def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
        """
        Deep recursive comparison of two JSON-serializable objects.

        Types must match exactly (1 and 1.0, or 1 and True, are not equal).

        Args:
            obj1: First object to compare
            obj2: Second object to compare
            max_depth: Maximum recursion depth; structures nested deeper than
                this compare as unequal.

        Returns:
            True if objects are deeply equal, False otherwise
        """
        if max_depth <= 0:
            return False

        # Type check
        if type(obj1) is not type(obj2):
            return False

        # Lists/arrays - compare element by element
        if isinstance(obj1, list):
            if len(obj1) != len(obj2):
                return False
            return all(
                JsonResponseHandler.deepCompare(item1, item2, max_depth - 1)
                for item1, item2 in zip(obj1, obj2)
            )

        # Dicts/objects - compare key by key
        if isinstance(obj1, dict):
            if set(obj1.keys()) != set(obj2.keys()):
                return False
            return all(
                JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
                for key in obj1
            )

        # Primitives (str, int, float, bool, None) and any other type
        return obj1 == obj2

    @staticmethod
    def findLongestCommonSuffix(
        existing_list: List[Any],
        new_list: List[Any],
        min_overlap: int = 1
    ) -> int:
        """
        Find the longest suffix of existing_list that matches a prefix of new_list.

        This handles cases where multiple elements overlap:
        - existing: [A, B, C, D]
        - new: [C, D, E, F]
        - overlap: [C, D] (length 2)

        Args:
            existing_list: List accumulated so far.
            new_list: Continuation list.
            min_overlap: Smallest overlap length that is considered.

        Returns:
            The length of the overlap (0 if no overlap found).
        """
        if not existing_list or not new_list:
            return 0

        max_overlap = min(len(existing_list), len(new_list))

        # Try all possible overlap lengths (from longest to shortest)
        for overlap_len in range(max_overlap, min_overlap - 1, -1):
            existing_suffix = existing_list[-overlap_len:]
            new_prefix = new_list[:overlap_len]
            if all(
                JsonResponseHandler.deepCompare(old_item, new_item)
                for old_item, new_item in zip(existing_suffix, new_prefix)
            ):
                return overlap_len

        return 0

    @staticmethod
    def findPartialOverlap(
        existing_item: Any,
        new_item: Any
    ) -> Tuple[bool, Optional[Any]]:
        """
        Detect if new_item completes an incomplete existing_item.

        Handles cases like:
        - existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"]
        - new: ["37643", "37649", ...]

        Rules:
        - Two lists whose last/first elements are strings: the last existing
          string is shorter than 10 characters and a prefix of the first new
          string.
        - Two lists whose last/first elements are lists: the last existing list
          is a strictly shorter prefix of the first new list.
        - Two strings: the existing string is shorter than 50 characters and a
          prefix of the new string.

        Returns:
            (True, merged_item) if partial overlap detected, else (False, None).
        """
        if isinstance(existing_item, list) and isinstance(new_item, list):
            if not existing_item or not new_item:
                return False, None

            last_existing = existing_item[-1]
            first_new = new_item[0]

            if isinstance(last_existing, str) and isinstance(first_new, str):
                if len(last_existing) < 10 and first_new.startswith(last_existing):
                    # The truncated string plus the remainder of its completion
                    merged_last = last_existing + first_new[len(last_existing):]
                    return True, existing_item[:-1] + [merged_last] + new_item[1:]

            if isinstance(last_existing, list) and isinstance(first_new, list):
                if (
                    len(last_existing) < len(first_new)
                    and first_new[:len(last_existing)] == last_existing
                ):
                    # Replace incomplete last with complete first
                    return True, existing_item[:-1] + [first_new] + new_item[1:]

        if isinstance(existing_item, str) and isinstance(new_item, str):
            if len(existing_item) < 50 and new_item.startswith(existing_item):
                return True, existing_item + new_item[len(existing_item):]

        return False, None

    @staticmethod
    def mergeRowsWithOverlap(
        existing_rows: List[List[str]],
        new_rows: List[List[str]],
        iteration: int
    ) -> List[List[str]]:
        """
        Merge table rows with sophisticated overlap detection.

        Handles multiple overlapping rows and partial overlaps.

        Args:
            existing_rows: Rows accumulated so far (not modified).
            new_rows: Continuation rows (not modified).
            iteration: Current iteration number (used for log messages only)

        Returns:
            The merged rows. If one argument is empty the other list object is
            returned as-is.
        """
        if not new_rows:
            return existing_rows
        if not existing_rows:
            return new_rows

        # Strategy 1: Find longest common suffix/prefix overlap
        overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
        if overlap_len > 0:
            logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
            return existing_rows + new_rows[overlap_len:]

        # Strategy 2: Check for partial overlap in last row
        last_existing = existing_rows[-1]
        first_new = new_rows[0]
        is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
        if is_partial:
            logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
            return existing_rows[:-1] + [merged_row] + new_rows[1:]

        # Strategy 3: Simple first/last comparison (fallback). Uses ==, which
        # is looser than the type-strict deepCompare of strategy 1.
        if isinstance(last_existing, list) and isinstance(first_new, list):
            if list(last_existing) == list(first_new):
                logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
                return existing_rows + new_rows[1:]

        # No overlap detected - append all new rows
        return existing_rows + new_rows

    @staticmethod
    def mergeItemsWithOverlap(
        existing_items: List[str],
        new_items: List[str],
        iteration: int
    ) -> List[str]:
        """
        Merge list items with sophisticated overlap detection.

        Handles multiple overlapping items and partial overlaps.

        Args:
            existing_items: Items accumulated so far (not modified).
            new_items: Continuation items (not modified).
            iteration: Current iteration number (used for log messages only)

        Returns:
            The merged items. If one argument is empty the other list object is
            returned as-is.
        """
        if not new_items:
            return existing_items
        if not existing_items:
            return new_items

        # Strategy 1: Find longest common suffix/prefix overlap
        overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
        if overlap_len > 0:
            logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
            return existing_items + new_items[overlap_len:]

        # Strategy 2: Check for partial overlap in last item
        is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
        if is_partial:
            logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
            return existing_items[:-1] + [merged_item] + new_items[1:]

        # Strategy 3: Simple first/last comparison (fallback). Uses ==, which
        # is looser than the type-strict deepCompare of strategy 1.
        if existing_items[-1] == new_items[0]:
            logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
            return existing_items + new_items[1:]

        # No overlap detected - append all new items
        return existing_items + new_items

    @staticmethod
    def mergeDeepStructures(
        existing: Any,
        new: Any,
        iteration: int,
        path: str = "root"
    ) -> Any:
        """
        Recursively merge two JSON structures of arbitrary depth and complexity.

        - Different types: the new value replaces the existing one.
        - Lists: concatenated, with a duplicated suffix/prefix removed or a
          truncated last element completed by the first new element.
        - Dicts: merged key by key, recursing into keys present in both.
        - Primitives: the new value wins.

        Args:
            existing: Existing structure to merge into (not modified)
            new: New structure to merge
            iteration: Current iteration number for logging
            path: Current path in structure (for debugging)

        Returns:
            Merged structure
        """
        # Type check
        if type(existing) is not type(new):
            # Types don't match - return new (replacement)
            logger.debug(f"Iteration {iteration}: Types don't match at {path}, replacing")
            return new

        # Lists/arrays - merge with overlap detection
        if isinstance(existing, list):
            if not new:
                return existing
            if not existing:
                return new

            overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
            if overlap_len > 0:
                logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
                return existing + new[overlap_len:]

            # Check for partial overlap in last element
            is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
            if is_partial:
                logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging")
                return existing[:-1] + [merged_item] + new[1:]

            # No overlap - append all
            return existing + new

        # Dicts/objects - merge recursively
        if isinstance(existing, dict):
            merged = existing.copy()
            for key, new_value in new.items():
                if key in merged:
                    # Key exists - merge recursively
                    merged[key] = JsonResponseHandler.mergeDeepStructures(
                        merged[key],
                        new_value,
                        iteration,
                        f"{path}.{key}"
                    )
                else:
                    # New key - add it
                    merged[key] = new_value
            return merged

        # Primitives - if equal, return existing; otherwise return new
        if existing == new:
            return existing
        return new