# gateway/modules/services/serviceAi/subJsonMerger.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Modular JSON Merger - Intelligent JSON Fragment Merging
A clean, modular approach to merging JSON fragments that may be cut randomly.
Designed to be simple, robust, and always return valid data.
Architecture:
1. Data Extractor: Extracts all possible data from fragments (even incomplete)
2. Structure Detector: Detects JSON structure type (elements, documents, files, etc.)
3. Data Merger: Intelligently merges data with overlap detection
4. Result Builder: Always returns valid JSON structure
"""
import json
import re
import logging
import os
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.shared.jsonUtils import (
normalizeJsonText, stripCodeFences, closeJsonStructures, tryParseJson
)
logger = logging.getLogger(__name__)
class JsonMergeLogger:
    """Consolidated logger for JSON merging process.

    Log lines are buffered in memory per merge operation and flushed to a
    file in finishMerge(). All state is class-level: this logger is a
    process-wide singleton by design.
    """
    # Buffered log lines for the current merge operation.
    _logBuffer: List[str] = []
    # Monotonically increasing merge counter (merge IDs / fallback file names).
    _mergeId: int = 0
    # Target log file name (relative to this module's directory), or None.
    _currentLogFile: Optional[str] = None
    # False until the first flush; later flushes append instead of overwrite.
    _appendMode: bool = False

    @staticmethod
    def initializeLogFile(logFileName: Optional[str] = None):
        """Initialize a new log file for a test run."""
        JsonMergeLogger._logBuffer = []
        JsonMergeLogger._mergeId = 0
        if logFileName:
            JsonMergeLogger._currentLogFile = logFileName
            JsonMergeLogger._appendMode = False
            # Clear existing file (best-effort: filesystem errors are ignored)
            try:
                currentFileDir = os.path.dirname(os.path.abspath(__file__))
                logFilePath = os.path.join(currentFileDir, logFileName)
                with open(logFilePath, 'w', encoding='utf-8') as f:
                    f.write("")  # Clear file
            except Exception:
                pass
        else:
            JsonMergeLogger._currentLogFile = None
            JsonMergeLogger._appendMode = False

    @staticmethod
    def _logLineSummary(lines: List[str], indent: str = " "):
        """Log the first 5 and last 5 of *lines* when there are more than 10,
        otherwise log all of them. Shared by every summarized dump below."""
        if len(lines) > 10:
            for line in lines[:5]:
                JsonMergeLogger._log(f"{indent}{line}")
            JsonMergeLogger._log(f"{indent}... ({len(lines) - 10} lines omitted) ...")
            for line in lines[-5:]:
                JsonMergeLogger._log(f"{indent}{line}")
        else:
            for line in lines:
                JsonMergeLogger._log(f"{indent}{line}")

    @staticmethod
    def _logFencedBlock(text: str, indent: str = " "):
        """Log *text* line by line between '=' ruler lines (complete dump)."""
        JsonMergeLogger._log(indent + "="*76)
        for line in text.split('\n'):
            JsonMergeLogger._log(f"{indent}{line}")
        JsonMergeLogger._log(indent + "="*76)

    @staticmethod
    def _logStrFallback(data: Any):
        """Log a summarized str() representation when JSON serialization fails."""
        strRepr = str(data)
        strLines = strRepr.split('\n')
        JsonMergeLogger._log(f" String representation ({len(strRepr)} chars, {len(strLines)} lines)")
        if len(strLines) > 10:
            JsonMergeLogger._log(f" (showing first 5 and last 5 lines)")
        JsonMergeLogger._logLineSummary(strLines)

    @staticmethod
    def startMerge(accumulated: str, newFragment: str) -> str:
        """Start a new merge operation and return merge ID."""
        JsonMergeLogger._mergeId += 1
        mergeId = f"merge_{JsonMergeLogger._mergeId}"
        JsonMergeLogger._log(f"{'='*80}")
        JsonMergeLogger._log(f"JSON MERGE OPERATION #{JsonMergeLogger._mergeId}")
        JsonMergeLogger._log(f"{'='*80}")
        JsonMergeLogger._log(f"Timestamp: {datetime.now().isoformat()}")
        JsonMergeLogger._log("")
        JsonMergeLogger._log("INPUT:")
        JsonMergeLogger._log(f" Accumulated length: {len(accumulated)} chars")
        JsonMergeLogger._log(f" New Fragment length: {len(newFragment)} chars")
        # Log only summary (first 5 and last 5 lines) to avoid log spam
        accLines = accumulated.split('\n')
        fragLines = newFragment.split('\n')
        JsonMergeLogger._log(f" Accumulated: {len(accLines)} lines (showing first 5 and last 5)")
        JsonMergeLogger._logLineSummary(accLines)
        JsonMergeLogger._log(f" New Fragment: {len(fragLines)} lines (showing first 5 and last 5)")
        JsonMergeLogger._logLineSummary(fragLines)
        JsonMergeLogger._log("")
        return mergeId

    @staticmethod
    def logStep(stepName: str, description: str, result: Any = None, error: Optional[str] = None):
        """Log a step with its result (string, dict, list, or scalar)."""
        JsonMergeLogger._log(f"STEP: {stepName}")
        JsonMergeLogger._log(f" Description: {description}")
        if error:
            JsonMergeLogger._log(f" ❌ ERROR: {error}")
        elif result is not None:
            if isinstance(result, str):
                resultLines = result.split('\n')
                JsonMergeLogger._log(f" ✅ Result (string, {len(result)} chars, {len(resultLines)} lines)")
                if len(resultLines) > 10:
                    JsonMergeLogger._log(f" (showing first 5 and last 5 lines)")
                JsonMergeLogger._logLineSummary(resultLines)
            elif isinstance(result, dict):
                keys = list(result.keys())
                JsonMergeLogger._log(f" ✅ Result (dict): keys={keys}, size={len(str(result))} chars")
                # Log full structure with JSON formatting - NO TRUNCATION
                try:
                    jsonStr = json.dumps(result, indent=2, ensure_ascii=False)
                    JsonMergeLogger._log(f" Full data (COMPLETE, {len(jsonStr)} chars):")
                    JsonMergeLogger._logFencedBlock(jsonStr)
                except Exception as e:
                    JsonMergeLogger._log(f" Could not serialize: {e}")
                    JsonMergeLogger._logStrFallback(result)
                # Log structure details
                if "elements" in result:
                    elemCount = len(result["elements"]) if isinstance(result["elements"], list) else 0
                    JsonMergeLogger._log(f" - elements: {elemCount} items")
                    if isinstance(result["elements"], list) and elemCount > 0:
                        JsonMergeLogger._log(f" First element type: {result['elements'][0].get('type', 'unknown') if isinstance(result['elements'][0], dict) else 'not a dict'}")
                if "documents" in result:
                    docCount = len(result["documents"]) if isinstance(result["documents"], list) else 0
                    JsonMergeLogger._log(f" - documents: {docCount} items")
            elif isinstance(result, list):
                JsonMergeLogger._log(f" ✅ Result (list): {len(result)} items (COMPLETE)")
                if len(result) > 0:
                    JsonMergeLogger._log(f" First item type: {type(result[0]).__name__}")
                    try:
                        jsonStr = json.dumps(result, indent=2, ensure_ascii=False)  # ALL items
                        JsonMergeLogger._log(f" All items (COMPLETE, {len(jsonStr)} chars):")
                        JsonMergeLogger._logFencedBlock(jsonStr)
                    except Exception:
                        JsonMergeLogger._logStrFallback(result)
            else:
                JsonMergeLogger._log(f" ✅ Result: {type(result).__name__} = {str(result)[:200]}")
        else:
            JsonMergeLogger._log(f" ⏳ In progress...")
        JsonMergeLogger._log("")

    @staticmethod
    def logExtraction(strategy: str, success: bool, data: Any = None, error: Optional[str] = None):
        """Log extraction strategy result."""
        status = "✅ SUCCESS" if success else "❌ FAILED"
        JsonMergeLogger._log(f" Extraction Strategy: {strategy} - {status}")
        if error:
            JsonMergeLogger._log(f" Error: {error}")
        elif data is not None:
            if isinstance(data, dict):
                keys = list(data.keys())
                JsonMergeLogger._log(f" Extracted keys: {keys}")
                # Log full extracted data - NO TRUNCATION
                try:
                    jsonStr = json.dumps(data, indent=2, ensure_ascii=False)
                    JsonMergeLogger._log(f" Extracted data (COMPLETE, {len(jsonStr)} chars):")
                    JsonMergeLogger._logFencedBlock(jsonStr)
                except Exception as e:
                    JsonMergeLogger._log(f" Could not serialize extracted data: {e}")
                    JsonMergeLogger._logStrFallback(data)
            elif isinstance(data, list):
                JsonMergeLogger._log(f" Extracted {len(data)} items (COMPLETE)")
                if len(data) > 0:
                    try:
                        jsonStr = json.dumps(data, indent=2, ensure_ascii=False)  # ALL items
                        JsonMergeLogger._log(f" All items (COMPLETE, {len(jsonStr)} chars):")
                        JsonMergeLogger._logFencedBlock(jsonStr)
                    except Exception as e:
                        JsonMergeLogger._log(f" Could not serialize list: {e}")
                        JsonMergeLogger._logStrFallback(data)

    @staticmethod
    def logOverlap(overlapType: str, overlapLen: int, accSuffix: Any = None, fragPrefix: Any = None):
        """Log overlap detection result."""
        JsonMergeLogger._log(f" Overlap Detection ({overlapType}):")
        JsonMergeLogger._log(f" Overlap length: {overlapLen}")
        if overlapLen > 0:
            JsonMergeLogger._log(f" ✅ Found overlap of {overlapLen} chars")
            if accSuffix is not None:
                if isinstance(accSuffix, str):
                    JsonMergeLogger._log(f" Accumulated suffix (COMPLETE, {len(accSuffix)} chars):")
                    JsonMergeLogger._logFencedBlock(accSuffix)
                elif isinstance(accSuffix, list):
                    # For lists/arrays, only log summary to avoid log flooding
                    JsonMergeLogger._log(f" Accumulated suffix: list with {len(accSuffix)} items")
                else:
                    JsonMergeLogger._log(f" Accumulated suffix: {type(accSuffix).__name__}")
            if fragPrefix is not None:
                if isinstance(fragPrefix, str):
                    prefixLines = fragPrefix.split('\n')
                    JsonMergeLogger._log(f" Fragment prefix ({len(fragPrefix)} chars, {len(prefixLines)} lines)")
                    if len(prefixLines) > 10:
                        JsonMergeLogger._log(f" (showing first 5 and last 5 lines)")
                    JsonMergeLogger._logLineSummary(prefixLines)
                elif isinstance(fragPrefix, list):
                    # For lists/arrays, only log summary to avoid log flooding
                    JsonMergeLogger._log(f" Fragment prefix: list with {len(fragPrefix)} items")
                else:
                    JsonMergeLogger._log(f" Fragment prefix: {type(fragPrefix).__name__}")
        else:
            JsonMergeLogger._log(f" ⚠️ No overlap detected - appending all")

    @staticmethod
    def logValidation(validationType: str, success: bool, error: Optional[str] = None):
        """Log validation result."""
        status = "✅ VALID" if success else "❌ INVALID"
        JsonMergeLogger._log(f" Validation ({validationType}): {status}")
        if error:
            JsonMergeLogger._log(f" Error: {error}")

    @staticmethod
    def finishMerge(mergeId: str, finalResult: str, success: bool):
        """Finish merge operation and write log file."""
        JsonMergeLogger._log("")
        JsonMergeLogger._log(f"{'='*80}")
        JsonMergeLogger._log(f"MERGE RESULT: {'✅ SUCCESS' if success else '❌ FAILED'}")
        JsonMergeLogger._log(f"{'='*80}")
        JsonMergeLogger._log(f"Final result length: {len(finalResult)} chars")
        JsonMergeLogger._log("Final result (COMPLETE):")
        JsonMergeLogger._log("="*80)
        for line in finalResult.split('\n'):
            JsonMergeLogger._log(line)
        JsonMergeLogger._log("="*80)
        JsonMergeLogger._log("")
        # Write log content to buffer (will be written at end of test run)
        logContent = "\n".join(JsonMergeLogger._logBuffer)
        # If we have a current log file, append to it
        if JsonMergeLogger._currentLogFile:
            try:
                currentFileDir = os.path.dirname(os.path.abspath(__file__))
                logFilePath = os.path.join(currentFileDir, JsonMergeLogger._currentLogFile)
                mode = 'a' if JsonMergeLogger._appendMode else 'w'
                with open(logFilePath, mode, encoding='utf-8') as f:
                    f.write(logContent)
                    f.write("\n\n")  # Add separator between merges
                JsonMergeLogger._appendMode = True  # Next writes will append
                logger.debug(f"JSON merge log appended to: {logFilePath}")
            except Exception as e:
                logger.error(f"Failed to write merge log file: {e}")
        else:
            # No log file set - write individual file (fallback)
            currentFileDir = os.path.dirname(os.path.abspath(__file__))
            logDir = currentFileDir
            os.makedirs(logDir, exist_ok=True)
            logFilePath = os.path.join(logDir, f"{mergeId}.txt")
            try:
                with open(logFilePath, 'w', encoding='utf-8') as f:
                    f.write(logContent)
                logger.info(f"JSON merge log written to: {logFilePath}")
            except Exception as e:
                logger.error(f"Failed to write merge log file: {e}")
        # Clear buffer for next merge
        JsonMergeLogger._logBuffer = []

    @staticmethod
    def _log(message: str):
        """Append to the in-memory buffer and mirror to the module logger."""
        JsonMergeLogger._logBuffer.append(message)
        logger.debug(message)
class JsonDataExtractor:
"""Extracts data from JSON fragments, even if incomplete."""
@staticmethod
def extract(jsonString: str, mergeId: Optional[str] = None, removeFromEnd: bool = True) -> Dict[str, Any]:
    """
    Extract complete data from JSON fragment.
    For merging: We know exactly where to clean:
    - accumulated: remove incomplete parts at the END
    - newFragment: remove incomplete parts at the BEGINNING
    Simple approach: Remove incomplete parts at specified position, then parse.
    Always returns a dict (possibly empty); lists and scalars are wrapped
    under an "elements" key.
    """
    def wrapParsed(parsed: Any) -> Dict[str, Any]:
        # Normalize any successfully parsed value into the dict shape callers expect.
        if isinstance(parsed, dict):
            return parsed
        if isinstance(parsed, list):
            return {"elements": parsed}
        return {"elements": [parsed]} if parsed else {}
    if mergeId:
        position = "END" if removeFromEnd else "BEGINNING"
        JsonMergeLogger.logStep("EXTRACTION", f"Extracting data from JSON fragment ({len(jsonString)} chars) - cleaning from {position}")
    if not jsonString or not jsonString.strip():
        if mergeId:
            JsonMergeLogger.logExtraction("Empty input", False, error="Input is empty")
        return {}
    normalized = stripCodeFences(normalizeJsonText(jsonString)).strip()
    if not normalized:
        if mergeId:
            JsonMergeLogger.logExtraction("Normalization", False, error="Normalized string is empty")
        return {}
    # Try to parse as complete JSON first
    parsed, parseErr, _ = tryParseJson(normalized)
    if parseErr is None and parsed is not None:
        finalResult = wrapParsed(parsed)
        if mergeId:
            JsonMergeLogger.logExtraction("Direct parsing", True, finalResult)
            JsonMergeLogger.logStep("EXTRACTION", "Direct parsing successful", finalResult)
        return finalResult if finalResult else {}
    # Remove incomplete parts from specified position
    if removeFromEnd:
        cleaned = JsonDataExtractor._removeIncompleteFromEnd(normalized)
    else:
        cleaned = JsonDataExtractor._removeIncompleteFromBeginning(normalized)
    if cleaned:
        # Close structures and try to parse
        closed = closeJsonStructures(cleaned)
        parsed, parseErr2, _ = tryParseJson(closed)
        if parseErr2 is None and parsed is not None:
            finalResult = wrapParsed(parsed)
            if mergeId:
                JsonMergeLogger.logExtraction("Remove incomplete + close", True, finalResult)
                JsonMergeLogger.logStep("EXTRACTION", "Remove incomplete + close successful", finalResult)
            return finalResult if finalResult else {}
    # Return empty dict if nothing worked
    if mergeId:
        JsonMergeLogger.logStep("EXTRACTION", "No data extracted", {}, error="All strategies failed")
    return {}
@staticmethod
def _removeIncompleteFromEnd(jsonString: str) -> str:
"""
Remove incomplete parts from the END of JSON string.
Goes through structure level by level, keeps complete elements, removes incomplete ones at the end.
"""
# Find first '{' or '[' to start
startIdx = -1
for i, char in enumerate(jsonString):
if char in '{[':
startIdx = i
break
if startIdx == -1:
return ""
# Remove incomplete parts from end recursively
cleaned = JsonDataExtractor._cleanJsonFromEnd(jsonString[startIdx:])
return cleaned
@staticmethod
def _removeIncompleteFromBeginning(jsonString: str) -> str:
"""
Remove incomplete parts from the BEGINNING of JSON string.
Finds where valid JSON starts and removes everything before it.
"""
# Find first '{' or '[' to start
startIdx = -1
for i, char in enumerate(jsonString):
if char in '{[':
startIdx = i
break
if startIdx == -1:
return ""
# Return from start position - beginning cleanup is just finding the start
return jsonString[startIdx:]
@staticmethod
def _cleanJsonFromEnd(jsonStr: str) -> str:
"""
Recursively clean JSON from the END: keep complete elements, remove incomplete ones at the end.
Goes through structure level by level.
"""
# Try to parse as-is first
try:
parsed = json.loads(jsonStr)
return jsonStr
except Exception:
pass
# If dict: go through each key-value pair, remove incomplete ones at the end
if jsonStr.strip().startswith('{'):
return JsonDataExtractor._cleanDictFromEnd(jsonStr)
# If array: go through each element, remove incomplete ones at the end
if jsonStr.strip().startswith('['):
return JsonDataExtractor._cleanArrayFromEnd(jsonStr)
return ""
@staticmethod
def _cleanDictFromEnd(jsonStr: str) -> str:
    """Clean dict from END: keep complete key-value pairs, remove incomplete ones at the end.

    Walks the object pair by pair; as soon as a key or value turns out to be
    truncated or invalid, it and everything after it is dropped and the
    object is re-closed with '}'.
    """
    if not jsonStr.strip().startswith('{'):
        return ""
    result = ['{']
    i = 1  # Skip opening '{'
    first = True  # Tracks whether a comma separator is needed before the next pair
    while i < len(jsonStr):
        # Skip whitespace
        while i < len(jsonStr) and jsonStr[i] in ' \n\r\t':
            i += 1
        if i >= len(jsonStr):
            break
        # Check if we hit closing brace
        if jsonStr[i] == '}':
            break
        # Skip comma
        if jsonStr[i] == ',':
            i += 1
            continue
        # Try to extract key-value pair
        keyStart = i
        # Find key (string); honor backslash escapes inside the key
        if jsonStr[i] == '"':
            i += 1
            while i < len(jsonStr) and jsonStr[i] != '"':
                if jsonStr[i] == '\\':
                    i += 2
                else:
                    i += 1
            if i < len(jsonStr):
                i += 1  # Skip closing quote
            else:
                # Invalid key - stop here (incomplete at end)
                break
        # Skip whitespace and colon
        # NOTE(review): this also tolerates repeated colons - acceptable for salvage parsing
        while i < len(jsonStr) and jsonStr[i] in ' \n\r\t:':
            i += 1
        if i >= len(jsonStr):
            break
        # Try to extract value
        valueStart = i
        valueEnd = JsonDataExtractor._findCompleteValue(jsonStr, i)
        if valueEnd > valueStart:
            # Try to parse this key-value pair
            pairStr = jsonStr[keyStart:valueEnd]
            try:
                # Test if it's valid JSON (wrap the pair in braces to make it parseable)
                testStr = '{' + pairStr + '}'
                json.loads(testStr)
                # Valid pair - add it
                if not first:
                    result.append(',')
                result.append(pairStr)
                first = False
                i = valueEnd
            except Exception:
                # Invalid pair - stop here (incomplete at end)
                break
        else:
            # Incomplete value - stop here (incomplete at end)
            break
    result.append('}')
    return ''.join(result)
@staticmethod
def _cleanArrayFromEnd(jsonStr: str) -> str:
    """Clean array from END: keep complete elements, remove incomplete ones at the end.

    Walks the array element by element; the first truncated or invalid
    element and everything after it is dropped, then the array is
    re-closed with ']'.
    """
    if not jsonStr.strip().startswith('['):
        return ""
    result = ['[']
    i = 1  # Skip opening '['
    first = True  # Tracks whether a comma separator is needed before the next element
    while i < len(jsonStr):
        # Skip whitespace
        while i < len(jsonStr) and jsonStr[i] in ' \n\r\t':
            i += 1
        if i >= len(jsonStr):
            break
        # Check if we hit closing bracket
        if jsonStr[i] == ']':
            break
        # Skip comma
        if jsonStr[i] == ',':
            i += 1
            continue
        # Try to extract element
        elemStart = i
        elemEnd = JsonDataExtractor._findCompleteValue(jsonStr, i)
        if elemEnd > elemStart:
            # Try to parse this element
            elemStr = jsonStr[elemStart:elemEnd]
            try:
                # Test if it's valid JSON
                json.loads(elemStr)
                # Valid element - add it
                if not first:
                    result.append(',')
                result.append(elemStr)
                first = False
                i = elemEnd
            except Exception:
                # Invalid element - stop here (incomplete at end)
                break
        else:
            # Incomplete element - stop here (incomplete at end)
            break
    result.append(']')
    return ''.join(result)
@staticmethod
def _findCompleteValue(jsonStr: str, start: int) -> int:
    """Find the end of a complete JSON value starting at start position.

    Returns the index just past the value when it is complete, or *start*
    (i.e. no progress) when the value is truncated or unrecognized.
    """
    if start >= len(jsonStr):
        return start
    i = start
    # Skip whitespace
    while i < len(jsonStr) and jsonStr[i] in ' \n\r\t':
        i += 1
    if i >= len(jsonStr):
        return start
    char = jsonStr[i]
    # String: scan to the unescaped closing quote
    if char == '"':
        i += 1
        while i < len(jsonStr):
            if jsonStr[i] == '\\':
                i += 2
            elif jsonStr[i] == '"':
                return i + 1
            else:
                i += 1
        return start  # Incomplete string
    # Number, boolean, null: scan until a delimiter (',', '}', ']')
    # NOTE(review): a literal truncated at end-of-string (e.g. "12" cut from
    # "123", or "tru") is indistinguishable from a complete one here and is
    # treated as complete; callers re-validate with json.loads.
    if char in '-0123456789tfn':
        while i < len(jsonStr) and jsonStr[i] not in ',}]':
            i += 1
        return i
    # Object: balanced-brace scan, skipping over string literals
    if char == '{':
        braceCount = 1
        i += 1
        while i < len(jsonStr) and braceCount > 0:
            if jsonStr[i] == '\\':
                # Defensive: skip escape pairs even outside strings
                i += 2
            elif jsonStr[i] == '"':
                # Skip string (braces inside it are data, not structure)
                i += 1
                while i < len(jsonStr):
                    if jsonStr[i] == '\\':
                        i += 2
                    elif jsonStr[i] == '"':
                        i += 1
                        break
                    else:
                        i += 1
            elif jsonStr[i] == '{':
                braceCount += 1
                i += 1
            elif jsonStr[i] == '}':
                braceCount -= 1
                i += 1
            else:
                i += 1
        if braceCount == 0:
            return i
        return start  # Incomplete object
    # Array: balanced-bracket scan, skipping over string literals
    if char == '[':
        bracketCount = 1
        i += 1
        while i < len(jsonStr) and bracketCount > 0:
            if jsonStr[i] == '\\':
                # Defensive: skip escape pairs even outside strings
                i += 2
            elif jsonStr[i] == '"':
                # Skip string (brackets inside it are data, not structure)
                i += 1
                while i < len(jsonStr):
                    if jsonStr[i] == '\\':
                        i += 2
                    elif jsonStr[i] == '"':
                        i += 1
                        break
                    else:
                        i += 1
            elif jsonStr[i] == '[':
                bracketCount += 1
                i += 1
            elif jsonStr[i] == ']':
                bracketCount -= 1
                i += 1
            else:
                i += 1
        if bracketCount == 0:
            return i
        return start  # Incomplete array
    # Unrecognized leading character - treat as incomplete
    return start
@staticmethod
def _extractAllCompleteObjects(jsonString: str) -> List[Dict[str, Any]]:
"""
Extract ALL complete objects from JSON string using balanced brace matching.
Ignores incomplete objects at the end.
Core principle: Every fragment can be cut anywhere - extract only complete objects.
"""
foundObjs = []
braceCount = 0
startPos = -1
for i, char in enumerate(jsonString):
if char == '{':
if braceCount == 0:
startPos = i
braceCount += 1
elif char == '}':
braceCount -= 1
if braceCount == 0 and startPos >= 0:
# Found a complete object
objStr = jsonString[startPos:i+1]
try:
obj = json.loads(objStr)
if isinstance(obj, dict) and obj:
foundObjs.append(obj)
except Exception:
# Not valid JSON - skip it
pass
startPos = -1
elif braceCount < 0:
# Unbalanced - reset
braceCount = 0
startPos = -1
# If we end with an incomplete object (startPos >= 0 and braceCount > 0), ignore it
# It will be in the next fragment
return foundObjs
@staticmethod
def _extractElements(jsonString: str) -> List[Dict[str, Any]]:
    """Extract elements array from JSON string - extracts ALL complete elements.

    Tries three patterns in order:
    1. an "elements": [...] array (balanced-brace scan of each element object),
    2. a table element ("type": "table" with a "rows" array),
    3. bare table rows with no surrounding structure.
    """
    elements = []
    # Pattern 1: Look for "elements": [...] (including incomplete at end)
    elementsPattern = r'"elements"\s*:\s*\[(.*)'
    match = re.search(elementsPattern, jsonString, re.DOTALL)
    if match:
        elementsContent = match.group(1)
        # Extract ALL complete element objects using balanced brace matching
        # NOTE(review): braces inside string values are not skipped here; an
        # element containing "{" or "}" in a string may be mis-delimited, but
        # json.loads below rejects any mis-captured slice.
        braceCount = 0
        startPos = -1
        for i, char in enumerate(elementsContent):
            if char == '{':
                if braceCount == 0:
                    startPos = i
                braceCount += 1
            elif char == '}':
                braceCount -= 1
                if braceCount == 0 and startPos >= 0:
                    elementStr = elementsContent[startPos:i+1]
                    try:
                        element = json.loads(elementStr)
                        if isinstance(element, dict):
                            elements.append(element)
                    except Exception:
                        # Try to extract table rows from incomplete element
                        rows = JsonDataExtractor._extractTableRowsFromElement(elementStr)
                        if rows:
                            elements.append({
                                "type": "table",
                                "content": {
                                    "rows": rows
                                }
                            })
                    startPos = -1
                elif braceCount < 0:
                    break  # Unbalanced - stop
    # Pattern 2: Look for table structure directly (even if incomplete)
    if not elements:
        # Look for "type": "table" pattern
        tablePattern = r'"type"\s*:\s*"table"[^}]*"rows"\s*:\s*\[(.*?)(?:\]|$)'
        tableMatch = re.search(tablePattern, jsonString, re.DOTALL)
        if tableMatch:
            rowsContent = tableMatch.group(1)
            rows = JsonDataExtractor._extractRowsFromContent(rowsContent)
            if rows:
                elements.append({
                    "type": "table",
                    "content": {
                        "rows": rows
                    }
                })
    # Pattern 3: Look for table rows directly (without structure)
    if not elements:
        rows = JsonDataExtractor._extractTableRows(jsonString)
        if rows:
            elements.append({
                "type": "table",
                "content": {
                    "rows": rows
                }
            })
    return elements
@staticmethod
def _extractTableRowsFromElement(elementStr: str) -> List[List[str]]:
"""Extract table rows from incomplete element string."""
# Look for rows array in element
rowsPattern = r'"rows"\s*:\s*\[(.*?)(?:\]|$)'
match = re.search(rowsPattern, elementStr, re.DOTALL)
if match:
return JsonDataExtractor._extractRowsFromContent(match.group(1))
return []
@staticmethod
def _extractRowsFromContent(rowsContent: str) -> List[List[str]]:
"""Extract rows from rows content string."""
rows = []
# Extract all array patterns: ["value1", "value2"]
# Use non-greedy matching but ensure we get complete arrays
arrayPattern = r'\[(.*?)\]'
arrayMatches = re.findall(arrayPattern, rowsContent)
for arrayContent in arrayMatches:
# Extract cells - handle both quoted strings and numbers
# First try to find quoted strings
cellPattern = r'"([^"]*)"'
cells = re.findall(cellPattern, arrayContent)
# If no quoted strings, try numbers or other values
if not cells:
# Try to find any values (numbers, booleans, etc.)
valuePattern = r'(-?\d+\.?\d*|true|false|null)'
cells = re.findall(valuePattern, arrayContent)
# Only add rows with at least 1 cell (allow single-column tables)
if len(cells) >= 1:
rows.append(cells)
return rows
@staticmethod
def _extractTableRows(jsonString: str) -> List[List[str]]:
"""Extract table rows from JSON string using multiple strategies."""
rows = []
# Strategy 1: Look for "rows": [[...], [...]]
rowsPattern = r'"rows"\s*:\s*\[(.*?)(?:\]|$)'
match = re.search(rowsPattern, jsonString, re.DOTALL)
if match:
rowsContent = match.group(1)
rows = JsonDataExtractor._extractRowsFromContent(rowsContent)
if rows:
return rows
# Strategy 2: Look for standalone array patterns ["value1", "value2"]
# Pattern for complete arrays with 2 columns
completeArrayPattern = r'\["([^"]*)",\s*"([^"]*)"\]'
matches = re.findall(completeArrayPattern, jsonString)
if len(matches) >= 2: # Need at least 2 rows to be confident
return [[m[0], m[1]] for m in matches]
# Strategy 3: Extract any array patterns (more lenient)
# Find all [ ... ] patterns that contain quoted strings
allArrays = re.findall(r'\[([^\]]*)\]', jsonString)
for arrayContent in allArrays:
# Extract quoted strings
cells = re.findall(r'"([^"]*)"', arrayContent)
if len(cells) >= 2: # At least 2 columns
rows.append(cells)
# Only return if we have multiple rows (likely a table)
if len(rows) >= 2:
return rows
return []
@staticmethod
def _extractDocuments(jsonString: str) -> List[Dict[str, Any]]:
    """
    Extract documents structure from JSON string - extracts ALL complete documents/chapters/sections.
    Ignores incomplete ones at the end.
    Core principle: Fragment can be cut anywhere - extract only complete objects.
    """
    documents = []
    # Pattern 1: Look for "documents": [...] structure (including incomplete at end)
    documentsPattern = r'"documents"\s*:\s*\[(.*)'
    match = re.search(documentsPattern, jsonString, re.DOTALL)
    if match:
        documentsContent = match.group(1)
        # Extract ALL complete document objects using balanced brace matching
        # NOTE(review): braces inside string values are not skipped here;
        # json.loads below rejects any mis-captured slice.
        braceCount = 0
        startPos = -1
        for i, char in enumerate(documentsContent):
            if char == '{':
                if braceCount == 0:
                    startPos = i
                braceCount += 1
            elif char == '}':
                braceCount -= 1
                if braceCount == 0 and startPos >= 0:
                    # Found a complete document object
                    docStr = documentsContent[startPos:i+1]
                    try:
                        doc = json.loads(docStr)
                        if isinstance(doc, dict):
                            # Extract chapters/sections from document and
                            # overwrite the raw arrays with the salvaged ones
                            chapters = JsonDataExtractor._extractChaptersFromDocument(docStr)
                            sections = JsonDataExtractor._extractSectionsFromDocument(docStr)
                            if chapters:
                                doc["chapters"] = chapters
                            if sections:
                                doc["sections"] = sections
                            if doc:
                                documents.append(doc)
                    except Exception:
                        # Not valid JSON - try to extract chapters/sections directly
                        chapters = JsonDataExtractor._extractChaptersFromDocument(docStr)
                        sections = JsonDataExtractor._extractSectionsFromDocument(docStr)
                        if chapters or sections:
                            doc = {}
                            if chapters:
                                doc["chapters"] = chapters
                            if sections:
                                doc["sections"] = sections
                            if doc:
                                documents.append(doc)
                    startPos = -1
                elif braceCount < 0:
                    # Unbalanced - stop scanning
                    break
    # If we end with an incomplete document (startPos >= 0 and braceCount > 0), ignore it
    # It will be in the next fragment
    if documents:
        return documents
    # Pattern 2: Look for "chapters": [...] pattern directly (fragment might start mid-document)
    chapters = JsonDataExtractor._extractChaptersFromString(jsonString)
    if chapters:
        documents.append({"chapters": chapters})
    # Pattern 3: Look for "sections": [...] pattern directly
    sections = JsonDataExtractor._extractSectionsFromString(jsonString)
    if sections:
        documents.append({"sections": sections})
    return documents
@staticmethod
def _extractChaptersFromDocument(docStr: str) -> List[Dict[str, Any]]:
"""Extract chapters array from document string."""
return JsonDataExtractor._extractChaptersFromString(docStr)
@staticmethod
def _extractChaptersFromString(jsonString: str) -> List[Dict[str, Any]]:
    """
    Extract chapters array from JSON string - extracts ALL complete chapters.
    Ignores incomplete chapters at the end.
    Core principle: Fragment can be cut anywhere - extract only complete objects.
    """
    chapters = []
    # Look for "chapters": [...] pattern (including incomplete at end)
    chaptersPattern = r'"chapters"\s*:\s*\[(.*)'
    match = re.search(chaptersPattern, jsonString, re.DOTALL)
    if match:
        chaptersContent = match.group(1)
        # Extract ALL complete chapter objects using balanced brace matching
        # NOTE(review): braces inside string values are not skipped here;
        # json.loads below rejects any mis-captured slice.
        braceCount = 0
        startPos = -1
        for i, char in enumerate(chaptersContent):
            if char == '{':
                if braceCount == 0:
                    startPos = i
                braceCount += 1
            elif char == '}':
                braceCount -= 1
                if braceCount == 0 and startPos >= 0:
                    # Found a complete chapter object
                    chapterStr = chaptersContent[startPos:i+1]
                    try:
                        chapter = json.loads(chapterStr)
                        if isinstance(chapter, dict):
                            chapters.append(chapter)
                    except Exception:
                        # Not valid JSON - skip it (incomplete chapter)
                        pass
                    startPos = -1
                elif braceCount < 0:
                    # Unbalanced - stop here
                    break
    # If we end with an incomplete chapter (startPos >= 0 and braceCount > 0), ignore it
    # It will be in the next fragment
    # Also try to extract chapters that might be standalone (fragment starts mid-array)
    # Look for complete chapter objects anywhere in the string
    if not chapters:
        # Try to find complete chapter objects using balanced brace matching
        allObjs = JsonDataExtractor._extractAllCompleteObjects(jsonString)
        # Filter for objects that look like chapters (have id and title)
        for obj in allObjs:
            if isinstance(obj, dict) and "id" in obj and "title" in obj:
                chapters.append(obj)
    return chapters
@staticmethod
def _extractSectionsFromDocument(docStr: str) -> List[Dict[str, Any]]:
"""Extract sections array from document string."""
return JsonDataExtractor._extractSectionsFromString(docStr)
@staticmethod
def _extractSectionsFromString(jsonString: str) -> List[Dict[str, Any]]:
"""Extract sections array from JSON string, even if incomplete."""
sections = []
# Look for "sections": [...]
sectionsPattern = r'"sections"\s*:\s*\[(.*?)(?:\]|$)'
match = re.search(sectionsPattern, jsonString, re.DOTALL)
if match:
sectionsContent = match.group(1)
# Extract section objects using balanced brace matching
braceCount = 0
startPos = -1
for i, char in enumerate(sectionsContent):
if char == '{':
if braceCount == 0:
startPos = i
braceCount += 1
elif char == '}':
braceCount -= 1
if braceCount == 0 and startPos >= 0:
sectionStr = sectionsContent[startPos:i+1]
try:
section = json.loads(sectionStr)
if isinstance(section, dict):
sections.append(section)
except Exception:
# Incomplete section - try to extract what we can
idMatch = re.search(r'"id"\s*:\s*"([^"]*)"', sectionStr)
contentTypeMatch = re.search(r'"content_type"\s*:\s*"([^"]*)"', sectionStr)
if idMatch or contentTypeMatch:
section = {}
if idMatch:
section["id"] = idMatch.group(1)
if contentTypeMatch:
section["content_type"] = contentTypeMatch.group(1)
if section:
sections.append(section)
startPos = -1
return sections
@staticmethod
def _extractFiles(jsonString: str) -> List[Dict[str, Any]]:
"""Extract files array from JSON string, even if incomplete."""
files = []
# Look for "files": [...]
filesPattern = r'"files"\s*:\s*\[(.*?)(?:\]|$)'
match = re.search(filesPattern, jsonString, re.DOTALL)
if match:
filesContent = match.group(1)
# Extract file objects using balanced brace matching
braceCount = 0
startPos = -1
for i, char in enumerate(filesContent):
if char == '{':
if braceCount == 0:
startPos = i
braceCount += 1
elif char == '}':
braceCount -= 1
if braceCount == 0 and startPos >= 0:
fileStr = filesContent[startPos:i+1]
try:
fileObj = json.loads(fileStr)
if isinstance(fileObj, dict):
files.append(fileObj)
except Exception:
# Incomplete file - try to extract what we can
idMatch = re.search(r'"id"\s*:\s*"([^"]*)"', fileStr)
filenameMatch = re.search(r'"filename"\s*:\s*"([^"]*)"', fileStr)
if idMatch or filenameMatch:
fileObj = {}
if idMatch:
fileObj["id"] = idMatch.group(1)
if filenameMatch:
fileObj["filename"] = filenameMatch.group(1)
if fileObj:
files.append(fileObj)
startPos = -1
return files
@staticmethod
def _extractImages(jsonString: str) -> List[Dict[str, Any]]:
"""Extract images array from JSON string, even if incomplete."""
images = []
# Look for "images": [...]
imagesPattern = r'"images"\s*:\s*\[(.*?)(?:\]|$)'
match = re.search(imagesPattern, jsonString, re.DOTALL)
if match:
imagesContent = match.group(1)
# Extract image objects using balanced brace matching
braceCount = 0
startPos = -1
for i, char in enumerate(imagesContent):
if char == '{':
if braceCount == 0:
startPos = i
braceCount += 1
elif char == '}':
braceCount -= 1
if braceCount == 0 and startPos >= 0:
imageStr = imagesContent[startPos:i+1]
try:
image = json.loads(imageStr)
if isinstance(image, dict):
images.append(image)
except Exception:
# Incomplete image - try to extract what we can
idMatch = re.search(r'"id"\s*:\s*"([^"]*)"', imageStr)
urlMatch = re.search(r'"url"\s*:\s*"([^"]*)"', imageStr)
if idMatch or urlMatch:
image = {}
if idMatch:
image["id"] = idMatch.group(1)
if urlMatch:
image["url"] = urlMatch.group(1)
if image:
images.append(image)
startPos = -1
return images
class JsonStructureDetector:
    """Detects JSON structure type from extracted data."""
    @staticmethod
    def detect(data: Dict[str, Any], mergeId: Optional[str] = None) -> str:
        """
        Detect structure type from data - GENERIC approach.
        Only checks for top-level keys, no content analysis.

        Args:
            data: Extracted data dict whose top-level keys are inspected.
            mergeId: Optional merge ID; when set, the detection is logged.
        Returns:
            Structure type: "elements", "documents", "files", "images", or "unknown"
        """
        # Priority order matters: the first known key present wins.
        structureType = "unknown"
        for candidate in ("elements", "documents", "files", "images"):
            if candidate in data:
                structureType = candidate
                break
        if mergeId:
            JsonMergeLogger.logStep("DETECTION", f"Detected structure type: {structureType}", structureType)
        return structureType
class JsonDataMerger:
    """Merges JSON data intelligently with overlap detection.

    All merge helpers return new structures; the input dicts (including
    nested dicts such as a table's "content") are never mutated.
    """
    @staticmethod
    def merge(
        accumulated: Dict[str, Any],
        newFragment: Dict[str, Any],
        structureType: str,
        mergeId: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Merge two JSON data structures.
        Args:
            accumulated: Previously accumulated data
            newFragment: New fragment data
            structureType: Detected structure type ("elements", "documents",
                "files", "images"; anything else merges generically)
            mergeId: Optional merge ID for logging
        Returns:
            Merged data structure
        """
        if mergeId:
            JsonMergeLogger.logStep("MERGING", f"Merging {structureType} structures", {
                "acc_keys": list(accumulated.keys()) if accumulated else [],
                "frag_keys": list(newFragment.keys()) if newFragment else []
            })
        # Trivial cases: if either side is empty, the other side wins.
        if not accumulated:
            if mergeId:
                JsonMergeLogger.logStep("MERGING", "No accumulated data, returning fragment", newFragment)
            return newFragment if newFragment else {}
        if not newFragment:
            if mergeId:
                JsonMergeLogger.logStep("MERGING", "No fragment data, returning accumulated", accumulated)
            return accumulated
        # Merge based on structure type
        if structureType == "elements":
            result = JsonDataMerger._mergeElements(accumulated, newFragment)
        elif structureType == "documents":
            result = JsonDataMerger._mergeDocuments(accumulated, newFragment)
        elif structureType == "files":
            result = JsonDataMerger._mergeFiles(accumulated, newFragment)
        elif structureType == "images":
            result = JsonDataMerger._mergeImages(accumulated, newFragment)
        else:
            # Unknown structure - try to merge generically
            result = JsonDataMerger._mergeGeneric(accumulated, newFragment)
        if mergeId:
            JsonMergeLogger.logStep("MERGING", f"Merged {structureType} structures", result)
        return result
    @staticmethod
    def _mergeElements(accumulated: Dict[str, Any], newFragment: Dict[str, Any]) -> Dict[str, Any]:
        """Merge structures keyed by top-level "elements"."""
        accElements = accumulated.get("elements", [])
        fragElements = newFragment.get("elements", [])
        if not accElements:
            return {"elements": fragElements} if fragElements else accumulated
        if not fragElements:
            return {"elements": accElements}
        # Merge elements with overlap detection
        mergedElements = JsonDataMerger._mergeElementList(accElements, fragElements)
        return {"elements": mergedElements}
    @staticmethod
    def _mergeElementList(accElements: List[Dict[str, Any]], fragElements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Merge two element lists with overlap detection."""
        if not accElements:
            return fragElements
        if not fragElements:
            return accElements
        # Special handling: a table split across fragments should be re-joined.
        accTables = [e for e in accElements if isinstance(e, dict) and e.get("type") == "table"]
        fragTables = [e for e in fragElements if isinstance(e, dict) and e.get("type") == "table"]
        if accTables and fragTables:
            # NOTE: only the FIRST table on each side is merged; additional
            # tables on either side are dropped by this heuristic.
            mergedTable = JsonDataMerger._mergeTableElements(accTables[0], fragTables[0])
            if mergedTable:
                # Replace the tables with the single merged table
                otherAccElements = [e for e in accElements if not (isinstance(e, dict) and e.get("type") == "table")]
                otherFragElements = [e for e in fragElements if not (isinstance(e, dict) and e.get("type") == "table")]
                return otherAccElements + [mergedTable] + otherFragElements
        # Find overlap by comparing elements
        overlapStart = JsonDataMerger._findOverlap(accElements, fragElements, None, "elements")
        if overlapStart > 0:
            # Found overlap - drop the duplicated prefix of the fragment
            return accElements + fragElements[overlapStart:]
        # No overlap - append all
        return accElements + fragElements
    @staticmethod
    def _mergeTableElements(accTable: Dict[str, Any], fragTable: Dict[str, Any]) -> Dict[str, Any]:
        """Merge two table elements by merging their rows.

        Returns a new dict; neither input table nor its nested "content"
        dict is modified.
        """
        accRows = JsonDataMerger._getTableRows(accTable)
        fragRows = JsonDataMerger._getTableRows(fragTable)
        if not accRows:
            return fragTable
        if not fragRows:
            return accTable
        # Find overlap in rows
        overlapStart = JsonDataMerger._findOverlap(accRows, fragRows, None, "table_rows")
        # Merge rows, dropping the duplicated prefix of the fragment rows
        mergedRows = accRows + (fragRows[overlapStart:] if overlapStart > 0 else fragRows)
        # Build merged table.
        # BUGFIX: the nested "content" dict must be copied too - a shallow
        # copy of accTable shares it, and assigning content["rows"] below
        # would mutate the caller's accumulated table in place.
        mergedTable = accTable.copy()
        content = mergedTable.get("content", {})
        content = dict(content) if isinstance(content, dict) else {}
        content["rows"] = mergedRows
        # Preserve headers from the fragment if the accumulated table lacks them
        if "headers" not in content:
            fragContent = fragTable.get("content", {})
            if isinstance(fragContent, dict) and "headers" in fragContent:
                content["headers"] = fragContent["headers"]
        mergedTable["content"] = content
        return mergedTable
    @staticmethod
    def _findOverlap(accList: List[Any], fragList: List[Any], mergeId: Optional[str] = None, overlapType: str = "generic") -> int:
        """Find overlap between two lists. Returns index where overlap starts in fragList."""
        if not accList or not fragList:
            if mergeId:
                JsonMergeLogger.logOverlap(overlapType, 0)
            return 0
        # Try to find the longest suffix of accList equal to a prefix of fragList
        maxOverlap = min(len(accList), len(fragList))
        for overlapLen in range(maxOverlap, 0, -1):
            accSuffix = accList[-overlapLen:]
            fragPrefix = fragList[:overlapLen]
            # Compare elements
            if JsonDataMerger._listsEqual(accSuffix, fragPrefix):
                if mergeId:
                    JsonMergeLogger.logOverlap(overlapType, overlapLen, accSuffix, fragPrefix)
                return overlapLen
        if mergeId:
            JsonMergeLogger.logOverlap(overlapType, 0)
        return 0
    @staticmethod
    def _listsEqual(list1: List[Any], list2: List[Any]) -> bool:
        """Check if two lists are equal (deep comparison for dicts)."""
        if len(list1) != len(list2):
            return False
        for i in range(len(list1)):
            if isinstance(list1[i], dict) and isinstance(list2[i], dict):
                # Compare dicts by comparing their content
                if not JsonDataMerger._dictsEqual(list1[i], list2[i]):
                    return False
            elif list1[i] != list2[i]:
                return False
        return True
    @staticmethod
    def _dictsEqual(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> bool:
        """Check if two dicts are equal (comparing key content)."""
        # For table elements, equality is defined by their rows only
        if dict1.get("type") == "table" and dict2.get("type") == "table":
            rows1 = JsonDataMerger._getTableRows(dict1)
            rows2 = JsonDataMerger._getTableRows(dict2)
            return rows1 == rows2
        # For other elements, compare type and key content
        if dict1.get("type") != dict2.get("type"):
            return False
        # Compare content
        content1 = dict1.get("content", {})
        content2 = dict2.get("content", {})
        if isinstance(content1, dict) and isinstance(content2, dict):
            # Compare rows for tables
            if "rows" in content1 and "rows" in content2:
                return content1["rows"] == content2["rows"]
            # Compare items for lists
            if "items" in content1 and "items" in content2:
                return content1["items"] == content2["items"]
        return dict1 == dict2
    @staticmethod
    def _getTableRows(element: Dict[str, Any]) -> List[List[str]]:
        """Extract table rows from element ("content.rows" or top-level "rows")."""
        content = element.get("content", {})
        if isinstance(content, dict):
            return content.get("rows", [])
        return element.get("rows", [])
    @staticmethod
    def _mergeDocuments(accumulated: Dict[str, Any], newFragment: Dict[str, Any]) -> Dict[str, Any]:
        """Merge structures keyed by top-level "documents" (simple concatenation)."""
        accDocs = accumulated.get("documents", [])
        fragDocs = newFragment.get("documents", [])
        if not accDocs:
            return {"documents": fragDocs} if fragDocs else accumulated
        if not fragDocs:
            return {"documents": accDocs}
        # Simplified: no overlap detection for documents yet
        return {"documents": accDocs + fragDocs}
    @staticmethod
    def _mergeFiles(accumulated: Dict[str, Any], newFragment: Dict[str, Any]) -> Dict[str, Any]:
        """Merge structures keyed by top-level "files" (simple concatenation)."""
        accFiles = accumulated.get("files", [])
        fragFiles = newFragment.get("files", [])
        if not accFiles:
            return {"files": fragFiles} if fragFiles else accumulated
        if not fragFiles:
            return {"files": accFiles}
        return {"files": accFiles + fragFiles}
    @staticmethod
    def _mergeImages(accumulated: Dict[str, Any], newFragment: Dict[str, Any]) -> Dict[str, Any]:
        """Merge structures keyed by top-level "images" (simple concatenation)."""
        accImages = accumulated.get("images", [])
        fragImages = newFragment.get("images", [])
        if not accImages:
            return {"images": fragImages} if fragImages else accumulated
        if not fragImages:
            return {"images": accImages}
        return {"images": accImages + fragImages}
    @staticmethod
    def _mergeGeneric(accumulated: Dict[str, Any], newFragment: Dict[str, Any]) -> Dict[str, Any]:
        """Generic merge for unknown structures.

        Lists are concatenated, nested dicts are merged recursively, and
        scalar conflicts are resolved in favor of the new fragment.
        """
        merged = accumulated.copy()
        for key, value in newFragment.items():
            if key in merged:
                # Key exists - try to merge values
                if isinstance(merged[key], list) and isinstance(value, list):
                    merged[key] = merged[key] + value
                elif isinstance(merged[key], dict) and isinstance(value, dict):
                    merged[key] = JsonDataMerger._mergeGeneric(merged[key], value)
                else:
                    # Scalar or mismatched types: newest value wins
                    merged[key] = value
            else:
                merged[key] = value
        return merged
class JsonResultBuilder:
    """Builds final JSON result, ensuring it's always valid.

    Serialization follows a fallback ladder: dump -> validate by re-parsing
    -> repair with closeJsonStructures() -> aggressive clean + retry ->
    minimal empty structure. Callers therefore always receive a parseable
    JSON string, never an exception or empty output.
    """
    @staticmethod
    def build(mergedData: Dict[str, Any], structureType: str, mergeId: Optional[str] = None) -> str:
        """
        Build final JSON string from merged data.
        Args:
            mergedData: Merged data structure
            structureType: Detected structure type
            mergeId: Optional merge ID for logging
        Returns:
            Valid JSON string (never empty)
        """
        if not mergedData:
            # Return empty structure based on type
            if structureType == "elements":
                return json.dumps({"elements": []}, indent=2, ensure_ascii=False)
            elif structureType == "documents":
                # NOTE: documents fallback deliberately carries one empty document object
                return json.dumps({"documents": [{}]}, indent=2, ensure_ascii=False)
            elif structureType == "files":
                return json.dumps({"files": []}, indent=2, ensure_ascii=False)
            elif structureType == "images":
                return json.dumps({"images": []}, indent=2, ensure_ascii=False)
            else:
                return json.dumps({}, indent=2, ensure_ascii=False)
        # Ensure structure is correct - GENERIC approach: when the expected
        # top-level key is missing, wrap the whole dict as a single item
        # under that key.
        if structureType == "elements" and "elements" not in mergedData:
            # Try to wrap data in elements structure
            if isinstance(mergedData, dict):
                # Generic: If it has any data, wrap it as an element
                if mergedData:
                    mergedData = {"elements": [mergedData]}
                    if mergeId:
                        JsonMergeLogger.logStep("BUILDING", "Wrapping single object as element (generic)", mergedData)
                else:
                    # Empty dict - return empty elements
                    mergedData = {"elements": []}
        elif structureType == "documents" and "documents" not in mergedData:
            # Try to wrap data in documents structure
            if isinstance(mergedData, dict):
                if mergedData:
                    # Generic: Wrap single object in documents structure
                    # Try to detect if it should be chapters or sections by checking accumulated data
                    # But for now, use generic approach: wrap in documents with a generic key
                    mergedData = {"documents": [mergedData]}
                    if mergeId:
                        JsonMergeLogger.logStep("BUILDING", "Wrapping single object in documents structure (generic)", mergedData)
                else:
                    mergedData = {"documents": [{}]}
        elif structureType == "files" and "files" not in mergedData:
            # Try to wrap data in files structure
            if isinstance(mergedData, dict):
                if mergedData:
                    mergedData = {"files": [mergedData]}
                    if mergeId:
                        JsonMergeLogger.logStep("BUILDING", "Wrapping single object in files structure (generic)", mergedData)
                else:
                    mergedData = {"files": []}
        elif structureType == "images" and "images" not in mergedData:
            # Try to wrap data in images structure
            if isinstance(mergedData, dict):
                if mergedData:
                    mergedData = {"images": [mergedData]}
                    if mergeId:
                        JsonMergeLogger.logStep("BUILDING", "Wrapping single object in images structure (generic)", mergedData)
                else:
                    mergedData = {"images": []}
        elif structureType == "unknown" and isinstance(mergedData, dict) and mergedData:
            # Unknown structure but has data - wrap generically as elements
            mergedData = {"elements": [mergedData]}
            if mergeId:
                JsonMergeLogger.logStep("BUILDING", "Unknown structure, wrapping as elements (generic)", mergedData)
        # Clean data structure before serialization
        cleanedData = JsonResultBuilder._cleanDataStructure(mergedData)
        # Try to serialize; every failure path below degrades gracefully and
        # ultimately falls back to a minimal {"elements": []} structure.
        try:
            jsonString = json.dumps(cleanedData, indent=2, ensure_ascii=False)
            # Validate the JSON string by trying to parse it
            try:
                parsed, parseErr, _ = tryParseJson(jsonString)
                if parseErr is None:
                    # Valid JSON - return it
                    return jsonString
                else:
                    # Invalid JSON - try to repair
                    logger.warning(f"Generated JSON is invalid: {parseErr}, attempting repair")
                    repaired = closeJsonStructures(jsonString)
                    parsed2, parseErr2, _ = tryParseJson(repaired)
                    if parseErr2 is None:
                        return repaired
                    else:
                        # Repair failed - return minimal valid structure
                        logger.error(f"Repair failed: {parseErr2}, returning minimal structure")
                        return json.dumps({"elements": []}, indent=2, ensure_ascii=False)
            except Exception as parseEx:
                # Parse validation itself raised (not just reported an error) - try repair
                logger.warning(f"Parse validation failed: {parseEx}, attempting repair")
                try:
                    repaired = closeJsonStructures(jsonString)
                    parsed2, parseErr2, _ = tryParseJson(repaired)
                    if parseErr2 is None:
                        return repaired
                except Exception:
                    pass
                # Return minimal valid structure
                return json.dumps({"elements": []}, indent=2, ensure_ascii=False)
        except (TypeError, ValueError) as e:
            # json.dumps rejected the data (non-serializable values)
            logger.error(f"Error serializing JSON: {e}")
            # Try to clean more aggressively and retry
            try:
                cleanedData2 = JsonResultBuilder._cleanDataStructure(cleanedData, aggressive=True)
                jsonString = json.dumps(cleanedData2, indent=2, ensure_ascii=False)
                # Validate
                parsed, parseErr, _ = tryParseJson(jsonString)
                if parseErr is None:
                    return jsonString
            except Exception:
                pass
            # Fallback to empty structure
            return json.dumps({"elements": []}, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Unexpected error building JSON: {e}")
            # Fallback to empty structure
            return json.dumps({"elements": []}, indent=2, ensure_ascii=False)
    @staticmethod
    def _cleanDataStructure(data: Any, aggressive: bool = False) -> Any:
        """
        Clean data structure to ensure it's JSON-serializable.
        Removes None values, ensures lists contain only valid items,
        and repairs incomplete structures.

        Args:
            data: Arbitrary (possibly nested) structure to clean.
            aggressive: When True, None values are dropped from dicts and
                lists, and unknown types are stringified instead of kept.
        Returns:
            A cleaned copy of the structure (inputs are not modified).
        """
        if data is None:
            return {} if aggressive else None
        if isinstance(data, dict):
            cleaned = {}
            for key, value in data.items():
                if value is None and aggressive:
                    continue  # Skip None values in aggressive mode
                cleaned[key] = JsonResultBuilder._cleanDataStructure(value, aggressive)
            return cleaned
        elif isinstance(data, list):
            cleaned = []
            for item in data:
                cleanedItem = JsonResultBuilder._cleanDataStructure(item, aggressive)
                if cleanedItem is not None or not aggressive:
                    cleaned.append(cleanedItem)
            return cleaned
        elif isinstance(data, (str, int, float, bool)):
            return data
        else:
            # Unknown type - try to convert to string or skip
            if aggressive:
                return str(data)
            return data
class ModularJsonMerger:
"""
Modular JSON Merger - Main entry point.
Simple pipeline:
1. Find overlap between JSON strings
2. Merge strings together
3. Parse and clean the merged JSON
"""
@staticmethod
def _findStringOverlap(accStr: str, fragStr: str, mergeId: Optional[str] = None) -> int:
"""
Find overlap between two JSON strings - GENERIC solution.
Works for any JSON structure (arrays, objects, nested, minified, formatted).
Uses multiple strategies to find overlap regardless of JSON format.
Strategy:
1. Exact suffix/prefix match (fastest, works for any format)
2. Structure-aware: Find last complete JSON elements in accumulated that match start of fragment
3. Line-based: If JSON is formatted, use line matching (for better performance)
4. Partial match: Handle incomplete elements at cut point
Returns the length of the overlap (number of characters).
"""
if not accStr or not fragStr:
if mergeId:
JsonMergeLogger.logOverlap("string", 0)
return 0
# Strategy 1: Try exact suffix/prefix match (fastest, works for any format)
maxOverlap = min(len(accStr), len(fragStr))
# Start from maximum possible overlap and work backwards
for overlapLen in range(maxOverlap, 0, -1):
accSuffix = accStr[-overlapLen:]
fragPrefix = fragStr[:overlapLen]
if accSuffix == fragPrefix:
if mergeId:
JsonMergeLogger.logOverlap("string (exact)", overlapLen, accSuffix[:200], fragPrefix[:200])
return overlapLen
# Strategy 2: Structure-aware overlap detection (GENERIC - works for any JSON structure)
# Find last complete JSON elements in accumulated and check if they appear at start of fragment
overlapLen = ModularJsonMerger._findStructureBasedOverlap(accStr, fragStr, mergeId)
if overlapLen > 0:
return overlapLen
# Strategy 3: Line-based overlap (works well for formatted JSON)
# Only use if JSON appears to be formatted (has newlines)
if '\n' in accStr and '\n' in fragStr:
overlapLen = ModularJsonMerger._findLineBasedOverlap(accStr, fragStr, mergeId)
if overlapLen > 0:
return overlapLen
# Strategy 4: Partial overlap (incomplete element at cut point)
overlapLen = ModularJsonMerger._findPartialOverlap(accStr, fragStr, mergeId)
if overlapLen > 0:
return overlapLen
if mergeId:
JsonMergeLogger.logOverlap("string", 0)
return 0
    @staticmethod
    def _findStructureBasedOverlap(accStr: str, fragStr: str, mergeId: Optional[str] = None) -> int:
        """
        Find overlap by detecting complete JSON elements (structure-aware, GENERIC).
        Works for ANY JSON structure:
        - Arrays: Finds last complete array elements
        - Objects: Finds last complete object properties
        - Nested structures: Recursively finds complete elements
        - Minified or formatted JSON: Structure-aware, not format-dependent
        - Any use case: section_content, chapter_structure, code_structure, etc.
        Strategy: Find last complete JSON elements in accumulated that match start of fragment.
        Uses balanced bracket/brace matching to identify complete elements regardless of format.

        Returns the overlap length in characters, or 0 if none is found.
        """
        accTrimmed = accStr.rstrip()
        fragTrimmed = fragStr.lstrip()
        if not accTrimmed or not fragTrimmed:
            return 0
        # Find last complete elements in accumulated by parsing backwards
        # Look for complete array elements or object properties
        # Strategy: Find where accumulated has complete elements at the end
        # and check if fragment starts with the same elements
        # Use a sliding window approach: check different suffix lengths from accumulated
        # NOTE(review): window capped at 2000 chars and only suffixes longer
        # than ~50 chars are considered - overlaps outside that band are
        # intentionally ignored by this strategy.
        maxCheckLength = min(2000, len(accTrimmed), len(fragTrimmed))
        # Check in reverse order (largest to smallest) to find longest overlap first.
        # NOTE(review): the -5 step trades exactness for speed; a true overlap
        # whose length falls between steps can be missed here and must be
        # caught by a later strategy.
        for checkLen in range(maxCheckLength, 50, -5):  # Step by 5 for performance
            if checkLen > len(accTrimmed) or checkLen > len(fragTrimmed):
                continue
            accSuffix = accTrimmed[-checkLen:]
            fragPrefix = fragTrimmed[:checkLen]
            # Check if accSuffix ends with complete JSON element(s) and fragPrefix starts with same
            # A complete element ends with proper closing brackets/braces
            # Verify that accSuffix ends with complete structure
            # and fragPrefix starts with the same structure
            if ModularJsonMerger._isCompleteJsonElement(accSuffix) and \
               ModularJsonMerger._startsWithSameElement(accSuffix, fragPrefix):
                # Found overlap! Verify it's meaningful (not just whitespace)
                if len(accSuffix.strip()) > 20:
                    if mergeId:
                        JsonMergeLogger.logOverlap("string (structure-based)", checkLen, accSuffix[:200], fragPrefix[:200])
                    return checkLen
        # Alternative: Try to find common substring that represents complete elements
        # Look for patterns like complete array rows or object properties
        # Check last 500 chars of accumulated against first 500 chars of fragment
        checkWindow = min(500, len(accTrimmed), len(fragTrimmed))
        if checkWindow > 100:
            accWindow = accTrimmed[-checkWindow:]
            fragWindow = fragTrimmed[:checkWindow]
            # Find longest common substring that represents complete elements
            # Look for boundaries like ], [ or }, { or ", "
            for i in range(checkWindow - 50, 50, -5):
                accSub = accWindow[-i:]
                fragSub = fragWindow[:i]
                if accSub == fragSub:
                    # Check if it's a complete element boundary
                    if ModularJsonMerger._isCompleteElementBoundary(accSub):
                        if mergeId:
                            JsonMergeLogger.logOverlap("string (structure-boundary)", i, accSub[:200], fragSub[:200])
                        return i
        return 0
@staticmethod
def _isCompleteJsonElement(jsonStr: str) -> bool:
"""Check if string ends with a complete JSON element (balanced brackets/braces)."""
jsonStr = jsonStr.strip()
if not jsonStr:
return False
# Check if it ends with complete structure markers
# Complete array element: ends with ] or ], or ],
# Complete object element: ends with } or }, or },
if jsonStr[-1] in ']}':
# Check if brackets/braces are balanced
braceCount = jsonStr.count('{') - jsonStr.count('}')
bracketCount = jsonStr.count('[') - jsonStr.count(']')
return braceCount == 0 and bracketCount == 0
return False
@staticmethod
def _startsWithSameElement(accSuffix: str, fragPrefix: str) -> bool:
"""Check if fragment prefix starts with the same element as accumulated suffix."""
# Normalize whitespace for comparison
accNorm = accSuffix.strip()
fragNorm = fragPrefix.strip()
# Check if fragPrefix starts with accSuffix (or vice versa for partial matches)
if fragNorm.startswith(accNorm):
return True
# Check if they have common prefix (for partial element completion)
minLen = min(len(accNorm), len(fragNorm))
if minLen > 20:
# Check if first 80% of accSuffix matches start of fragPrefix
checkLen = int(minLen * 0.8)
return accNorm[:checkLen] == fragNorm[:checkLen]
return False
@staticmethod
def _isCompleteElementBoundary(jsonStr: str) -> bool:
"""Check if string represents a complete element boundary (e.g., ], [ or }, {)."""
jsonStr = jsonStr.strip()
if not jsonStr:
return False
# Check if it contains complete element boundaries
# Pattern: ends with ], or }, or ],\n or },\n
if jsonStr.rstrip().endswith(('],', '},', ']', '}')):
return True
# Check if it's a complete array element or object property
if '],' in jsonStr or '},' in jsonStr:
return True
return False
@staticmethod
def _findLineBasedOverlap(accStr: str, fragStr: str, mergeId: Optional[str] = None) -> int:
"""
Find overlap using line-based matching (for formatted JSON).
"""
accLines = accStr.rstrip().split('\n')
fragLines = fragStr.lstrip().split('\n')
# Try to find matching lines from the end of accumulated at the start of fragment
maxLinesToCheck = min(10, len(accLines), len(fragLines))
for numLines in range(maxLinesToCheck, 0, -1):
# Get last N lines from accumulated (excluding empty lines)
accLastLines = [line.strip() for line in accLines[-numLines:] if line.strip()]
# Get first N lines from fragment (excluding empty lines)
fragFirstLines = [line.strip() for line in fragLines[:numLines] if line.strip()]
# Check if they match
if len(accLastLines) > 0 and len(fragFirstLines) > 0:
# Try to find where accLastLines match fragFirstLines
for i in range(len(accLastLines)):
# Check if accLastLines[i:] matches fragFirstLines[:len(accLastLines)-i]
accSuffixLines = accLastLines[i:]
fragPrefixLines = fragFirstLines[:len(accSuffixLines)]
if accSuffixLines == fragPrefixLines and len(accSuffixLines) > 0:
# Found overlap! Calculate character length
accSuffixText = '\n'.join(accLastLines[i:])
fragPrefixText = '\n'.join(fragPrefixLines)
# Find where this text appears in the original strings
accPos = accStr.rfind(accSuffixText)
fragPos = fragStr.find(fragPrefixText)
if accPos >= 0 and fragPos == 0:
# Found valid overlap
overlapLen = len(accSuffixText)
if mergeId:
JsonMergeLogger.logOverlap("string (line-based)", overlapLen, accSuffixText[:200], fragPrefixText[:200])
return overlapLen
return 0
@staticmethod
def _findPartialOverlap(accStr: str, fragStr: str, mergeId: Optional[str] = None) -> int:
"""
Find partial overlap (incomplete element at cut point).
"""
accLines = accStr.rstrip().split('\n')
fragLines = fragStr.lstrip().split('\n')
if accLines and fragLines:
lastAccLine = accLines[-1].strip()
firstFragLine = fragLines[0].strip()
# Check if lastAccLine is a prefix of firstFragLine (incomplete line completed)
if lastAccLine and firstFragLine.startswith(lastAccLine):
# Also check if there are more matching lines after
overlapLen = len(lastAccLine)
# Try to extend overlap with more lines
for i in range(1, min(len(accLines), len(fragLines))):
if accLines[-1-i].strip() == fragLines[i].strip():
overlapLen += len('\n' + fragLines[i])
else:
break
if overlapLen > 20: # Only if meaningful overlap
if mergeId:
JsonMergeLogger.logOverlap("string (partial line)", overlapLen, lastAccLine[:200], firstFragLine[:200])
return overlapLen
return 0
@staticmethod
def _mergeStrings(accStr: str, fragStr: str, overlapLength: int) -> str:
"""
Merge two JSON strings together, removing the overlap.
Handles whitespace at cut points properly for seamless merging.
"""
if overlapLength > 0:
# Remove overlap from fragment and append
# CRITICAL: Handle whitespace properly - if accumulated ends with whitespace
# and fragment starts with the same content, we need to preserve whitespace structure
merged = accStr + fragStr[overlapLength:]
else:
# No overlap - just concatenate (might need comma or other separator)
# CRITICAL: Preserve whitespace structure when merging
# Get trailing whitespace from accumulated (spaces, tabs, but not newlines)
accTrailingWs = ""
i = len(accStr) - 1
while i >= 0 and accStr[i] in [' ', '\t']:
accTrailingWs = accStr[i] + accTrailingWs
i -= 1
# Get leading whitespace from fragment (spaces, tabs, but not newlines)
fragLeadingWs = ""
i = 0
while i < len(fragStr) and fragStr[i] in [' ', '\t']:
fragLeadingWs += fragStr[i]
i += 1
# Trim for content detection but preserve whitespace structure
accTrimmed = accStr.rstrip().rstrip(',')
fragTrimmed = fragStr.lstrip().lstrip(',')
# Check if we need a separator
if accTrimmed and fragTrimmed:
# If accumulated ends with } or ] and fragment starts with { or [, we might need comma
if (accTrimmed[-1] in '}]' and fragTrimmed[0] in '{['):
# Add comma with appropriate whitespace
merged = accTrimmed + ',' + fragLeadingWs + fragTrimmed
else:
# Merge with preserved whitespace structure
# Use the whitespace from fragment (it knows the proper spacing)
merged = accTrimmed + accTrailingWs + fragLeadingWs + fragTrimmed
else:
# One is empty - just concatenate with preserved whitespace
merged = accStr + fragStr
return merged
@staticmethod
def merge(accumulated: str, newFragment: str) -> Tuple[str, bool]:
"""
Merge two JSON fragments intelligently.
Args:
accumulated: Previously accumulated JSON string
newFragment: New fragment JSON string
Returns:
Tuple of (merged_json_string, has_overlap):
- merged_json_string: Merged JSON string (closed if no overlap, unclosed if overlap found)
- has_overlap: True if overlap was found (iterations should continue), False if no overlap (iterations should stop)
"""
# Start logging
mergeId = JsonMergeLogger.startMerge(accumulated, newFragment)
if not accumulated:
result = newFragment if newFragment else "{}"
JsonMergeLogger.finishMerge(mergeId, result, True)
return (result, False) # No overlap if no accumulated data
if not newFragment:
JsonMergeLogger.finishMerge(mergeId, accumulated, True)
return (accumulated, False) # No overlap if no new fragment
try:
# Normalize both strings
accNormalized = stripCodeFences(normalizeJsonText(accumulated)).strip()
fragNormalized = stripCodeFences(normalizeJsonText(newFragment)).strip()
JsonMergeLogger._log(f"\n Normalized Accumulated ({len(accNormalized)} chars)")
accNormLines = accNormalized.split('\n')
if len(accNormLines) > 10:
JsonMergeLogger._log(f" (showing first 5 and last 5 of {len(accNormLines)} lines)")
for line in accNormLines[:5]:
JsonMergeLogger._log(f" {line}")
JsonMergeLogger._log(f" ... ({len(accNormLines) - 10} lines omitted) ...")
for line in accNormLines[-5:]:
JsonMergeLogger._log(f" {line}")
else:
for line in accNormLines:
JsonMergeLogger._log(f" {line}")
JsonMergeLogger._log(f"\n Normalized New Fragment ({len(fragNormalized)} chars)")
fragNormLines = fragNormalized.split('\n')
if len(fragNormLines) > 10:
JsonMergeLogger._log(f" (showing first 5 and last 5 of {len(fragNormLines)} lines)")
for line in fragNormLines[:5]:
JsonMergeLogger._log(f" {line}")
JsonMergeLogger._log(f" ... ({len(fragNormLines) - 10} lines omitted) ...")
for line in fragNormLines[-5:]:
JsonMergeLogger._log(f" {line}")
else:
for line in fragNormLines:
JsonMergeLogger._log(f" {line}")
# Step 1: Find overlap between JSON strings
JsonMergeLogger.logStep("PHASE 1", "Finding overlap between JSON strings", None)
overlapLength = ModularJsonMerger._findStringOverlap(accNormalized, fragNormalized, mergeId)
if overlapLength > 0:
accSuffix = accNormalized[-overlapLength:]
fragPrefix = fragNormalized[:overlapLength]
JsonMergeLogger._log(f"\n Overlap found ({overlapLength} chars):")
JsonMergeLogger._log(f" Accumulated suffix: {accSuffix}")
JsonMergeLogger._log(f" Fragment prefix: {fragPrefix}")
else:
# CRITICAL: No overlap found - this means iterations should stop
JsonMergeLogger._log(f"\n ⚠️ NO OVERLAP FOUND - This indicates iterations should stop")
JsonMergeLogger._log(f" Closing JSON and returning final result")
# Close the accumulated JSON (it's complete as far as we can tell)
closedJson = closeJsonStructures(accNormalized)
JsonMergeLogger._log(f"\n Closed JSON ({len(closedJson)} chars):")
JsonMergeLogger._log(" " + "="*78)
for line in closedJson.split('\n'):
JsonMergeLogger._log(f" {line}")
JsonMergeLogger._log(" " + "="*78)
JsonMergeLogger.finishMerge(mergeId, closedJson, True)
# Return closed JSON with has_overlap=False to indicate iterations should stop
return (closedJson, False)
# Step 2: Merge strings together (only if overlap was found)
JsonMergeLogger.logStep("PHASE 2", f"Merging strings (overlap: {overlapLength} chars)", None)
mergedString = ModularJsonMerger._mergeStrings(accNormalized, fragNormalized, overlapLength)
JsonMergeLogger._log(f"\n Merged String ({len(mergedString)} chars)")
mergedLines = mergedString.split('\n')
if len(mergedLines) > 10:
JsonMergeLogger._log(f" (showing first 5 and last 5 of {len(mergedLines)} lines)")
for line in mergedLines[:5]:
JsonMergeLogger._log(f" {line}")
JsonMergeLogger._log(f" ... ({len(mergedLines) - 10} lines omitted) ...")
for line in mergedLines[-5:]:
JsonMergeLogger._log(f" {line}")
else:
for line in mergedLines:
JsonMergeLogger._log(f" {line}")
# Step 3: Return merged string (with incomplete element at end for next iteration)
JsonMergeLogger.logStep("PHASE 3", "Returning merged string (may be unclosed)", None)
JsonMergeLogger._log(f"\n Returning merged string (preserving incomplete element at end for next iteration)")
JsonMergeLogger.finishMerge(mergeId, mergedString, True)
# Return merged string with has_overlap=True to indicate iterations should continue
return (mergedString, True)
except Exception as e:
logger.error(f"Error in modular merger: {e}")
JsonMergeLogger.logStep("ERROR", f"Exception occurred: {str(e)}", None, error=str(e))
# Fallback: try to return accumulated if valid
try:
accParsed, accErr, _ = tryParseJson(accumulated)
if accErr is None:
JsonMergeLogger.finishMerge(mergeId, accumulated, False)
return (accumulated, False) # No overlap on error
except Exception:
pass
# Last resort: return empty valid JSON
fallback = json.dumps({"elements": []}, indent=2, ensure_ascii=False)
JsonMergeLogger.finishMerge(mergeId, fallback, False)
return (fallback, False) # No overlap on error