# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import json
import logging
import re
from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar
from pydantic import BaseModel, ValidationError
from modules.datamodels.datamodelAi import ContinuationContext

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type variable restricted to pydantic BaseModel subclasses.
# NOTE(review): not referenced in the part of the file reviewed here -
# presumably used by typed parsing helpers elsewhere; confirm.
T = TypeVar('T', bound=BaseModel)


def stripCodeFences(text: str) -> str:
    """Remove ``[SOURCE: ...]`` / ``[END SOURCE]`` wrappers and Markdown code fences.

    Processing order: leading ``[SOURCE: ...]`` tag, trailing ``[END SOURCE]``
    tag, then an opening ``` fence (with optional language tag) and its
    optional closing fence. Whitespace is trimmed between the steps so that
    blank lines around the tags do not hide a fence.

    Args:
        text: Raw text, typically a model response.

    Returns:
        The unwrapped, trimmed payload. Falsy input is returned unchanged.
    """
    if not text:
        return text
    s = text.strip()

    # Leading [SOURCE: ...] tag
    if s.startswith("[SOURCE:"):
        newlinePos = s.find("\n")
        if newlinePos != -1:
            # Tag occupies the first line: drop that whole line
            s = s[newlinePos + 1:]
        else:
            # Single-line input: keep whatever follows the tag's closing bracket
            # instead of discarding the payload together with the tag
            closePos = s.find("]")
            s = s[closePos + 1:] if closePos != -1 else ""
        s = s.strip()

    # Trailing [END SOURCE] tag (may share a line with the payload)
    endTag = "[END SOURCE]"
    if s.endswith(endTag):
        s = s[:-len(endTag)].rstrip()

    # Opening fence (closing fence is optional, e.g. for truncated output)
    if s.startswith("```"):
        newlinePos = s.find("\n")
        if newlinePos != -1:
            # Drop the fence line including an optional language tag like 'json'
            s = s[newlinePos + 1:]
        else:
            # Fence and payload on one line: only the backticks can be removed safely
            s = s[3:]
        s = s.rstrip()
        if s.endswith("```"):
            s = s[:-3]
        return s.strip()
    return s


def extractFirstBalancedJson(text: str) -> str:
    """Return the first balanced JSON object/array substring.

    Scanning starts at the first '{' or '[' and is string-aware: braces and
    brackets inside double-quoted strings (including after escaped quotes) do
    not affect the nesting depth.

    Args:
        text: Text that may contain JSON surrounded by other content.

    Returns:
        The balanced substring if one is found; otherwise the trimmed input
        (no opener present, or the structure never closes). Falsy input is
        returned unchanged.
    """
    if not text:
        return text
    s = text.strip()

    openerPositions = [pos for pos in (s.find('{'), s.find('[')) if pos != -1]
    if not openerPositions:
        return s
    start = min(openerPositions)

    closerFor = {'{': '}', '[': ']'}
    expectedClosers: List[str] = []
    inString = False
    escaped = False
    for i in range(start, len(s)):
        ch = s[i]
        if inString:
            # Inside a string only escapes and the closing quote matter
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                inString = False
            continue
        if ch == '"':
            inString = True
        elif ch in closerFor:
            expectedClosers.append(closerFor[ch])
        elif ch in '}]' and expectedClosers and expectedClosers[-1] == ch:
            # Only a matching closer unwinds the stack; stray closers are ignored
            expectedClosers.pop()
            if not expectedClosers:
                return s[start:i + 1].strip()
    return s


def normalizeJsonText(text: str) -> str:
    """Apply light normalization to JSON-like text.

    Drops any leading byte-order marks and maps typographic double/single
    quotes to their straight ASCII equivalents. Falsy input is returned as is.
    """
    if not text:
        return text
    # lstrip is a no-op when the text does not start with a BOM
    withoutBom = text.lstrip('\ufeff')
    straightQuotes = str.maketrans({'“': '"', '”': '"', '’': "'", '‘': "'"})
    return withoutBom.translate(straightQuotes)


def extractJsonString(text: str) -> str:
    """Run the cleaning pipeline and return the trimmed JSON candidate.

    Stages, in order: normalizeJsonText, stripCodeFences,
    extractFirstBalancedJson.
    """
    candidate = text
    for stage in (normalizeJsonText, stripCodeFences, extractFirstBalancedJson):
        candidate = stage(candidate)
    return candidate.strip()


def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]], Optional[Exception], str]:
    """Clean the input with extractJsonString and try to parse it as JSON.

    Bytes are decoded as UTF-8 with replacement characters first.

    Returns:
        (parsed, None, cleaned) on success, (None, error, cleaned) on failure,
        where ``cleaned`` is the string that was handed to ``json.loads``.
    """
    if isinstance(text, bytes):
        try:
            text = text.decode('utf-8', errors='replace')
        except Exception:
            text = str(text)

    cleaned = extractJsonString(text or "")
    try:
        parsed = json.loads(cleaned)
    except Exception as parseError:
        return None, parseError, cleaned
    return parsed, None, cleaned


def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
    """
    Attempt to repair broken JSON using several strategies, least destructive first.

    Strategies, in order:
    1. Structure closing on the full text (closeJsonStructures) - no truncation.
    2. Section recovery via _extractSectionsRegex; recovered sections are wrapped
       in a fresh single-document skeleton.
    3. Progressive parsing - cut the text back until a closable prefix parses.
       Everything after the cut is lost, so this is the last resort. A coarse
       hit is refined character by character so that as little as possible is
       dropped.

    Args:
        text: Raw, possibly truncated JSON text.

    Returns:
        The repaired dict, or None if the input is empty or nothing usable
        could be recovered.
    """
    if not text:
        return None

    # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
    obj, err, _ = tryParseJson(closeJsonStructures(text))
    if err is None and isinstance(obj, dict):
        sections = extractSectionsFromDocument(obj)
        if sections:
            logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
        else:
            # Structure closing worked but no sections found - still return it
            logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
        return obj

    # Strategy 2: Try to extract sections from the entire text
    # NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
    extractedSections = _extractSectionsRegex(text)
    if extractedSections:
        logger.info(f"Extracted {len(extractedSections)} sections using regex")
        return {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": [{"sections": extractedSections}]
        }

    # Strategy 3: Progressive parsing (TRUNCATES DATA) - last resort
    logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
    bestResult = None
    bestValidLength = 0
    textLength = len(text)

    for stepSize in [100, 50, 10, 1]:
        for i in range(textLength, 0, -stepSize):
            obj, err, _ = tryParseJson(closeJsonStructures(text[:i]))
            if err is None and isinstance(obj, dict):
                bestResult = obj
                bestValidLength = i
                logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
                break
        # Truthiness is intentional: an empty dict carries no data, so keep searching
        if bestResult:
            # Refine the coarse hit: a longer prefix inside the skipped window may
            # also parse, which keeps up to stepSize-1 more characters of data
            refineStart = min(textLength, bestValidLength + stepSize - 1)
            for j in range(refineStart, bestValidLength, -1):
                obj, err, _ = tryParseJson(closeJsonStructures(text[:j]))
                if err is None and isinstance(obj, dict) and obj:
                    bestResult = obj
                    bestValidLength = j
                    logger.debug(f"Progressive parsing refined to length {j} - DATA TRUNCATED AT POSITION {j}")
                    break
            break

    if bestResult:
        logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")

        # Check if we have sections in the result
        sections = extractSectionsFromDocument(bestResult)
        if sections:
            logger.info(f"Progressive parsing found {len(sections)} sections")
            return bestResult

        # No sections found in progressive parsing, try to extract from broken part
        logger.info("Progressive parsing found no sections, trying to extract from broken part")
        extractedSections = _extractSectionsRegex(text[bestValidLength:])
        if extractedSections:
            logger.info(f"Extracted {len(extractedSections)} sections from broken part")
            # Merge with the valid part. The parsed prefix may lack "documents",
            # hold a non-list there, or have a first document without a usable
            # "sections" list - normalize each level before extending.
            documents = bestResult.get("documents")
            if not isinstance(documents, list) or not documents:
                documents = [{"sections": []}]
                bestResult["documents"] = documents
            if not isinstance(documents[0], dict):
                documents.insert(0, {"sections": []})
            firstDocument = documents[0]
            if not isinstance(firstDocument.get("sections"), list):
                firstDocument["sections"] = []
            firstDocument["sections"].extend(extractedSections)
            return bestResult

    logger.warning("All repair strategies failed")
    return None


def closeJsonStructures(text: str) -> str:
    """
    Close incomplete JSON structures generically.

    A single string-aware pass over the text determines:
    1. whether the text ends inside an unterminated string (escaped quotes are
       honored, so valid text containing ``\\"`` is never modified), and
    2. which objects/arrays are still open, in opening order.

    The result is the input plus, if needed, a closing quote, followed by the
    missing closers in LIFO order. A trailing comma directly before an appended
    closer is removed. Text that is already balanced is returned unchanged.

    Args:
        text: Possibly truncated JSON text.

    Returns:
        The text with open strings and structures closed. Falsy input is
        returned unchanged.
    """
    if not text:
        return text

    # Scan once, tracking string state and the closers still owed
    pendingClosers = []
    inString = False
    escapeNext = False

    for char in text:
        if escapeNext:
            escapeNext = False
            continue

        if char == '\\':
            escapeNext = True
            continue

        if char == '"':
            inString = not inString
            continue

        # Only track braces/brackets outside of strings
        if inString:
            continue
        if char == '{':
            pendingClosers.append('}')
        elif char == '[':
            pendingClosers.append(']')
        elif char in '}]' and pendingClosers and pendingClosers[-1] == char:
            pendingClosers.pop()

    result = text

    # Close an unterminated string
    if inString:
        if escapeNext:
            # A dangling backslash would escape the quote we are about to add
            result = result[:-1]
        result += '"'

    # Close remaining structures in reverse order (LIFO), removing a trailing
    # comma ONLY directly before each closing bracket/brace
    while pendingClosers:
        closingChar = pendingClosers.pop()
        result = result.rstrip()
        if result and result[-1] == ',':
            result = result[:-1].rstrip()
        result += closingChar

    return result


def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    """
    Extract sections from broken/incomplete JSON using structural parsing.

    Despite the name, regex matching is only the final fallback. Strategies,
    in order:
    1. Structure closing on the full text (no truncation).
    2. Progressive parsing: closable prefixes of decreasing length (TRUNCATES).
    3. The first balanced JSON substring, if it differs from the whole text.
    4. Pattern-based recovery via _extractGenericContent.

    NOTE: This function is called FROM repairBrokenJson, so it must NOT call
    repairBrokenJson to avoid circular dependency.

    Args:
        text: Raw, possibly truncated JSON text.

    Returns:
        The recovered sections; an empty list if nothing could be extracted.
    """
    # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
    obj, err, _ = tryParseJson(closeJsonStructures(text))
    if err is None and isinstance(obj, dict):
        extracted_sections = extractSectionsFromDocument(obj)
        if extracted_sections:
            logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
            return extracted_sections

    # Strategy 2: Progressive parsing to find a valid JSON prefix (TRUNCATES DATA)
    logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
    # Parsing is deterministic, so a prefix length that yielded no sections once
    # never will; the full length was already covered by Strategy 1.
    tried_lengths = {len(text)}
    for step_size in [1000, 500, 100, 50, 10]:
        for i in range(len(text), 0, -step_size):
            if i in tried_lengths:
                continue
            tried_lengths.add(i)
            obj, err, _ = tryParseJson(closeJsonStructures(text[:i]))
            if err is None and isinstance(obj, dict):
                extracted_sections = extractSectionsFromDocument(obj)
                if extracted_sections:
                    logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
                    return extracted_sections

    # Strategy 3: Try to find balanced JSON and parse it
    balanced_json_str = extractFirstBalancedJson(text)
    if balanced_json_str and balanced_json_str != text.strip():
        obj, err, _ = tryParseJson(balanced_json_str)
        if err is None and isinstance(obj, dict):
            extracted_sections = extractSectionsFromDocument(obj)
            if extracted_sections:
                logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections from balanced JSON")
                return extracted_sections

    # Strategy 4: Last resort - try generic content extraction (only if nothing else worked)
    logger.debug("_extractSectionsRegex: All structural parsing failed, trying generic content extraction")
    sections = _extractGenericContent(text)
    if sections:
        logger.debug(f"_extractSectionsRegex: Generic content extraction found {len(sections)} sections")

    return sections


def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str]:
    """
    Remove the last item if it appears to be incomplete/corrupted.
    This prevents corrupted data from being included in the final result.
    """
    # re is already imported at module level
    
    if not items:
        return items
    
    # Check if the original text ends with incomplete JSON patterns
    # Look for patterns that suggest the last item was cut off
    
    # Pattern 1: Text ends with incomplete string like {"text": "36
    if re.search(r'\{"[^"]*"\s*:\s*"[^"]*$', original_text):
        logger.debug("Detected incomplete string at end - removing last item")
        return items[:-1]
    
    # Pattern 2: Text ends with incomplete boolean like {"bool_flag": tr
    if re.search(r'\{"[^"]*"\s*:\s*(true|false|tr|fa)$', original_text):
        logger.debug("Detected incomplete boolean at end - removing last item")
        return items[:-1]
    
    # Pattern 3: Text ends with incomplete number like {"number": 123
    if re.search(r'\{"[^"]*"\s*:\s*\d+$', original_text):
        logger.debug("Detected incomplete number at end - removing last item")
        return items[:-1]
    
    # Pattern 4: Text ends with incomplete array like {"array": [1,2,3
    if re.search(r'\{"[^"]*"\s*:\s*\[[^\]]*$', original_text):
        logger.debug("Detected incomplete array at end - removing last item")
        return items[:-1]
    
    # Pattern 5: Text ends with incomplete object like {"obj": {"key": "val
    if re.search(r'\{"[^"]*"\s*:\s*\{[^}]*$', original_text):
        logger.debug("Detected incomplete object at end - removing last item")
        return items[:-1]
    
    # Pattern 6: Text ends with trailing comma (common sign of incomplete JSON)
    if original_text.rstrip().endswith(','):
        logger.debug("Detected trailing comma - removing last item")
        return items[:-1]
    
    # If no incomplete patterns detected, return all items
    return items


def _extractGenericContent(text: str) -> List[Dict[str, Any]]:
    """
    Extract generic content when no specific section patterns are found.
    This handles cases where the JSON structure is completely broken.
    Handles incomplete strings and corrupted data.
    Excludes the last incomplete item to prevent corrupted data.
    
    CRITICAL: Must preserve original content_type and id from the JSON structure!
    """
    # re is already imported at module level
    
    sections = []
    
    # CRITICAL: First, try to extract the original section structure from the JSON
    # Look for section patterns with content_type and id preserved
    # Handle both complete and incomplete JSON (may be cut off mid-string)
    # More flexible pattern that handles incomplete structures
    section_pattern = r'"sections"\s*:\s*\[\s*\{[^}]*?"id"\s*:\s*"([^"]+)"[^}]*?"content_type"\s*:\s*"([^"]+)"[^}]*?"elements"\s*:\s*\[(.*?)(?:\]\s*\}|$)'
    section_matches = re.finditer(section_pattern, text, re.DOTALL)
    
    for match in section_matches:
        section_id = match.group(1)
        content_type = match.group(2)
        elements_str = match.group(3)
        
        # Extract elements based on content_type
        elements = []
        
        if content_type == "code_block":
            # Look for {"code": "..."} patterns (complete)
            code_pattern = r'\{"code"\s*:\s*"([^"]*)"(?:\s*,\s*"language"\s*:\s*"([^"]*)")?\}'
            code_matches = re.finditer(code_pattern, elements_str, re.DOTALL)
            for code_match in code_matches:
                code = code_match.group(1)
                language = code_match.group(2) if code_match.lastindex >= 2 else None
                elem = {"code": code}
                if language:
                    elem["language"] = language
                elements.append(elem)
            
            # Also look for incomplete code blocks (cut off mid-string)
            # Pattern: {"code": "..." where string is not closed
            incomplete_code_pattern = r'\{"code"\s*:\s*"([^"]*?)(?:"|$)'
            incomplete_matches = re.finditer(incomplete_code_pattern, elements_str, re.DOTALL)
            for inc_match in incomplete_matches:
                code = inc_match.group(1)
                # Check if this code is already in elements (from complete match)
                if code and code not in [e.get("code", "")[:len(code)] for e in elements]:
                    # Extract language if present before the cut-off
                    language_match = re.search(r'"language"\s*:\s*"([^"]+)"', elements_str[:inc_match.end()])
                    language = language_match.group(1) if language_match else None
                    elem = {"code": code}
                    if language:
                        elem["language"] = language
                    elements.append(elem)
            
            # If still no elements found, try to extract code from the raw elements string
            # This handles cases where the JSON is very broken
            if not elements:
                # Look for any "code": "..." pattern, even if incomplete
                raw_code_pattern = r'"code"\s*:\s*"([^"]*)"'
                raw_code_matches = re.finditer(raw_code_pattern, elements_str, re.DOTALL)
                for raw_match in raw_code_matches:
                    code = raw_match.group(1)
                    if code:
                        elements.append({"code": code})
                
                # If still nothing, try to find incomplete code string
                if not elements:
                    incomplete_raw_pattern = r'"code"\s*:\s*"([^"]*?)(?:"|$)'
                    incomplete_raw_matches = re.finditer(incomplete_raw_pattern, elements_str, re.DOTALL)
                    for inc_raw_match in incomplete_raw_matches:
                        code = inc_raw_match.group(1)
                        if code:
                            elements.append({"code": code})
        
        elif content_type == "table":
            # Look for table elements with rows (handle incomplete JSON)
            # Pattern: {"headers": [...], "rows": [...]} or incomplete version
            # More flexible pattern that handles incomplete rows array
            # Match even if rows array is not closed
            table_pattern = r'\{\s*"headers"\s*:\s*\[([^\]]*)\]\s*,\s*"rows"\s*:\s*\[(.*?)(?:\]\s*\}|$)'
            table_matches = re.finditer(table_pattern, elements_str, re.DOTALL)
            for table_match in table_matches:
                headers_str = table_match.group(1)
                rows_str = table_match.group(2)
                
                # Parse headers
                headers = [h.strip('"') for h in re.findall(r'"([^"]*)"', headers_str)]
                
                # Parse rows (may be incomplete - handle cut-off)
                rows = []
                # Find all complete row arrays: ["...", "..."]
                row_pattern = r'\[([^\]]*)\]'
                row_matches = list(re.finditer(row_pattern, rows_str))
                for row_match in row_matches:
                    row_str = row_match.group(1)
                    row = [cell.strip('"') for cell in re.findall(r'"([^"]*)"', row_str)]
                    if row:
                        rows.append(row)
                
                # Also check for incomplete last row (cut off mid-row)
                # Look for pattern like ["cell1", "cell2", "incomplete
                # Find the last occurrence of [ that doesn't have a matching ]
                if rows_str:
                    # Find all [ positions
                    open_brackets = [i for i, char in enumerate(rows_str) if char == '[']
                    close_brackets = [i for i, char in enumerate(rows_str) if char == ']']
                    
                    # If there are more [ than ], we have an incomplete row
                    if len(open_brackets) > len(close_brackets):
                        # Find the last [ that doesn't have a matching ]
                        last_open = open_brackets[len(close_brackets)]
                        incomplete_row_str = rows_str[last_open+1:]  # Skip the [
                        # Extract cells from incomplete row
                        incomplete_row = [cell.strip('"') for cell in re.findall(r'"([^"]*)"', incomplete_row_str)]
                        if incomplete_row and (not rows or incomplete_row != rows[-1]):
                            rows.append(incomplete_row)
                
                elem = {"headers": headers, "rows": rows}
                elements.append(elem)
        
        elif content_type == "heading":
            # Look for {"level": X, "text": "..."} patterns
            heading_pattern = r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}'
            heading_matches = re.finditer(heading_pattern, elements_str)
            for heading_match in heading_matches:
                level = int(heading_match.group(1))
                text = heading_match.group(2)
                elements.append({"level": level, "text": text})
        
        elif content_type in ["bullet_list", "numbered_list"]:
            # Look for {"items": [...]} patterns (handle incomplete JSON)
            # Pattern: {"items": [...]} or incomplete version
            # More flexible pattern that handles incomplete items array
            items_pattern = r'\{\s*"items"\s*:\s*\[(.*?)(?:\]\s*\}|$)'
            items_matches = re.finditer(items_pattern, elements_str, re.DOTALL)
            for items_match in items_matches:
                items_str = items_match.group(1)
                # Extract all complete items (quoted strings)
                items = [item.strip('"') for item in re.findall(r'"([^"]*)"', items_str)]
                # Also check for incomplete last item (cut off mid-string)
                # Find the last occurrence of " that doesn't have a matching "
                if items_str:
                    # Count quotes - odd number means incomplete item
                    quote_count = items_str.count('"')
                    if quote_count % 2 != 0:
                        # There's an incomplete item at the end
                        # Find the last complete item and the incomplete part
                        last_complete_quote = items_str.rfind('"', 0, items_str.rfind('"'))
                        if last_complete_quote >= 0:
                            incomplete_part = items_str[last_complete_quote+1:]
                            # Extract incomplete item (everything after last complete quote)
                            incomplete_item = incomplete_part.split(',')[0].strip('"')
                            if incomplete_item and incomplete_item not in items:
                                items.append(incomplete_item)
                
                if items:
                    elements.append({"items": items})
        
        elif content_type == "paragraph":
            # Look for {"text": "..."} patterns
            text_pattern = r'\{"text"\s*:\s*"([^"]*)"\}'
            text_matches = re.finditer(text_pattern, elements_str)
            for text_match in text_matches:
                text = text_match.group(1)
                elements.append({"text": text})
        
        if elements:
            sections.append({
                "id": section_id,
                "content_type": content_type,
                "elements": elements,
                "order": len(sections)
            })
    
    # If we found sections with preserved structure, return them
    if sections:
        return sections
    
    # Fallback: Original logic for when structure is completely broken
    # Look for any structured content patterns
    # Pattern 1: Look for code_block {"code": "..."}
    code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\}', text)
    incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', text)
    
    all_code_items = code_items + incomplete_code_items
    unique_code_items = list(dict.fromkeys([item for item in all_code_items if item.strip()]))
    
    if unique_code_items:
        unique_code_items = _removeLastIncompleteItem(unique_code_items, text)
        if unique_code_items:
            # Try to find section ID and language from original JSON
            section_id_match = re.search(r'"id"\s*:\s*"([^"]+)"', text)
            section_id = section_id_match.group(1) if section_id_match else "section_1"
            
            language_match = re.search(r'"language"\s*:\s*"([^"]+)"', text)
            language = language_match.group(1) if language_match else None
            
            elements = [{"code": item} for item in unique_code_items]
            if language and elements:
                elements[0]["language"] = language
            
            sections.append({
                "id": section_id,
                "content_type": "code_block",
                "elements": elements,
                "order": 1
            })
            return sections
    
    # Pattern 2: Look for list items {"text": "..."}, including incomplete ones
    list_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
    incomplete_list_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)
    
    all_list_items = list_items + incomplete_list_items
    unique_list_items = list(dict.fromkeys([item for item in all_list_items if item.strip()]))
    
    if unique_list_items:
        unique_list_items = _removeLastIncompleteItem(unique_list_items, text)
        if unique_list_items:
            elements = [{"text": item} for item in unique_list_items]
            sections.append({
                "id": "section_1",
                "content_type": "list",
                "elements": elements,
                "order": 1
            })
            return sections
    
    # Pattern 3: Look for paragraph text {"text": "..."}, including incomplete ones
    if re.search(r'\{"text"\s*:\s*"[^"]*\}', text):
        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
        incomplete_text_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)
        
        all_text_items = text_items + incomplete_text_items
        unique_text_items = list(dict.fromkeys([item for item in all_text_items if item.strip()]))
        
        if unique_text_items:
            unique_text_items = _removeLastIncompleteItem(unique_text_items, text)
            if unique_text_items:
                elements = [{"text": item} for item in unique_text_items]
                sections.append({
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": elements,
                    "order": 1
                })
                return sections
    
    # Pattern 4: Look for any quoted strings that might be content, including incomplete ones
    if re.search(r'"([^"]{3,})"', text):
        text_items = re.findall(r'"([^"]{3,})"', text)
        incomplete_text_items = re.findall(r'"([^"]{3,}?)(?:\n|$)', text)
        
        all_text_items = text_items + incomplete_text_items
        content_items = [item for item in all_text_items if not item.startswith(('section_', 'doc_', 'metadata', 'split_strategy', 'source_documents', 'extraction_method', 'id', 'content_type', 'elements', 'order', 'title', 'filename'))]
        
        if content_items:
            content_items = _removeLastIncompleteItem(content_items, text)
            if content_items:
                elements = [{"text": item} for item in content_items[:10]]
                sections.append({
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": elements,
                    "order": 1
                })
    
    return sections


def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Extract all sections from a document data structure.

    Supports the nested layout (``{"documents": [{"sections": [...]}, ...]}``)
    and the flat layout (``{"sections": [...]}``).

    Args:
        documentData: Parsed document data; non-dict input yields an empty list.

    Returns:
        For the nested layout, a new list concatenating the ``sections`` lists
        of all dict entries in ``documents`` (entries without a list-valued
        ``sections`` are skipped). A list-valued ``documents`` is authoritative
        even if it yields no sections. Otherwise the root-level ``sections``
        list itself, or an empty list if none exists.
    """
    if not isinstance(documentData, dict):
        return []

    # Nested layout. A non-list value (e.g. null emitted by the model) is not a
    # usable documents array, so it is ignored instead of being iterated.
    documents = documentData.get("documents")
    if isinstance(documents, list):
        allSections = []
        for doc in documents:
            if not isinstance(doc, dict):
                continue
            docSections = doc.get("sections")
            if isinstance(docSections, list):
                allSections.extend(docSections)
        return allSections

    # Flat layout: sections directly at the root
    rootSections = documentData.get("sections")
    if isinstance(rootSections, list):
        return rootSections

    return []


def _extractOverlapFromElement(elem: Dict[str, Any], elemType: str) -> Optional[Dict[str, Any]]:
    """
    GENERIC function to extract overlap portion from an element.
    
    Handles elements of any size, including very long strings:
    - Paragraphs: Extract last N characters/words
    - Code blocks: Extract last N lines
    - Tables: Extract last N rows
    - Lists: Extract last N items
    - Other elements: Extract representative portion
    
    Args:
        elem: Element dictionary
        elemType: Element type (table, paragraph, code_block, etc.)
    
    Returns:
        Overlap element dictionary with size-limited content, or None
    """
    if not isinstance(elem, dict):
        return None
    
    # Get content (handle both flat and nested structures)
    content = elem.get("content", {}) if isinstance(elem.get("content"), dict) else {}
    
    if elemType == "table":
        rows = elem.get("rows", []) or content.get("rows", [])
        headers = elem.get("headers", []) or content.get("headers", [])
        
        if rows:
            # Extract last 3-5 rows as overlap (enough for context, not too large)
            overlapRowCount = min(5, len(rows))
            overlapRows = rows[-overlapRowCount:]
            
            overlapElem = {
                "type": "table",
                "content": {
                    "headers": headers,
                    "rows": overlapRows
                }
            }
            return overlapElem
    
    elif elemType in ["bullet_list", "numbered_list"]:
        items = elem.get("items", []) or content.get("items", [])
        
        if items:
            # Extract last 5-10 items as overlap
            overlapItemCount = min(10, len(items))
            overlapItems = items[-overlapItemCount:]
            
            overlapElem = {
                "type": elemType,
                "content": {
                    "items": overlapItems
                }
            }
            return overlapElem
    
    elif elemType == "paragraph":
        text = elem.get("text", "") or content.get("text", "")
        
        if text:
            # Extract last portion of text
            # For very long text, use last 300-500 characters
            # For shorter text, use all of it
            maxOverlapChars = 500
            minOverlapChars = 100
            
            if len(text) > maxOverlapChars:
                # Very long text - extract last portion
                # Try to break at word boundary for readability
                textSnippet = text[-maxOverlapChars:]
                # Find first space/newline to start from word boundary
                firstSpace = textSnippet.find(' ')
                if firstSpace > 0 and firstSpace < 50:
                    textSnippet = textSnippet[firstSpace + 1:]
                overlapText = textSnippet
            elif len(text) > minOverlapChars:
                # Medium text - use last portion
                overlapText = text[-minOverlapChars:]
            else:
                # Short text - use all
                overlapText = text
            
            overlapElem = {
                "type": "paragraph",
                "content": {
                    "text": overlapText
                }
            }
            return overlapElem
    
    elif elemType == "code_block":
        code = elem.get("code", "") or content.get("code", "")
        
        if code:
            # Extract last N lines of code
            codeLines = code.split('\n')
            # Use last 10-20 lines as overlap (enough context for continuation)
            overlapLineCount = min(20, len(codeLines))
            overlapLines = codeLines[-overlapLineCount:]
            overlapCode = '\n'.join(overlapLines)
            
            overlapElem = {
                "type": "code_block",
                "content": {
                    "code": overlapCode
                }
            }
            return overlapElem
    
    elif elemType == "heading":
        # Headings are usually short - return as-is
        return elem
    
    elif elemType == "image":
        # Images are usually small - return as-is
        return elem
    
    else:
        # Generic element - try to extract a representative portion
        # Convert to JSON and limit size
        elemJson = json.dumps(elem, ensure_ascii=False)
        
        # If element is very large, try to extract key fields only
        if len(elemJson) > 1000:
            # Extract only essential fields
            overlapElem = {
                "type": elemType,
                "id": elem.get("id"),
                "content": "..."  # Indicate truncated content
            }
            return overlapElem
        
        # Small element - return as-is
        return elem


def _continuationSectionElements(section: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the dict elements of a section as a list (a lone dict counts as one element)."""
    elements = section.get("elements", [])
    if isinstance(elements, dict):
        return [elements]
    if isinstance(elements, list):
        return [elem for elem in elements if isinstance(elem, dict)]
    return []


def _continuationSectionSummary(section: Dict[str, Any]) -> Optional[str]:
    """Build the one-line "already delivered" summary for a section, or None if there is nothing to report."""
    section_id = section.get("id")
    # CRITICAL: If section has no ID, omit it from summary
    if not section_id:
        return None

    content_type = section.get("content_type", "")
    elements = _continuationSectionElements(section)

    if content_type == "heading":
        # List every heading element with its level and text
        headings = []
        for elem in elements:
            text = elem.get("text", "")
            if text:
                headings.append(f'level {elem.get("level", "")}: {text}')
        if headings:
            return f'- heading "{section_id}" {", ".join(headings)}'
        return None

    if content_type == "paragraph":
        text_count = sum(1 for elem in elements if elem.get("text"))
        if text_count > 0:
            return f'- paragraph with {text_count} text(s)'
        return None

    if content_type in ("bullet_list", "numbered_list"):
        item_count = sum(
            len(elem["items"]) for elem in elements if isinstance(elem.get("items"), list)
        )
        if item_count > 0:
            # Report the actual list type so numbered lists are not described as bullet lists
            return f'- {content_type} with {item_count} items'
        return None

    if content_type == "table":
        row_count = sum(
            len(elem["rows"]) for elem in elements if isinstance(elem.get("rows"), list)
        )
        if row_count > 0:
            return f'- table "{section_id}" with {row_count} rows'
        return None

    if content_type == "code_block":
        # Count non-blank code lines across all elements
        line_count = 0
        for elem in elements:
            code = elem.get("code", "")
            if isinstance(code, str) and code:
                line_count += sum(1 for line in code.split('\n') if line.strip())
        if line_count > 0:
            line_word = "line" if line_count == 1 else "lines"
            return f'- code_block "{section_id}" with {line_count} code {line_word}'
        return None

    return None


def buildContinuationContext(
    allSections: List[Dict[str, Any]], 
    lastRawResponse: Optional[str] = None,
    useCaseId: Optional[str] = None,
    templateStructure: Optional[str] = None
) -> ContinuationContext:
    """
    Build context information from accumulated sections for continuation prompt.
    
    Produces a summary of the data delivered so far, the cut-off point of the
    last (possibly broken) response, and the JSON structure context around it.
    Failures while analysing the raw response are logged and leave the
    corresponding fields at their empty defaults; they never abort the build.
    
    Args:
        allSections: List of ALL sections accumulated across ALL iterations
            (entries that are not dicts are ignored in the summary)
        lastRawResponse: Raw JSON response from last iteration (can be broken/incomplete)
        useCaseId: Optional use case ID to determine expected JSON structure
        templateStructure: JSON structure template from initial prompt (MUST be identical)
        
    Returns:
        ContinuationContext: Pydantic model with all continuation context information
    """
    section_count = len(allSections)
    
    # Build summary of delivered data (one line per reportable section)
    summary_items = []
    for section in allSections:
        if not isinstance(section, dict):
            continue
        summary_line = _continuationSectionSummary(section)
        if summary_line:
            summary_items.append(summary_line)
    
    # If no sections extracted but we have raw response, indicate that previous response was broken
    if not summary_items and lastRawResponse:
        summary_items.append("- Previous response was incomplete/broken JSON - please continue from where it stopped")
    
    summary_lines = ["Following data has already been delivered:\n"]
    # CRITICAL: If summary is too long, truncate: show first 10 and last 10 items
    if len(summary_items) > 20:
        summary_lines.extend(summary_items[:10])
        summary_lines.append(f"... (truncated {len(summary_items) - 20} items) ...")
        summary_lines.extend(summary_items[-10:])
    else:
        summary_lines.extend(summary_items)
    
    delivered_summary = "\n".join(summary_lines)
    
    # Extract cut-off point
    # CRITICAL: There is always only ONE section incomplete (JSON cut-off point)
    cut_off_element = None
    element_before_cutoff = None
    
    if lastRawResponse:
        try:
            # Always inspect the raw JSON: even parseable JSON may be cut off mid-element
            raw_stripped = stripCodeFences(lastRawResponse.strip()).strip()
            
            if raw_stripped.startswith(('{', '[')):
                # Response is full JSON - use standard extraction
                # Strategy 1: Try to find incomplete section using structured parsing
                incomplete_section = _findIncompleteSectionInRaw(raw_stripped)
                if incomplete_section:
                    cut_off_element, element_before_cutoff = _extractCutOffElements(incomplete_section, raw_stripped)
                
                # Strategy 2: If no incomplete section found, extract directly from raw JSON
                # This handles cases where JSON is cut off mid-element within a complete section
                if not cut_off_element:
                    cut_off_element, element_before_cutoff = _extractCutOffElementsFromRaw(raw_stripped, allSections)
            elif allSections:
                # Response is a fragment (no JSON opener): it continues the last
                # element of the last accumulated section
                elements = allSections[-1].get("elements", [])
                if isinstance(elements, list) and elements:
                    last_elem = elements[-1]
                    if isinstance(last_elem, dict):
                        # Show the fragment itself as the cut-off element
                        cut_off_element = raw_stripped
                        # Show the element before it, or the last element when it is the only one
                        if len(elements) > 1:
                            element_before_cutoff = json.dumps(elements[-2])
                        else:
                            element_before_cutoff = json.dumps(last_elem)
        except Exception as e:
            # Best effort: the continuation prompt can still be built without a cut-off point
            logger.debug("Error extracting cut-off point: %s", e)
    
    # Store raw JSON response and extract structure context
    last_raw_json = lastRawResponse or ""
    last_complete_part = ""
    incomplete_part = ""
    structure_context = ""
    
    if lastRawResponse:
        # Provides: last complete part, incomplete part, structure context
        # NOTE: template_structure is passed as parameter, not extracted
        try:
            structureContext = extractJsonStructureContext(lastRawResponse, useCaseId)
            last_complete_part = structureContext.get("last_complete_part", "")
            incomplete_part = structureContext.get("incomplete_part", "")
            structure_context = structureContext.get("structure_context", "")
        except Exception as e:
            logger.warning("Error extracting JSON structure context: %s", e, exc_info=True)
    
    # Return ContinuationContext Pydantic model
    return ContinuationContext(
        section_count=section_count,
        delivered_summary=delivered_summary,
        cut_off_element=cut_off_element,
        element_before_cutoff=element_before_cutoff,
        template_structure=templateStructure,  # Use passed parameter, not extracted
        last_complete_part=last_complete_part,
        incomplete_part=incomplete_part,
        structure_context=structure_context,
        last_raw_json=last_raw_json
    )


def extractJsonStructureContext(
    incompleteJson: str,
    useCaseId: Optional[str] = None
) -> Dict[str, Any]:
    """
    Derive continuation-prompt context from a possibly truncated JSON string.
    
    The input is normalized and stripped of code fences, everything before the
    first '{' or '[' is discarded, and three helpers are run on the remainder.
    Blank input, or input without any '{' / '[', yields all-empty fields.
    
    Args:
        incompleteJson: Incomplete JSON string (may be cut off mid-element)
        useCaseId: Optional use case ID to determine expected structure
        
    Returns:
        Dict with:
        - template_structure: Template JSON structure (structure only)
        - last_complete_part: Last complete element/object as JSON string
        - incomplete_part: Incomplete/cut-off portion as JSON string
        - structure_context: Parent structure metadata (keys only, no content)
    """
    from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
    
    context = {
        "template_structure": "",
        "last_complete_part": "",
        "incomplete_part": "",
        "structure_context": ""
    }
    
    if not incompleteJson or not incompleteJson.strip():
        return context
    
    cleaned = stripCodeFences(normalizeJsonText(incompleteJson)).strip()
    if not cleaned:
        return context
    
    # Position of the first structural opener, -1 when there is none
    openerPos = next((pos for pos, ch in enumerate(cleaned) if ch in '{['), -1)
    if openerPos == -1:
        return context
    
    payload = cleaned[openerPos:]
    
    # Step 1: structure-only template
    context["template_structure"] = _extractTemplateStructure(payload, useCaseId)
    
    # Step 2: last complete part and the cut-off remainder
    completePart, cutPart = _extractLastCompleteAndIncomplete(payload)
    context["last_complete_part"] = completePart
    context["incomplete_part"] = cutPart
    
    # Step 3: parent structure metadata; both parts are passed so positions can be shown
    context["structure_context"] = _extractStructureContext(payload, cutPart, completePart)
    
    return context


def _extractTemplateStructure(jsonContent: str, useCaseId: Optional[str] = None) -> str:
    """
    Extract template JSON structure (structure only, no content).
    
    Examples:
    - {"documents": [{"chapters": [{"sections": [...]}]}]}
    - {"elements": [{"type": "...", "content": {...}}]}
    """
    import json
    import re
    
    # Try to parse JSON to understand structure
    try:
        # Try to close and parse
        closed = closeJsonStructures(jsonContent)
        parsed = json.loads(closed)
        
        # Build template structure (keys only, no content)
        template = _buildStructureTemplate(parsed)
        return json.dumps(template, indent=2, ensure_ascii=False)
    except Exception:
        # If parsing fails, try to extract structure from string
        # Look for top-level keys
        topLevelKeys = []
        
        # Pattern: "key": { or "key": [
        keyPattern = r'"([^"]+)"\s*:\s*[{\[]'
        matches = re.findall(keyPattern, jsonContent)
        if matches:
            topLevelKeys = matches[:3]  # Take first 3 keys
        
        # Build template based on use case or detected keys
        if useCaseId == "chapter_structure":
            return json.dumps({"documents": [{"chapters": [{"id": "", "title": "", "level": 0}]}]}, indent=2, ensure_ascii=False)
        elif useCaseId == "section_content":
            return json.dumps({"elements": [{"type": "", "content": {}}]}, indent=2, ensure_ascii=False)
        elif useCaseId == "code_structure":
            return json.dumps({"files": [{"id": "", "filename": "", "fileType": ""}]}, indent=2, ensure_ascii=False)
        elif topLevelKeys:
            # Build generic template
            template = {}
            for key in topLevelKeys:
                template[key] = []
            return json.dumps(template, indent=2, ensure_ascii=False)
        else:
            return json.dumps({}, indent=2, ensure_ascii=False)


def _buildStructureTemplate(obj: Any, maxDepth: int = 3) -> Any:
    """
    Build structure template from parsed JSON (keys only, no content).
    """
    if isinstance(obj, dict):
        template = {}
        for key, value in obj.items():
            if isinstance(value, (dict, list)):
                template[key] = _buildStructureTemplate(value, maxDepth - 1) if maxDepth > 0 else None
            else:
                # Keep key but use empty value of same type
                if isinstance(value, str):
                    template[key] = ""
                elif isinstance(value, (int, float)):
                    template[key] = 0
                elif isinstance(value, bool):
                    template[key] = False
                else:
                    template[key] = None
        return template
    elif isinstance(obj, list) and obj:
        # Use first element as template
        return [_buildStructureTemplate(obj[0], maxDepth - 1) if maxDepth > 0 else None]
    else:
        return None


def _extractLastCompleteAndIncomplete(jsonContent: str) -> Tuple[str, str]:
    """
    Split JSON text into its last complete part and the incomplete remainder.
    
    The text is scanned left to right while tracking brace and bracket depth
    (string contents are skipped). The last position where both depths are
    back at zero marks the end of the balanced prefix.
    
    Returns:
        Tuple of (last_complete_part, incomplete_part) as JSON strings.
        last_complete_part is the last complete element found in the balanced
        prefix (or the whole prefix when none is found), and "" when there is
        no balanced prefix at all.
    """
    openBraces = 0
    openBrackets = 0
    balancedEnd = -1
    insideString = False
    skipNext = False
    
    for pos, ch in enumerate(jsonContent):
        if skipNext:
            # Character following a backslash is never interpreted
            skipNext = False
        elif ch == '\\':
            skipNext = True
        elif ch == '"':
            insideString = not insideString
        elif insideString:
            pass
        elif ch == '{':
            openBraces += 1
        elif ch == '[':
            openBrackets += 1
        elif ch == '}' or ch == ']':
            if ch == '}':
                openBraces -= 1
            else:
                openBrackets -= 1
            if openBraces == 0 and openBrackets == 0:
                # Everything up to and including this closer is balanced
                balancedEnd = pos + 1
    
    if balancedEnd <= 0:
        # No complete structure found - everything is incomplete
        return "", buildIncompleteContext(jsonContent, 0)
    
    balancedPrefix = jsonContent[:balancedEnd]
    lastElement = _findLastCompleteElement(balancedPrefix)
    # Context for the incomplete part - shows the structure around the break
    cutContext = buildIncompleteContext(jsonContent, balancedEnd)
    return (lastElement or balancedPrefix), cutContext


def _findLastCompleteElement(jsonStr: str) -> str:
    """
    Find the last complete element in JSON string.
    """
    import json
    
    # Try to parse and extract last element
    try:
        closed = closeJsonStructures(jsonStr)
        parsed = json.loads(closed)
        
        # If it's a dict with arrays, get last element from first array
        if isinstance(parsed, dict):
            for key, value in parsed.items():
                if isinstance(value, list) and value:
                    lastElem = value[-1]
                    return json.dumps(lastElem, indent=2, ensure_ascii=False)
        
        # If it's a list, get last element
        if isinstance(parsed, list) and parsed:
            lastElem = parsed[-1]
            return json.dumps(lastElem, indent=2, ensure_ascii=False)
    except Exception:
        pass
    
    # Fallback: try to find last complete object using brace matching
    braceCount = 0
    startPos = -1
    lastCompleteEnd = -1
    
    for i, char in enumerate(jsonStr):
        if char == '{':
            if braceCount == 0:
                startPos = i
            braceCount += 1
        elif char == '}':
            braceCount -= 1
            if braceCount == 0 and startPos >= 0:
                lastCompleteEnd = i + 1
    
    if lastCompleteEnd > 0:
        return jsonStr[startPos:lastCompleteEnd]
    
    return ""


def buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
    """
    Build hierarchical context showing incomplete JSON structure.
    
    Renders, one line per entry and indented by level depth, the chain of open
    structures leading to breakPosition:
    - Opening bracket/brace of every level in the hierarchy (always shown)
    - For the direct parent of the cut level: complete elements before the
      cut, limited by a 500 character DATA budget
    - For other ancestor levels: the raw text between the level start and the
      start of the next level
    - The cut piece, with its last line marked <-- CUT POINT (incomplete)
    - Does NOT close open structures
    
    Args:
        jsonContent: JSON text (may be truncated)
        breakPosition: Index in jsonContent where the text is considered cut
    
    Returns:
        Multi-line context string. jsonContent is returned unchanged when
        breakPosition is <= 0 or beyond the end of the text, and the text up
        to breakPosition is returned when no hierarchy is found.
    """
    if breakPosition <= 0 or breakPosition > len(jsonContent):
        return jsonContent
    
    # Each hierarchy entry is read below as a dict with 'type', 'start_pos' and 'depth'
    # NOTE(review): presumably ordered outermost-first, last entry = level containing the cut - confirm in findStructureHierarchy
    hierarchy = findStructureHierarchy(jsonContent, breakPosition)
    if not hierarchy:
        return jsonContent[:breakPosition]
    
    cutPiece = extractCutPiece(jsonContent, breakPosition)
    resultLines = []
    # Maximum number of characters of complete elements shown for the parent of the cut level
    DATA_BUDGET = 500
    
    # Build hierarchy level by level - show actual JSON structure
    for levelIndex, level in enumerate(hierarchy):
        levelType = level['type']
        levelStart = level['start_pos']
        levelDepth = level['depth']
        indent = "  " * levelDepth
        isCutLevel = (levelIndex == len(hierarchy) - 1)
        isParentOfCutLevel = (levelIndex == len(hierarchy) - 2)
        
        # End of this level's own content: start of the next level, or the break itself for the last level
        if levelIndex < len(hierarchy) - 1:
            nextLevel = hierarchy[levelIndex + 1]
            nextLevelStart = nextLevel['start_pos']
        else:
            nextLevelStart = breakPosition
        
        # Show opening structure for this level
        # For cut level, check if cut piece already starts with bracket/brace - if so, don't duplicate
        if isCutLevel and cutPiece:
            cutPieceStripped = cutPiece.strip()
            if (levelType == 'array' and cutPieceStripped.startswith('[')) or \
               (levelType == 'object' and cutPieceStripped.startswith('{')):
                # Cut piece already includes opening bracket, don't add it separately
                # Use parent level's child indent (cut element is a child of parent, not a separate level)
                if levelIndex > 0:
                    parentLevel = hierarchy[levelIndex - 1]
                    parentIndent = "  " * parentLevel['depth']
                    childIndent = parentIndent + "  "
                else:
                    childIndent = indent + "  "
                # Re-indent every non-blank line of the cut piece
                for line in cutPiece.split('\n'):
                    stripped = line.strip()
                    if stripped:
                        resultLines.append(f'{childIndent}{stripped}')
                # Marker goes on the most recently appended line
                resultLines[-1] += '  <-- CUT POINT (incomplete)'
            else:
                # Cut piece doesn't start with matching bracket, add opening structure
                resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
                childIndent = indent + "  "
                for line in cutPiece.split('\n'):
                    stripped = line.strip()
                    if stripped:
                        resultLines.append(f'{childIndent}{stripped}')
                resultLines[-1] += '  <-- CUT POINT (incomplete)'
        elif isCutLevel:
            # Cut level but no cut piece - add opening structure and a placeholder line
            resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
            childIndent = indent + "  "
            resultLines.append(f'{childIndent}...  <-- CUT POINT (incomplete)')
        elif isParentOfCutLevel:
            # Parent of cut level: add opening structure, then show complete elements with budget
            # Works for both arrays and objects
            resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
            childIndent = indent + "  "
            # Consumed below as (start, end) index pairs into jsonContent
            completeElements = _findCompleteElementsAtLevel(
                jsonContent, levelStart, nextLevelStart, levelDepth
            )
            
            dataBudget = DATA_BUDGET
            # Elements are visited in reverse list order and their lines are appended in that visiting order
            # NOTE(review): if completeElements is in document order, the rendered elements appear reversed - confirm this is intended
            for elementStart, elementEnd in reversed(completeElements):
                elementData = jsonContent[elementStart:elementEnd].strip()
                elementSize = len(elementData)
                
                if elementSize == 0:
                    continue
                
                # Stop at the first element that no longer fits the remaining budget
                if elementSize > dataBudget:
                    break
                
                for line in elementData.split('\n'):
                    stripped = line.strip()
                    if stripped:
                        resultLines.append(f'{childIndent}{stripped}')
                # Add a separator when the element ends before the next level starts
                if elementEnd < nextLevelStart:
                    resultLines[-1] += ','
                
                dataBudget -= elementSize
                
                if dataBudget <= 0:
                    break
        
        else:
            # Other parent levels: add opening structure, then show path content
            resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
            childIndent = indent + "  "
            # Text between this level's opener and the start of the next level
            pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
            if pathContent:
                # Show all path content (structure is always shown, not truncated)
                for line in pathContent.split('\n'):
                    stripped = line.strip()
                    if stripped:
                        resultLines.append(f'{childIndent}{stripped}')
    
    return "\n".join(resultLines)


def _buildNestedHierarchy(
    resultLines: List[str],
    jsonContent: str,
    hierarchy: List[Dict[str, Any]],
    levelIndex: int,
    breakPosition: int,
    cutPiece: str,
    cutLevel: Dict[str, Any]
) -> None:
    """
    Recursively build nested hierarchy from root to cut level.
    This ensures proper nesting where each level contains the next level.
    """
    if levelIndex >= len(hierarchy):
        return
    
    level = hierarchy[levelIndex]
    levelType = level['type']
    levelStart = level['start_pos']
    levelKey = level.get('key')
    levelDepth = level['depth']
    indent = "  " * levelDepth
    
    isCutLevel = (levelIndex == len(hierarchy) - 1)
    
    # Show opening structure for this level
    if levelKey:
        resultLines.append(f'{indent}"{levelKey}": {{' if levelType == 'object' else f'{indent}"{levelKey}": [')
    else:
        resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
    
    childIndent = indent + "  "
    
    if isCutLevel:
        # Cut level - show content (complete elements + cut piece)
        if levelType == 'array':
            charBudget = 1000
            completeElements = _findCompleteElementsAtLevel(
                jsonContent, levelStart, breakPosition, levelDepth
            )
            
            # Show complete elements (working backwards from the cut)
            for elementStart, elementEnd in reversed(completeElements):
                elementSize = elementEnd - elementStart
                if charBudget >= elementSize:
                    element = jsonContent[elementStart:elementEnd].strip()
                    if element:
                        elementLines = element.split('\n')
                        for line in elementLines:
                            if line.strip():
                                resultLines.append(f'{childIndent}{line}')
                        if elementEnd < breakPosition:
                            resultLines[-1] += ','
                        charBudget -= elementSize
                else:
                    break
            
            # Show cut piece
            if cutPiece:
                cutPieceLines = cutPiece.split('\n')
                for line in cutPieceLines:
                    if line.strip():
                        resultLines.append(f'{childIndent}{line}')
                resultLines[-1] += '  <-- CUT POINT (incomplete)'
            else:
                cutPart = jsonContent[max(0, breakPosition-50):breakPosition]
                resultLines.append(f'{childIndent}{cutPart}  <-- CUT POINT (incomplete)')
        
        else:
            # Object at cut level
            previewSize = breakPosition - levelStart
            maxPreviewSize = 500
            if previewSize > maxPreviewSize:
                previewStart = breakPosition - maxPreviewSize
                preview = jsonContent[previewStart:breakPosition]
            else:
                preview = jsonContent[levelStart:breakPosition]
            
            previewLines = preview.split('\n')
            for line in previewLines:
                if line.strip():
                    resultLines.append(f'{childIndent}{line}')
            
            cutPart = jsonContent[breakPosition:min(breakPosition + 50, len(jsonContent))]
            resultLines.append(f'{childIndent}... {cutPart}  <-- CUT POINT (incomplete)')
    
    else:
        # Parent level - show path to next level, then recursively build next level
        nextLevel = hierarchy[levelIndex + 1]
        nextLevelKey = nextLevel.get('key')
        nextLevelStart = nextLevel['start_pos']
        nextLevelType = nextLevel['type']
        
        # Extract content between this level's opening and next level's start
        # This shows any keys/values that come before the next level
        pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
        
        # Show the path content (keys/values before next level)
        if len(pathContent) > 0 and len(pathContent) <= 500:
            pathLines = pathContent.split('\n')
            nonEmptyLines = [line for line in pathLines if line.strip()]
            if nonEmptyLines:
                for line in nonEmptyLines[:20]:  # Show more lines
                    if line.strip():
                        resultLines.append(f'{childIndent}{line}')
                if len(nonEmptyLines) > 20:
                    resultLines.append(f'{childIndent}... ({len(nonEmptyLines) - 20} more lines) ...')
        elif len(pathContent) > 500:
            # Content too large - show placeholder
            resultLines.append(f'{childIndent}... (content too large, {len(pathContent)} chars) ...')
        
        # Always show the key leading to next level if it exists
        # The recursive call will show the opening bracket/brace, so we just show the key here
        if nextLevelKey:
            # Show the key (the recursive call will add the opening bracket/brace)
            # Actually, the recursive call already shows the full opening with key,
            # so we don't need to show it here - just let the recursive call handle it
            pass
        
        # Recursively build next level (this will show its opening structure and content)
        _buildNestedHierarchy(resultLines, jsonContent, hierarchy, levelIndex + 1, breakPosition, cutPiece, cutLevel)
    
    # Close this level
    resultLines.append(f'{indent}}}' if levelType == 'object' else f'{indent}]')


def _findCompleteElementsAtLevel(
    jsonContent: str, 
    levelStart: int, 
    breakPosition: int, 
    targetDepth: int
) -> List[Tuple[int, int]]:
    """
    Find all complete elements at a specific depth level.
    
    Elements inside the structure at targetDepth are at targetDepth + 1.
    We track depth relative to the start of the structure.
    
    Returns list of (start, end) tuples for complete elements.
    """
    completeElements = []
    
    # Track depth relative to the level start
    # When we're at levelStart, we're at the opening bracket/brace (depth = targetDepth)
    # Elements inside are at depth = targetDepth + 1
    relativeDepth = 0  # Depth relative to level start (0 = at opening bracket/brace)
    inString = False
    escapeNext = False
    currentElementStart = None
    
    # Find the first non-whitespace character after the opening bracket/brace
    for i in range(levelStart + 1, min(breakPosition, len(jsonContent))):
        if jsonContent[i] not in [' ', '\n', '\r', '\t']:
            currentElementStart = i
            break
    
    if currentElementStart is None:
        return completeElements
    
    for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
        char = jsonContent[i]
        
        if escapeNext:
            escapeNext = False
            continue
        
        if char == '\\':
            escapeNext = True
            continue
        
        if char == '"':
            inString = not inString
            continue
        
        if not inString:
            if char == '{':
                relativeDepth += 1
            elif char == '}':
                relativeDepth -= 1
                # Element is complete when we return to the level's depth (relativeDepth == 0)
                if relativeDepth == 0:
                    # Found end of complete element
                    if currentElementStart is not None:
                        completeElements.append((currentElementStart, i + 1))
                    # Find start of next element
                    j = i + 1
                    while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
                        j += 1
                    if j < breakPosition:
                        currentElementStart = j
                    else:
                        currentElementStart = None
            elif char == '[':
                relativeDepth += 1
            elif char == ']':
                relativeDepth -= 1
                # Element is complete when we return to the level's depth (relativeDepth == 0)
                if relativeDepth == 0:
                    # Found end of complete element
                    if currentElementStart is not None:
                        completeElements.append((currentElementStart, i + 1))
                    # Find start of next element
                    j = i + 1
                    while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
                        j += 1
                    if j < breakPosition:
                        currentElementStart = j
                    else:
                        currentElementStart = None
            elif char == ',':
                # Comma at relativeDepth == 0 means we're between elements at the cut level
                if relativeDepth == 0:
                    # Element boundary - check if we have a complete element
                    if currentElementStart is not None and currentElementStart < i:
                        # Simple value (string, number, boolean, null) - complete at comma
                        completeElements.append((currentElementStart, i))
                    # Find start of next element
                    j = i + 1
                    while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
                        j += 1
                    if j < breakPosition:
                        currentElementStart = j
                    else:
                        currentElementStart = None
    
    return completeElements


def extractCutPiece(jsonContent: str, breakPosition: int) -> str:
    """
    Return the text of the incomplete element that ends at ``breakPosition``.

    The innermost structure reported by ``findStructureHierarchy`` is scanned from
    its opening character up to the cut. Each qualifying comma or colon outside a
    string moves the candidate start to the next non-whitespace character, so the
    result runs from the last such boundary to ``breakPosition``.

    NOTE(review): the brace/bracket counters start at 0 at the cut level's opener,
    but a boundary qualifies only when their sum equals the level's hierarchy
    'depth'. Those two scales coincide only for a level at depth 1; for other
    depths the whole structure from its opener tends to be returned. Confirm
    whether that is intended before changing it, since merging may rely on it.

    Args:
        jsonContent: The truncated JSON text.
        breakPosition: Offset where the text was cut.

    Returns:
        The incomplete piece without leading whitespace (trailing whitespace is
        kept). "" when ``breakPosition`` is not within ``1..len(jsonContent)``.
        When no hierarchy is found, up to 200 characters before the cut.
    """
    if breakPosition <= 0 or breakPosition > len(jsonContent):
        return ""

    hierarchy = findStructureHierarchy(jsonContent, breakPosition)
    if not hierarchy:
        return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()

    innermost = hierarchy[-1]
    scanFrom = innermost['start_pos']
    levelDepth = innermost.get('depth', 0)
    blanks = ' \n\r\t'

    openBraces = 0
    openBrackets = 0
    insideString = False
    skipChar = False
    pieceStart = scanFrom

    # The guard above ensures breakPosition <= len(jsonContent), so breakPosition
    # alone bounds every index used below.
    for idx in range(scanFrom, breakPosition):
        ch = jsonContent[idx]

        if skipChar:
            skipChar = False
            continue
        if ch == '\\':
            skipChar = True
            continue
        if ch == '"':
            insideString = not insideString
            continue
        if insideString:
            continue

        if ch == '{':
            openBraces += 1
        elif ch == '}':
            openBraces -= 1
        elif ch == '[':
            openBrackets += 1
        elif ch == ']':
            openBrackets -= 1
        elif ch in ',:' and openBraces + openBrackets == levelDepth:
            # Element separator or key/value separator: the piece starts after it.
            nxt = idx + 1
            while nxt < breakPosition and jsonContent[nxt] in blanks:
                nxt += 1
            if nxt < breakPosition:
                pieceStart = nxt

    firstSolid = next(
        (idx for idx in range(pieceStart, breakPosition) if jsonContent[idx] not in blanks),
        pieceStart,
    )
    rawPiece = jsonContent[firstSolid:breakPosition]
    # Prefer the left-trimmed text; fall back to the raw slice if trimming empties it.
    return rawPiece.lstrip() or rawPiece


def findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
    """
    Compute the chain of JSON structures leading from the root to the cut point.

    The text before ``breakPosition`` is scanned once, ignoring brackets and braces
    inside string literals. Structures still open at the cut form the path; when
    nothing is open, a path through the closed structures down to the deepest
    recorded depth is returned instead.

    Args:
        jsonContent: The (possibly truncated) JSON text.
        breakPosition: Exclusive end offset of the scan.

    Returns:
        Level dicts ordered from outermost to innermost, each with:
        'type' ('object' or 'array'), 'start_pos' (offset of the opener),
        'end_pos' (offset after the closer, or ``breakPosition`` for open
        structures), 'depth' (number of structures open when it started) and
        'key' (result of ``findKeyBefore`` for the opener). An empty list when
        no structure was seen before ``breakPosition``.
    """
    braceDepth = 0
    bracketDepth = 0
    inString = False
    escapeNext = False

    # Currently open structures as (type, start_pos, depth); innermost last.
    structureStack: List[Tuple[str, int, int]] = []
    # Structures that were closed before the break, in closing order.
    closedStructures: List[Dict[str, Any]] = []

    def closeTop(expectedType: str, endPos: int) -> None:
        # Only pop when the closer matches the innermost open structure.
        if structureStack and structureStack[-1][0] == expectedType:
            structType, start, depth = structureStack.pop()
            closedStructures.append({
                'type': structType,
                'start_pos': start,
                'end_pos': endPos,
                'depth': depth,
                'key': findKeyBefore(jsonContent, start)
            })

    for i in range(min(breakPosition, len(jsonContent))):
        char = jsonContent[i]

        if escapeNext:
            escapeNext = False
            continue
        if char == '\\':
            escapeNext = True
            continue
        if char == '"':
            inString = not inString
            continue
        if inString:
            continue

        if char == '{':
            # Depth is recorded before incrementing: it is the level of the opener.
            structureStack.append(('object', i, braceDepth + bracketDepth))
            braceDepth += 1
        elif char == '}':
            closeTop('object', i + 1)
            braceDepth -= 1
        elif char == '[':
            structureStack.append(('array', i, braceDepth + bracketDepth))
            bracketDepth += 1
        elif char == ']':
            closeTop('array', i + 1)
            bracketDepth -= 1

    if not structureStack:
        # Everything was closed before the break: walk the closed structures
        # depth by depth, preferring the one that encloses the next level.
        if not closedStructures:
            return []

        maxDepth = max(s['depth'] for s in closedStructures)
        path = []
        for depth in range(maxDepth + 1):
            candidates = [s for s in closedStructures if s['depth'] == depth]
            if not candidates:
                continue
            chosen = candidates[0]
            if depth < maxDepth:
                nextDepthCandidates = [s for s in closedStructures if s['depth'] == depth + 1]
                if nextDepthCandidates:
                    nextStart = min(s['start_pos'] for s in nextDepthCandidates)
                    for candidate in candidates:
                        if candidate['start_pos'] < nextStart < candidate['end_pos']:
                            chosen = candidate
                            break
            path.append(chosen)
        return path

    # Open structures exist: index them by depth, then walk from the deepest one
    # back towards the root, requiring each level to start at or before the next.
    openByDepth = {}
    for structType, start, depth in structureStack:
        openByDepth[depth] = {
            'type': structType,
            'start_pos': start,
            'end_pos': breakPosition,
            'depth': depth,
            'key': findKeyBefore(jsonContent, start)
        }

    maxOpenDepth = max(openByDepth)
    path = []
    currentDepth = maxOpenDepth
    currentStart = openByDepth[maxOpenDepth]['start_pos']

    while currentDepth >= 0:
        struct = openByDepth.get(currentDepth)
        if struct is None or struct['start_pos'] > currentStart:
            # No usable open structure at this depth: fall back to a closed one
            # that spans the current start, taking the one that ends latest.
            candidates = [
                s for s in closedStructures
                if s['depth'] == currentDepth and s['start_pos'] <= currentStart < s['end_pos']
            ]
            if not candidates:
                break
            struct = max(candidates, key=lambda x: x['end_pos'])
        path.insert(0, struct)
        currentStart = struct['start_pos']
        currentDepth -= 1

    return path


def extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
    """
    Build the overlap text a continuation is expected to start with.

    This delegates to ``extractCutPiece`` and returns its result unchanged, so any
    trailing whitespace in the cut piece is kept.

    Args:
        jsonContent: The incomplete JSON string.
        breakPosition: Position where the JSON was cut.

    Returns:
        The cut piece when one is found. Otherwise up to 200 characters before
        ``breakPosition`` with leading whitespace removed. For empty input "" is
        returned; for a non-positive ``breakPosition`` the stripped last 200
        characters of ``jsonContent``.
    """
    if not jsonContent:
        return ""
    if breakPosition <= 0:
        return jsonContent[-200:].strip()

    piece = extractCutPiece(jsonContent, breakPosition)
    if piece:
        return piece

    windowStart = max(0, breakPosition - 200)
    return jsonContent[windowStart:breakPosition].lstrip()


def findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
    """
    Find the key name that directly precedes a structure start position.

    Args:
        jsonContent: The JSON text.
        pos: Offset of the structure's opening ``{`` or ``[``.

    Returns:
        The key when the (up to) 100 characters before ``pos`` end with
        ``"key":`` followed only by whitespace; otherwise ``None`` (for example
        for the root structure or for an array element).
    """
    # The slice stops just before `pos`, so the opening bracket itself is not part
    # of it. The pattern therefore must end right after the colon; requiring a
    # bracket here would miss every keyed structure and would instead attribute
    # an array's key to its first element.
    before = jsonContent[max(0, pos - 100):pos]
    match = re.search(r'"([^"]+)"\s*:\s*$', before)
    return match.group(1) if match else None


def _formatLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str:
    """Format a level in the hierarchy for display."""
    levelType = level['type']
    start = level['start_pos']
    end = level['end_pos']
    key = level.get('key')
    
    # Get content for this level
    levelContent = jsonContent[start:end]
    
    # If content is too large, show only metadata
    if len(levelContent) > maxContentChars:
        # Show opening and key if available
        if key:
            return f'  "{key}": {levelType} (content too large, {len(levelContent)} chars)'
        else:
            return f'  {levelType} (content too large, {len(levelContent)} chars)'
    else:
        # Show full content (formatted)
        indent = "  " * level['depth']
        if key:
            return f'{indent}"{key}": {levelContent[:maxContentChars]}'
        else:
            return f'{indent}{levelContent[:maxContentChars]}'


def _formatCutLevelContextDetailed(level: Dict[str, Any], cutPiece: str, jsonContent: str, breakPosition: int) -> str:
    """
    Format the cut level showing detailed hierarchy as per user instruction:
    1. Cut piece level: element of a list (the incomplete element)
    2. Parent of the cut element: the list containing the cut piece (with cut point shown)
    3. Last complete object on the same level like the cut object (if exists) PLUS further 
       previous content from the json string (maximum 1000 characters)
    """
    levelType = level['type']
    start = level['start_pos']
    key = level.get('key')
    
    # Get content before break point in this level
    contentBeforeBreak = jsonContent[start:breakPosition]
    
    result = []
    
    if levelType == 'array':
        # Step 3: Show last complete elements on same level + previous content (max 1000 chars)
        # Extract last complete array elements with context (up to 1000 chars)
        lastCompleteElements = _extractLastCompleteArrayElementsWithContext(
            contentBeforeBreak, jsonContent, start, maxChars=1000
        )
        if lastCompleteElements:
            result.append("3. Last complete elements on same level (plus previous content, max 1000 chars):")
            result.append(lastCompleteElements)
            result.append("")
        
        # Step 2: Show parent container (the list) with cut piece
        # Find the array element that contains the cut piece
        cutArrayElement = _findCutArrayElement(jsonContent, breakPosition, start)
        if cutArrayElement:
            result.append("2. Parent container (list containing cut piece):")
            result.append(f"  {cutArrayElement}")
        else:
            # Fallback: show cut piece directly
            cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
            result.append("2. Parent container (list containing cut piece):")
            result.append(f"  {cutPart}")
        result.append("")
        
        # Step 1: Show cut piece (incomplete element at cut point)
        result.append("1. Cut piece level (incomplete element at cut point):")
        if cutPiece:
            result.append(f"  {cutPiece}")
        else:
            cutPart = jsonContent[breakPosition:breakPosition + 50].strip()
            result.append(f"  {cutPart}")
    else:
        # Object - show structure with cut point
        result.append("Cut point in object:")
        cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
        preview = contentBeforeBreak[-500:] if len(contentBeforeBreak) > 500 else contentBeforeBreak
        result.append(f"  {preview}... {cutPart} <-- CUT POINT")
    
    return "\n".join(result)


def _formatParentLevelContext(level: Dict[str, Any], jsonContent: str, maxContentChars: int = 1000) -> str:
    """
    Format a parent level showing content (if small enough) or metadata only.
    Used for levels above the cut level, showing path to root.
    """
    levelType = level['type']
    start = level['start_pos']
    end = level['end_pos']
    key = level.get('key')
    
    # Get content for this level
    levelContent = jsonContent[start:end]
    
    # If content is too large, show only metadata
    if len(levelContent) > maxContentChars:
        # Show opening structure with key if available
        opening = jsonContent[start:start + 200].strip()
        if key:
            return f'  "{key}": {levelType} (content too large, {len(levelContent)} chars)\n    {opening}...'
        else:
            return f'  {levelType} (content too large, {len(levelContent)} chars)\n    {opening}...'
    else:
        # Show full content (formatted, but limit to maxContentChars)
        content = levelContent[:maxContentChars]
        if key:
            return f'  "{key}": {content}'
        else:
            return f'  {content}'


def _extractLastCompleteArrayElementsWithContext(
    arrayContent: str, fullJsonContent: str, arrayStart: int, maxChars: int = 1000
) -> str:
    """
    Extract last complete array elements PLUS further previous content from json string (max 1000 chars).
    
    This shows:
    - Last complete elements on the same level as the cut element
    - Additional previous content from the JSON string (up to maxChars total)
    """
    # First, extract last complete elements from arrayContent
    completeElements = []
    currentElement = ""
    braceDepth = 0
    bracketDepth = 0
    inString = False
    escapeNext = False
    totalChars = 0
    
    # Parse backwards to find complete elements
    for i in range(len(arrayContent) - 1, -1, -1):
        char = arrayContent[i]
        
        if escapeNext:
            escapeNext = False
            currentElement = char + currentElement
            continue
        
        if char == '\\':
            escapeNext = True
            currentElement = char + currentElement
            continue
        
        if char == '"':
            inString = not inString
            currentElement = char + currentElement
            continue
        
        if not inString:
            if char == '}':
                braceDepth += 1
                currentElement = char + currentElement
            elif char == '{':
                braceDepth -= 1
                currentElement = char + currentElement
                if braceDepth == 0 and bracketDepth == 0:
                    # Found complete element
                    element = currentElement.strip()
                    if element and element[0] in ['{', '[']:
                        completeElements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                        currentElement = ""
            elif char == ']':
                bracketDepth += 1
                currentElement = char + currentElement
            elif char == '[':
                bracketDepth -= 1
                currentElement = char + currentElement
                if braceDepth == 0 and bracketDepth == 0:
                    # Found complete element
                    element = currentElement.strip()
                    if element and element[0] == '[':
                        completeElements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                        currentElement = ""
            elif char == ',' and braceDepth == 0 and bracketDepth == 0:
                # Element boundary
                if currentElement.strip():
                    element = currentElement.strip()
                    if element and element[0] in ['{', '[', '"']:
                        completeElements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                    currentElement = ""
            else:
                currentElement = char + currentElement
    
    # Format the elements
    if completeElements:
        # Show last few complete elements (up to maxChars)
        formattedElements = []
        charsUsed = 0
        for elem in reversed(completeElements):  # Show from newest to oldest
            if charsUsed + len(elem) <= maxChars:
                formattedElements.insert(0, elem)
                charsUsed += len(elem)
            else:
                break
        
        if formattedElements:
            # Format as JSON array rows (without hardcoded indentation - caller will add it)
            result = []
            for elem in formattedElements:
                # Remove leading comma if present (from mid-element extraction)
                cleanElem = elem.lstrip(',').strip()
                if cleanElem:
                    result.append(f"{cleanElem},")
            return "\n".join(result)
    
    return ""


def _findCutArrayElement(jsonContent: str, breakPosition: int, arrayStart: int) -> Optional[str]:
    """Find the array element that contains the cut piece."""
    # Look backwards from break position to find the start of the current array element
    braceDepth = 0
    bracketDepth = 0
    inString = False
    escapeNext = False
    elementStart = -1
    
    # Search backwards from break position
    for i in range(breakPosition - 1, arrayStart - 1, -1):
        if i < 0:
            break
            
        char = jsonContent[i]
        
        if escapeNext:
            escapeNext = False
            continue
        
        if char == '\\':
            escapeNext = True
            continue
        
        if char == '"':
            inString = not inString
            continue
        
        if not inString:
            if char == '}':
                braceDepth += 1
            elif char == '{':
                braceDepth -= 1
                if braceDepth == 0 and bracketDepth == 0:
                    elementStart = i
                    break
            elif char == ']':
                bracketDepth += 1
            elif char == '[':
                bracketDepth -= 1
                if braceDepth == 0 and bracketDepth == 0:
                    elementStart = i
                    break
            elif char == ',' and braceDepth == 0 and bracketDepth == 0:
                # Found element boundary
                elementStart = i + 1
                break
    
    if elementStart >= 0:
        # Extract the element (including incomplete part)
        elementContent = jsonContent[elementStart:breakPosition + 100].strip()
        # Clean up - remove leading comma if present
        if elementContent.startswith(','):
            elementContent = elementContent[1:].strip()
        return elementContent[:300]  # Limit length
    
    return None


def _extractLastCompleteArrayElements(arrayContent: str, maxChars: int = 1000) -> str:
    """Extract last complete array elements, up to maxChars."""
    # Count complete elements from the end
    elements = []
    currentElement = ""
    braceDepth = 0
    bracketDepth = 0
    inString = False
    escapeNext = False
    totalChars = 0
    
    # Parse backwards to find complete elements
    for i in range(len(arrayContent) - 1, -1, -1):
        char = arrayContent[i]
        
        if escapeNext:
            escapeNext = False
            currentElement = char + currentElement
            continue
        
        if char == '\\':
            escapeNext = True
            currentElement = char + currentElement
            continue
        
        if char == '"':
            inString = not inString
            currentElement = char + currentElement
            continue
        
        if not inString:
            if char == '}':
                braceDepth += 1
                currentElement = char + currentElement
            elif char == '{':
                braceDepth -= 1
                currentElement = char + currentElement
                if braceDepth == 0 and bracketDepth == 0:
                    # Found complete element
                    element = currentElement.strip()
                    if element and element[0] in ['{', '[']:
                        elements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                        currentElement = ""
            elif char == ']':
                bracketDepth += 1
                currentElement = char + currentElement
            elif char == '[':
                bracketDepth -= 1
                currentElement = char + currentElement
                if braceDepth == 0 and bracketDepth == 0:
                    # Found complete element
                    element = currentElement.strip()
                    if element and element[0] == '[':
                        elements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                        currentElement = ""
            elif char == ',' and braceDepth == 0 and bracketDepth == 0:
                # Element boundary
                if currentElement.strip():
                    element = currentElement.strip()
                    if element and element[0] in ['{', '[', '"']:
                        elements.insert(0, element)
                        totalChars += len(element)
                        if totalChars >= maxChars:
                            break
                    currentElement = ""
            else:
                currentElement = char + currentElement
    
    if elements:
        indent = "    "
        formatted = ",\n".join([f"{indent}{elem}" for elem in elements[-5:]])  # Show last 5 elements
        if len(elements) > 5:
            formatted = f"... ({len(elements) - 5} more elements) ...\n{formatted}"
        return formatted
    
    return ""


def _extractStructureContext(jsonContent: str, incompletePart: str, lastCompletePart: str = "") -> str:
    """
    Extract structure context showing WHERE in the structure the last complete and incomplete elements are.
    
    Returns a clear description of the structure context for the broken element.
    """
    import json
    import re
    
    if not incompletePart:
        # No incomplete part extracted - try to show context from raw JSON
        try:
            # Show last part of JSON to indicate where it broke
            lastPart = jsonContent[-300:] if len(jsonContent) > 300 else jsonContent
            return f"Structure context unavailable. Last part of response:\n{lastPart}"
        except Exception:
            return "Structure context unavailable - response was completely broken"
    
    # Find where incomplete part starts
    incompleteStart = jsonContent.find(incompletePart)
    if incompleteStart == -1:
        incompleteStart = len(jsonContent)
    
    # Try to extract the structure context showing the broken element
    try:
        # Get the part before incomplete to understand structure
        beforeIncomplete = jsonContent[:incompleteStart]
        
        # Try to find the array/object context where the break occurred
        # Look for the last complete structure before the break
        structureContext = ""
        
        # Try to parse what we have before the incomplete part
        try:
            closed = closeJsonStructures(beforeIncomplete)
            parsed = json.loads(closed)
            
            # Build structure showing where we are
            if isinstance(parsed, dict) and "elements" in parsed:
                elements = parsed.get("elements", [])
                if isinstance(elements, list):
                    structureContext = f"Structure: elements array with {len(elements)} complete elements\n"
                    structureContext += f"Break occurred in element at index {len(elements)}"
                else:
                    structureContext = "Structure: elements (not an array)"
            else:
                structureContext = "Structure: " + json.dumps(_buildStructureContext(parsed), indent=2, ensure_ascii=False)
        except Exception:
            # Can't parse - show raw context
            structureContext = f"Structure parsing failed. Context before break:\n{beforeIncomplete[-200:]}"
        
        return structureContext
        
    except Exception:
        # Fallback: show minimal context
        return f"Structure context unavailable. Break occurred at position {incompleteStart} in JSON string"


def _findElementPath(parsed: Any, elementStr: str, originalJson: str, isIncomplete: bool = False) -> str:
    """
    Find the path to an element in the parsed JSON structure.
    
    Returns a path like "elements[2]" or "documents[0].chapters[1].sections[3]"
    """
    import json
    
    if not elementStr or not elementStr.strip():
        return ""
    
    # Strategy: Find position in original JSON string, then determine path from structure
    elementStart = originalJson.find(elementStr.strip())
    if elementStart == -1:
        return ""
    
    # Find the array context by looking backwards from element position
    beforeElement = originalJson[:elementStart]
    
    # Find the nearest array declaration before this position
    # Look for patterns like "elements": [ or "chapters": [
    arrayPattern = r'"(\w+)"\s*:\s*\['
    matches = list(re.finditer(arrayPattern, beforeElement))
    if not matches:
        return ""
    
    # Get the most recent array (closest to element)
    lastMatch = matches[-1]
    arrayName = lastMatch.group(1)
    arrayStartPos = lastMatch.end()
    
    # Count complete array elements before this position
    arrayContent = beforeElement[arrayStartPos:]
    
    # Count complete objects (balanced braces) - each complete object is an array element
    braceCount = 0
    elementIndex = 0
    inString = False
    escapeNext = False
    lastCompleteObjectEnd = -1
    
    for i, char in enumerate(arrayContent):
        if escapeNext:
            escapeNext = False
            continue
        if char == '\\':
            escapeNext = True
            continue
        if char == '"':
            inString = not inString
            continue
        if not inString:
            if char == '{':
                if braceCount == 0:
                    # Start of new object
                    elementIndex += 1
                braceCount += 1
            elif char == '}':
                braceCount -= 1
                if braceCount == 0:
                    # End of complete object
                    lastCompleteObjectEnd = i
    
    # Determine the index
    # If we're looking for incomplete element, it's at the current elementIndex
    # If we're looking for last complete element, it's at elementIndex - 1
    if isIncomplete:
        index = elementIndex
    else:
        index = elementIndex - 1 if elementIndex > 0 else 0
    
    # Build the full path by traversing the parsed structure
    def _buildPathToArray(obj: Any, targetArrayName: str, targetIndex: int, currentPath: str = "") -> Optional[str]:
        """Recursively find path to array element."""
        if isinstance(obj, dict):
            for key, value in obj.items():
                newPath = f"{currentPath}.{key}" if currentPath else key
                if key == targetArrayName and isinstance(value, list):
                    # Found the target array
                    if 0 <= targetIndex < len(value):
                        return f"{newPath}[{targetIndex}]"
                    elif targetIndex >= len(value):
                        # Index beyond array - return array path with index
                        return f"{newPath}[{targetIndex}]"
                result = _buildPathToArray(value, targetArrayName, targetIndex, newPath)
                if result:
                    return result
        elif isinstance(obj, list):
            for i, item in enumerate(obj):
                result = _buildPathToArray(item, targetArrayName, targetIndex, currentPath)
                if result:
                    return result
        return None
    
    # Try to find full path in parsed structure
    fullPath = _buildPathToArray(parsed, arrayName, index)
    if fullPath:
        return fullPath
    
    # Fallback: return simple array path
    return f"{arrayName}[{index}]"


def _buildStructureContext(obj: Any, maxDepth: int = 5) -> Any:
    """
    Build structure context (metadata only, no content).
    Similar to _buildStructureTemplate but focuses on parent structure.
    """
    if isinstance(obj, dict):
        structure = {}
        for key, value in obj.items():
            if isinstance(value, (dict, list)):
                structure[key] = _buildStructureContext(value, maxDepth - 1) if maxDepth > 0 else []
            else:
                # Skip content values - only keep structure
                pass
        return structure
    elif isinstance(obj, list) and obj:
        # Return empty list structure (no content)
        return []
    else:
        return None


def _findIncompleteSectionInRaw(raw_json: str) -> Optional[Dict[str, Any]]:
    """
    Find the incomplete section in raw JSON.
    
    CRITICAL: JSON can be cut off mid-element (e.g., {"text": "20327,20)
    We need to find the last section and check if it's incomplete.
    """
    try:
        # Try to parse documents structure
        if '"documents"' in raw_json:
            # Find last document
            doc_start = raw_json.rfind('"documents"')
            if doc_start >= 0:
                doc_section = raw_json[doc_start:]
                # Try to find sections array
                sections_start = doc_section.find('"sections"')
                if sections_start >= 0:
                    sections_section = doc_section[sections_start:]
                    # Find sections array start
                    array_start = sections_section.find('[')
                    if array_start >= 0:
                        # Find all complete sections
                        section_objects = []
                        depth = 0
                        section_start = None
                        
                        for i in range(array_start, len(sections_section)):
                            if sections_section[i] == '{':
                                if depth == 0:
                                    section_start = i
                                depth += 1
                            elif sections_section[i] == '}':
                                depth -= 1
                                if depth == 0 and section_start is not None:
                                    # Found complete section
                                    section_str = sections_section[section_start:i+1]
                                    try:
                                        section_obj = json.loads('{' + section_str + '}')
                                        section_objects.append(section_obj)
                                    except:
                                        pass
                                    section_start = None
                        
                        # CRITICAL: Check if there's content after the last complete section
                        # If JSON ends mid-element, the last section is incomplete
                        if section_objects:
                            # Find position after last complete section
                            last_section_end = sections_section.rfind('}')
                            if last_section_end >= 0:
                                # Check if there's more content after the last }
                                remaining_after_last_section = sections_section[last_section_end+1:].strip()
                                # Remove closing brackets/braces that might be there
                                remaining_after_last_section = remaining_after_last_section.lstrip('],}')
                                
                                # If there's still content (like incomplete element), section is incomplete
                                if remaining_after_last_section and not remaining_after_last_section.startswith(']'):
                                    # Last section is incomplete - return it
                                    return section_objects[-1]
                                
                                # Also check: if we can't parse the full sections array, last section is incomplete
                                try:
                                    # Try to parse the sections array
                                    sections_array_str = sections_section[array_start:]
                                    json.loads(sections_array_str)
                                    # Parsed successfully - all sections complete
                                    return None
                                except:
                                    # Cannot parse - last section is incomplete
                                    return section_objects[-1] if section_objects else None
    except Exception as e:
        logger.debug(f"Error finding incomplete section: {e}")
    
    return None


def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract cut-off element and element before from incomplete section."""
    cut_off_element = None
    element_before_cutoff = None
    
    elements = incomplete_section.get("elements", [])
    if not elements:
        return None, None
    
    # CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number
    # Deliver the cut-off part AS-IS (don't try to "complete" it)
    
    if isinstance(elements, list):
        # Find last element (might be incomplete)
        if elements:
            # Edge case: If cut-off is in first element, just show cut-off element
            if len(elements) == 1:
                # Only one element - might be cut-off
                last_elem = elements[0]
                if isinstance(last_elem, dict):
                    # Check if element contains nested content (e.g., code_block with JSON string)
                    cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
                    if not cut_off_element:
                        cut_off_element = json.dumps(last_elem)
                else:
                    cut_off_element = str(last_elem)
            else:
                # Multiple elements - last one might be cut-off, get element before
                element_before_cutoff = json.dumps(elements[-2]) if isinstance(elements[-2], dict) else str(elements[-2])
                last_elem = elements[-1]
                if isinstance(last_elem, dict):
                    # Check if element contains nested content
                    cut_off_element = _extractCutOffFromElement(last_elem, raw_json)
                    if not cut_off_element:
                        cut_off_element = json.dumps(last_elem)
                else:
                    cut_off_element = str(last_elem)
    elif isinstance(elements, dict):
        # Single element - might be cut-off
        cut_off_element = _extractCutOffFromElement(elements, raw_json)
        if not cut_off_element:
            cut_off_element = json.dumps(elements)
    
    # If we couldn't extract from parsed structure, extract from raw JSON
    if not cut_off_element:
        # Extract the last incomplete part from raw JSON
        # Find the last incomplete string/number/array
        # re is already imported at module level
        # Look for incomplete string at the end
        incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL)
        if incomplete_match:
            cut_off_element = incomplete_match.group(1)
        else:
            # Look for incomplete number
            number_match = re.search(r'(\d+\.?\d*)(?:\s*[,}\]]|$)', raw_json[-200:])
            if number_match:
                cut_off_element = number_match.group(1)
    
    return cut_off_element, element_before_cutoff


def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optional[str]:
    """
    Extract cut-off point from within an element (e.g., code_block with JSON string, table with incomplete rows).
    
    This helps identify where exactly to continue within nested structures.
    """
    # re is already imported at module level
    
    # Check for code_block with nested JSON
    if "code" in element:
        code_content = element.get("code", "")
        if isinstance(code_content, str) and code_content.strip().startswith("{"):
            # This is JSON inside a code string - find where it was cut off
            # Look for the last complete value in the raw JSON
            # Find the code string in raw JSON
            code_match = re.search(r'"code"\s*:\s*"([^"]*?)(?:"|$)', raw_json[-2000:], re.DOTALL)
            if code_match:
                code_str = code_match.group(1)
                # Try to find the last complete value in the JSON string
                # Look for patterns like: [2, 3, 5, ... 17929, (cut off here)
                array_match = re.search(r'\[([^\]]*?)(?:\]|$)', code_str, re.DOTALL)
                if array_match:
                    array_content = array_match.group(1)
                    # Find last complete number/item
                    # Match: number followed by comma or end
                    last_complete = re.findall(r'(\d+)\s*[,]', array_content)
                    if last_complete:
                        last_num = last_complete[-1]
                        # Return context showing where to continue
                        return f'{{"code": "{{\\"primes\\": [... up to {last_num}, <CONTINUE FROM HERE>]"}}'
    
    # Check for table with incomplete rows
    if "rows" in element:
        rows = element.get("rows", [])
        if isinstance(rows, list) and rows:
            # Find last complete row in raw JSON
            rows_str = str(rows)
            # Try to find where rows were cut off
            last_row_match = re.search(r'\[([^\]]*?)(?:\]|$)', raw_json[-1000:], re.DOTALL)
            if last_row_match:
                return f'{{"rows": [... last complete row shown above, <CONTINUE FROM HERE>]}}'
    
    # Check for list items
    if "items" in element:
        items = element.get("items", [])
        if isinstance(items, list) and items:
            # Find last complete item
            last_item_match = re.search(r'"([^"]*?)"\s*(?:,|\])', raw_json[-1000:], re.DOTALL)
            if last_item_match:
                return f'{{"items": [... last item shown above, <CONTINUE FROM HERE>]}}'
    
    return None


def _extractCutOffElementsFromRaw(raw_json: str, allSections: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract cut-off element directly from raw JSON when section parsing fails.
    
    This handles ALL cases where JSON is cut off:
    - Mid-element (incomplete element object)
    - Mid-string/number within an element
    - Mid-array within an element (e.g., rows in table, items in list)
    - Mid-nested structure
    
    CRITICAL: In 99% of cases, JSON is cut off mid-string or mid-number - deliver as-is.
    """
    cut_off_element = None
    element_before_cutoff = None
    
    try:
        # Find the last "elements" array in raw JSON
        if '"elements"' in raw_json:
            # Find the last occurrence of "elements"
            last_elements_pos = raw_json.rfind('"elements"')
            if last_elements_pos >= 0:
                elements_section = raw_json[last_elements_pos:]
                
                # Find the array start '['
                array_start = elements_section.find('[')
                if array_start >= 0:
                    # Use a simpler approach: find all element objects by tracking braces
                    # This works even if elements contain nested arrays/objects
                    element_strings = []
                    depth = 0
                    in_string = False
                    escape_next = False
                    elem_start = None
                    
                    for i in range(array_start, len(elements_section)):
                        char = elements_section[i]
                        
                        # Track string state (ignore brackets/braces inside strings)
                        if escape_next:
                            escape_next = False
                            continue
                        if char == '\\':
                            escape_next = True
                            continue
                        if char == '"' and not escape_next:
                            in_string = not in_string
                            continue
                        
                        if not in_string:
                            if char == '{':
                                if depth == 0:
                                    elem_start = i
                                depth += 1
                            elif char == '}':
                                depth -= 1
                                if depth == 0 and elem_start is not None:
                                    # Found complete element (all braces closed, even if nested arrays are incomplete)
                                    elem_str = elements_section[elem_start:i+1]
                                    element_strings.append(elem_str)
                                    elem_start = None
                    
                    # Now analyze what we found
                    if element_strings:
                        last_elem = element_strings[-1]
                        last_complete_pos = elements_section.rfind('}')
                        
                        # Check if there's content after the last complete element
                        if last_complete_pos >= 0:
                            remaining = elements_section[last_complete_pos+1:].strip()
                            remaining_clean = remaining.lstrip(',').strip().lstrip(']').strip()
                            
                            # Case 1: Incomplete element after last complete one
                            if remaining_clean and not remaining_clean.startswith(']'):
                                incomplete_start = last_complete_pos + 1
                                while incomplete_start < len(elements_section) and elements_section[incomplete_start] in ' \n\t\r,':
                                    incomplete_start += 1
                                
                                if incomplete_start < len(elements_section):
                                    incomplete_elem_str = elements_section[incomplete_start:].strip()
                                    incomplete_elem_str = incomplete_elem_str.rstrip(']').rstrip('}').rstrip()
                                    cut_off_element = incomplete_elem_str
                                    element_before_cutoff = element_strings[-1]
                            
                            # Case 2: Last element itself is incomplete (cut off in nested structure like rows, items, etc.)
                            else:
                                # Check if JSON is incomplete by analyzing structure
                                # Count unclosed brackets/braces in elements section (ignoring strings)
                                elements_section_braces = 0
                                elements_section_brackets = 0
                                in_str = False
                                esc = False
                                
                                for char in elements_section:
                                    if esc:
                                        esc = False
                                        continue
                                    if char == '\\':
                                        esc = True
                                        continue
                                    if char == '"':
                                        in_str = not in_str
                                        continue
                                    if not in_str:
                                        if char == '{':
                                            elements_section_braces += 1
                                        elif char == '}':
                                            elements_section_braces -= 1
                                        elif char == '[':
                                            elements_section_brackets += 1
                                        elif char == ']':
                                            elements_section_brackets -= 1
                                
                                # Also check raw JSON for unclosed structures
                                raw_braces = 0
                                raw_brackets = 0
                                in_str = False
                                esc = False
                                
                                for char in raw_json:
                                    if esc:
                                        esc = False
                                        continue
                                    if char == '\\':
                                        esc = True
                                        continue
                                    if char == '"':
                                        in_str = not in_str
                                        continue
                                    if not in_str:
                                        if char == '{':
                                            raw_braces += 1
                                        elif char == '}':
                                            raw_braces -= 1
                                        elif char == '[':
                                            raw_brackets += 1
                                        elif char == ']':
                                            raw_brackets -= 1
                                
                                # Check if last element can be parsed
                                last_elem_parsable = False
                                try:
                                    json.loads(last_elem)
                                    last_elem_parsable = True
                                except:
                                    pass
                                
                                # Determine if last element is incomplete
                                is_incomplete = False
                                
                                # If there are unclosed structures, element is incomplete
                                if elements_section_brackets > 0 or elements_section_braces > 0 or raw_brackets > 0 or raw_braces > 0:
                                    is_incomplete = True
                                
                                # If element cannot be parsed, it's incomplete
                                elif not last_elem_parsable:
                                    is_incomplete = True
                                
                                # Check if JSON ends mid-element by finding where element ends in raw JSON
                                elif last_elem_parsable:
                                    # Find where this element ends in the raw JSON
                                    elem_end_marker = last_elem[-100:] if len(last_elem) > 100 else last_elem
                                    elem_end_in_raw = raw_json.rfind(elem_end_marker)
                                    
                                    if elem_end_in_raw >= 0:
                                        actual_elem_end = elem_end_in_raw + len(last_elem)
                                        
                                        if actual_elem_end < len(raw_json):
                                            remaining_after_elem = raw_json[actual_elem_end:].strip()
                                            remaining_clean = remaining_after_elem.lstrip(',').strip()
                                            
                                            # If there's unexpected content, element is incomplete
                                            if remaining_clean and not remaining_clean.startswith(']'):
                                                is_incomplete = True
                                
                                if is_incomplete:
                                    cut_off_element = last_elem
                                    if len(element_strings) >= 2:
                                        element_before_cutoff = element_strings[-2]
                                    elif len(element_strings) == 1:
                                        element_before_cutoff = last_elem
                    
                    # Case 3: No complete elements found, but there's an incomplete one
                    elif elem_start is not None:
                        # There's an incomplete element that hasn't been closed
                        incomplete_elem_str = elements_section[elem_start:].strip()
                        cut_off_element = incomplete_elem_str
                        # No element before (this is the first/only element)
                        element_before_cutoff = None
    except Exception as e:
        logger.debug(f"Error extracting cut-off elements from raw JSON: {e}")
    
    return cut_off_element, element_before_cutoff


def parseJsonWithModel(jsonString: str, modelClass: Type[T]) -> T:
    """
    Parse a JSON string into an instance of the given Pydantic model.

    The function works in stages, relying on other helpers of this module:
    - extractJsonString() - Extracts JSON from text with code fences
    - tryParseJson() - Safe parsing with error handling
    - repairBrokenJson() - Repairs broken/incomplete JSON

    If the top-level JSON value is an array, only its FIRST element is used
    to build the model; any further elements are ignored.

    Args:
        jsonString: JSON string to parse (may contain code fences, extra text, etc.)
        modelClass: Pydantic model class to parse into

    Returns:
        Parsed Pydantic model instance

    Raises:
        ValueError: If the input is empty, no JSON can be extracted, the JSON
            has an unsupported top-level shape, the data fails model
            validation, or neither direct parsing nor repair succeeds.
    """
    modelName = modelClass.__name__

    if not jsonString:
        raise ValueError(f"Cannot parse empty JSON string for {modelName}")

    # Step 1: Extract JSON string (handles code fences, extra text)
    extractedJson = extractJsonString(jsonString)

    if not extractedJson or extractedJson.strip() == "":
        raise ValueError(f"No JSON found in string for {modelName}")

    # Step 2: Try to parse as JSON
    parsedJson, error, cleaned = tryParseJson(extractedJson)

    if error is None and parsedJson is not None:
        # Shape checks live outside the try block so their ValueErrors reach
        # the caller unchanged instead of being re-wrapped as
        # "Failed to create ... instance".
        if isinstance(parsedJson, dict):
            payload = parsedJson
        elif isinstance(parsedJson, list):
            if not parsedJson:
                raise ValueError(f"Empty list cannot be parsed as {modelName}")
            # Top-level array: build the model from the first element only
            payload = parsedJson[0]
        else:
            raise ValueError(f"Parsed JSON is not a dict or list: {type(parsedJson)}")

        try:
            return modelClass(**payload)
        except ValidationError as e:
            logger.error(f"Validation error parsing {modelName}: {e}")
            raise ValueError(f"Invalid data for {modelName}: {e}") from e
        except Exception as e:
            # E.g. TypeError when the first array element is not a mapping
            logger.error(f"Error creating {modelName} instance: {e}")
            raise ValueError(f"Failed to create {modelName} instance: {e}") from e

    # Step 3: Try to repair broken JSON
    logger.warning(f"Initial JSON parsing failed, attempting repair for {modelName}")
    repairedJson = repairBrokenJson(extractedJson)

    if repairedJson:
        # Re-serialize the repaired structure and run it through the same safe parser
        parsedRepaired, errorRepaired, _ = tryParseJson(json.dumps(repairedJson))

        if errorRepaired is None and parsedRepaired is not None:
            try:
                if isinstance(parsedRepaired, dict):
                    return modelClass(**parsedRepaired)
                elif isinstance(parsedRepaired, list) and parsedRepaired:
                    return modelClass(**parsedRepaired[0])
            except ValidationError as e:
                logger.error(f"Validation error parsing repaired {modelName}: {e}")
                raise ValueError(f"Invalid repaired data for {modelName}: {e}") from e
            except Exception as e:
                # Best effort: log and fall through to the generic failure below
                logger.error(f"Error creating {modelName} from repaired JSON: {e}")

    # Step 4: All parsing failed
    # Guard the preview so a non-string `cleaned` cannot turn the documented
    # ValueError into a TypeError while logging.
    cleanedPreview = cleaned[:200] if isinstance(cleaned, str) else ""
    logger.error(f"Failed to parse JSON for {modelName}. Cleaned JSON preview: {cleanedPreview}...")
    raise ValueError(f"Failed to parse or validate JSON for {modelName}. JSON may be malformed or incomplete.")