gateway/modules/shared/_safety_copy_jsonContinuation.py

"""
JSON Continuation Context Module

Generiert drei Kontexte für abgeschnittene JSON-Strings:
1. Overlap Context: Das innerste Objekt/Array-Element, das den Cut-Punkt enthält
2. Hierarchy Context: Die hierarchische Struktur vom Root bis zum Cut mit Budget-Logik
3. Complete Part: Der vollständige Teil des JSONs mit allen Strukturen geschlossen

Hauptfunktionen:
- extractContinuationContexts(truncatedJson: str) -> Tuple[str, str, str]
  Extrahiert alle drei Kontexte aus einem abgeschnittenen JSON-String.

- getContexts(truncatedJson: str) -> JsonContinuationContexts
  Gibt alle Kontexte als Pydantic-Modell zurück mit benannten Feldern.

Modulkonstanten:
- BUDGET_LIMIT: int = 500
  Zeichen-Budget für vollständige Datenwerte im Hierarchy Context

- OVERLAP_MAX_CHARS: int = 1000
  Maximale Zeichen für den Overlap Context

Verwendung:
    >>> from modules.shared.jsonContinuation import getContexts
    >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor'
    >>> contexts = getContexts(jsonStr)
    >>> print(contexts.overlapContext)
    >>> print(contexts.hierarchyContext)
    >>> print(contexts.completePart)

Autor: Claude
Version: 2.0
"""

from typing import Tuple, List, Optional, Any
from dataclasses import dataclass
from enum import Enum
from modules.datamodels.datamodelAi import JsonContinuationContexts


# =============================================================================
# MODULE CONSTANTS
# =============================================================================

# Default character budget for data values in the hierarchy context; applied
# whenever a caller does not pass an explicit budgetLimit.
BUDGET_LIMIT: int = 500
"""Zeichen-Budget für vollständige Datenwerte im Hierarchy Context"""

# Default maximum length (in characters) of the overlap context; applied
# whenever a caller does not pass an explicit overlapMaxChars.
OVERLAP_MAX_CHARS: int = 1000
"""Maximale Zeichen für den Overlap Context"""


# =============================================================================
# TOKEN TYPES AND DATA CLASSES
# =============================================================================


class TokenType(Enum):
    """Kinds of lexical tokens distinguished while scanning JSON text.

    For punctuation members the enum value is the literal character; for all
    other members it is a descriptive lowercase name.
    """
    # Structural punctuation (value == the character itself).
    OBJECT_START = "{"
    OBJECT_END = "}"
    ARRAY_START = "["
    ARRAY_END = "]"
    # Scalar values.
    STRING = "string"
    NUMBER = "number"
    BOOLEAN = "boolean"
    NULL = "null"
    # Separators.
    COLON = ":"
    COMMA = ","
    # NOTE(review): JsonTokenizer never emits KEY; object keys arrive as STRING
    # tokens and are recognised by the colon that follows them.
    KEY = "key"
    # End of input reached cleanly.
    EOF = "eof"
    # Input ended (or was unreadable) in the middle of a token.
    TRUNCATED = "truncated"


@dataclass
class Token:
    """A single lexical token together with its position in the scanned text."""
    type: TokenType
    value: Any  # Payload: string text without quotes, parsed number, bool, None, or partial text
    start_pos: int  # Index of the token's first character in the source string
    end_pos: int  # Index just past the token's last consumed character
    raw: str  # Original string representation


@dataclass
class StackFrame:
    """One open container (object or array) on the path from the root to the cut."""
    type: str  # "object" or "array"
    start_pos: int  # Index of the opening "{" or "[" in the source string
    key: Optional[str] = None  # Current key for objects
    index: int = 0  # Current index for arrays
    content: str = ""  # Accumulated content for this frame
    keys_seen: Optional[List[str]] = None  # Keys seen in this object; None is replaced by [] below

    def __post_init__(self) -> None:
        # Give each frame its own list rather than sharing one mutable default.
        if self.keys_seen is None:
            self.keys_seen = []


class JsonTokenizer:
    """Tokenizer for potentially truncated JSON strings.

    The tokenizer walks ``jsonStr`` from left to right. It does not validate
    JSON grammar; text that cannot be read as a complete token (an unterminated
    string, a partial keyword, an unexpected character) is reported as a
    ``TokenType.TRUNCATED`` token.
    """

    # Characters that form a complete token on their own.
    _SINGLE_CHAR_TOKENS = {
        '{': TokenType.OBJECT_START,
        '}': TokenType.OBJECT_END,
        '[': TokenType.ARRAY_START,
        ']': TokenType.ARRAY_END,
        ':': TokenType.COLON,
        ',': TokenType.COMMA,
    }

    # Literal keywords: (text, token type, decoded value).
    _KEYWORDS = (
        ('true', TokenType.BOOLEAN, True),
        ('false', TokenType.BOOLEAN, False),
        ('null', TokenType.NULL, None),
    )

    def __init__(self, jsonStr: str):
        """Start scanning ``jsonStr`` at position 0."""
        self.jsonStr = jsonStr
        self.pos = 0
        self.length = len(jsonStr)

    def skipWhitespace(self):
        """Advance ``pos`` past spaces, tabs, newlines and carriage returns."""
        while self.pos < self.length and self.jsonStr[self.pos] in ' \t\n\r':
            self.pos += 1

    def peek(self) -> Optional[str]:
        """Return the character at ``pos`` without consuming it, or None at end of input."""
        if self.pos < self.length:
            return self.jsonStr[self.pos]
        return None

    def readString(self) -> Token:
        """Read a string token starting at the opening quote at ``pos``.

        Returns:
            A STRING token whose value is the text between the quotes (escape
            sequences are kept as written), or a TRUNCATED token holding the
            partial text if the input ends before the closing quote.
        """
        start_pos = self.pos
        self.pos += 1  # Skip opening quote

        escaped = False
        while self.pos < self.length:
            char = self.jsonStr[self.pos]
            self.pos += 1
            if escaped:
                # Character after a backslash never terminates the string.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                raw = self.jsonStr[start_pos:self.pos]
                return Token(TokenType.STRING, raw[1:-1], start_pos, self.pos, raw)

        # Ran out of input before the closing quote.
        raw = self.jsonStr[start_pos:self.pos]
        return Token(TokenType.TRUNCATED, raw[1:], start_pos, self.pos, raw)

    def readNumber(self) -> Token:
        """Read a number token (optional sign, digits, fraction, exponent).

        Returns:
            A NUMBER token whose value is an int or float; if the consumed text
            cannot be converted (for example a lone "-"), the value is the raw
            text itself.
        """
        start_pos = self.pos

        # Optional leading minus sign.
        if self.pos < self.length and self.jsonStr[self.pos] == '-':
            self.pos += 1

        # Integer digits.
        while self.pos < self.length and self.jsonStr[self.pos].isdigit():
            self.pos += 1

        # Optional fraction.
        if self.pos < self.length and self.jsonStr[self.pos] == '.':
            self.pos += 1
            while self.pos < self.length and self.jsonStr[self.pos].isdigit():
                self.pos += 1

        # Optional exponent with optional sign.
        if self.pos < self.length and self.jsonStr[self.pos] in 'eE':
            self.pos += 1
            if self.pos < self.length and self.jsonStr[self.pos] in '+-':
                self.pos += 1
            while self.pos < self.length and self.jsonStr[self.pos].isdigit():
                self.pos += 1

        raw = self.jsonStr[start_pos:self.pos]
        try:
            value = float(raw) if '.' in raw or 'e' in raw.lower() else int(raw)
        except ValueError:
            # Incomplete number such as "-" or "1e": keep the text as the value.
            value = raw

        return Token(TokenType.NUMBER, value, start_pos, self.pos, raw)

    def readKeyword(self) -> Token:
        """Read ``true``, ``false`` or ``null`` at ``pos``.

        Returns:
            A BOOLEAN or NULL token for a complete keyword; otherwise a
            TRUNCATED token covering the run of alphabetic characters at
            ``pos``.
        """
        start_pos = self.pos

        for keyword, token_type, value in self._KEYWORDS:
            # Compare in place instead of slicing the remaining input.
            if self.jsonStr.startswith(keyword, self.pos):
                self.pos += len(keyword)
                return Token(token_type, value, start_pos, self.pos, keyword)

        # Partial keyword (truncated)
        while self.pos < self.length and self.jsonStr[self.pos].isalpha():
            self.pos += 1
        raw = self.jsonStr[start_pos:self.pos]
        return Token(TokenType.TRUNCATED, raw, start_pos, self.pos, raw)

    def nextToken(self) -> Token:
        """Skip whitespace and return the next token.

        Returns an EOF token once the input is exhausted. A character that does
        not start any known token is consumed and returned as TRUNCATED.
        """
        self.skipWhitespace()

        if self.pos >= self.length:
            return Token(TokenType.EOF, None, self.pos, self.pos, "")

        char = self.jsonStr[self.pos]
        startPos = self.pos

        token_type = self._SINGLE_CHAR_TOKENS.get(char)
        if token_type is not None:
            self.pos += 1
            return Token(token_type, char, startPos, self.pos, char)

        if char == '"':
            return self.readString()
        if char == '-' or char.isdigit():
            return self.readNumber()
        if char.isalpha():
            return self.readKeyword()

        # Unknown character, treat as truncated
        self.pos += 1
        return Token(TokenType.TRUNCATED, char, startPos, self.pos, char)


@dataclass
class HierarchyLevel:
    """Represents one level in the parsed hierarchy.

    NOTE(review): nothing in the code visible in this part of the file
    constructs or reads this class; field meanings below come from the
    original author's comments.
    """
    type: str  # "object" or "array"
    start_pos: int
    end_pos: int  # -1 if not closed
    key: Optional[str]  # Key if this is a value in an object
    index: Optional[int]  # Index if this is in an array
    content: dict  # Parsed content at this level
    raw_start: str  # Raw string from start to children
    children_content: List[Any]  # For arrays: list of parsed elements


def getJsonContinuationContext(
    truncatedJson: str,
    budgetLimit: Optional[int] = None,
    overlapMaxChars: Optional[int] = None
) -> Tuple[str, str, str]:
    """
    Build the three continuation contexts for a truncated JSON string.

    The contexts are:
    1. Overlap context: the innermost object/array element that contains the cut.
    2. Hierarchy context: the structure from the root down to the cut, rendered
       under a character budget.
    3. Complete part: the received JSON with all open structures closed.

    Args:
        truncatedJson: The truncated JSON string.
        budgetLimit: Character budget for data values in the hierarchy context;
            BUDGET_LIMIT is used when None.
        overlapMaxChars: Maximum length of the overlap context;
            OVERLAP_MAX_CHARS is used when None.

    Returns:
        Tuple of (overlapContext, hierarchyContext, completePart).
    """
    effectiveBudget = BUDGET_LIMIT if budgetLimit is None else budgetLimit
    effectiveOverlap = OVERLAP_MAX_CHARS if overlapMaxChars is None else overlapMaxChars

    return JsonAnalyzer(truncatedJson, effectiveBudget, effectiveOverlap).analyze()


class JsonAnalyzer:
    """
    Analyzes truncated JSON and generates continuation contexts.

    Generates three contexts for truncated JSON strings:
    1. Overlap Context: The innermost object/array element containing the cut point
    2. Hierarchy Context: The hierarchical structure from root to cut with budget logic
    3. Complete Part: The complete part of the JSON with all structures properly closed
    """

    def __init__(self, jsonStr: str, budgetLimit: Optional[int] = None, overlapMaxChars: Optional[int] = None):
        self.jsonStr = jsonStr
        self.budgetLimit = budgetLimit if budgetLimit is not None else BUDGET_LIMIT
        self.overlapMaxChars = overlapMaxChars if overlapMaxChars is not None else OVERLAP_MAX_CHARS
        self.stack: List[StackFrame] = []
        self.hierarchy: List[dict] = []  # Parsed hierarchy info

    def analyze(self) -> Tuple[str, str, str]:
        """
        Analyze the truncated JSON and return all three contexts.

        Returns:
            Tuple of (overlapContext, hierarchyContext, completePart)
        """
        # Parse and track the structure
        self._parseStructure()

        # Generate overlap context
        overlapContext = self._generateOverlapContext()

        # Generate hierarchy context (use improved version)
        hierarchyContext = self._renderWithBudgetV2()

        # Generate complete part (JSON with all structures closed)
        completePart = self._generateCompletePart()

        return overlapContext, hierarchyContext, completePart

    def _generateCompletePart(self) -> str:
        """
        Generate the complete part of the JSON with all structures properly closed.

        This creates valid JSON by closing all open strings, brackets/braces.
        Unvollständige Keys werden entfernt, damit das Ergebnis valides JSON ist.
        Unvollständige Keywords (true, false, null) werden vervollständigt.

        Strategy:
        1. Take the full truncated JSON
        2. If we're in the middle of a string, close it
        3. Complete incomplete keywords (tr → true, f → false, n → null)
        4. Remove incomplete key-value pairs (keys without values)
        5. Close all open brackets/braces
        """
        result = self.jsonStr.rstrip()

        # Remove trailing comma if present (after stripping)
        if result.endswith(','):
            result = result[:-1]

        # Check if we need to close an open string
        stringClosing = self._getStringClosing(result)
        result += stringClosing

        # Complete incomplete keywords (true, false, null)
        result = self._completeIncompleteKeywords(result)

        # Check if we're in the middle of a key (after colon)
        # If string was just closed and we're after a colon with no value, remove the key
        result = self._cleanIncompleteKeyValue(result)

        # Close all open structures
        closingBrackets = self._getClosingBrackets(result)

        return result + closingBrackets

    def _getStringClosing(self, jsonStr: str) -> str:
        """Check if there's an unclosed string and return closing quote if needed."""
        in_string = False
        escaped = False

        for char in jsonStr:
            if escaped:
                escaped = False
                continue

            if char == '\\' and in_string:
                escaped = True
                continue

            if char == '"':
                in_string = not in_string

        return '"' if in_string else ""

    def _cleanIncompleteKeyValue(self, jsonStr: str) -> str:
        """
        Clean up incomplete key-value pairs.
        Handles cases like:
        - {"key": "incompl  -> keep (valid truncated value)
        - {"key":           -> remove key
        - {"a": 1, "key     -> remove incomplete key (was in middle of key name)
        """
        stripped = jsonStr.rstrip()

        # Pattern: ends with colon (possibly with whitespace) - incomplete value
        if stripped.endswith(':'):
            # Find the start of this key and remove the whole key-value
            return self._removeLastKey(stripped)

        # Check if we just closed a string that was an incomplete key
        # Pattern: ..., "something" or { "something" where something has no colon after
        # This happens when we close a truncated key name like "add" -> "add"
        if stripped.endswith('"'):
            # Look for the pattern: comma/bracket + whitespace + "string"
            # and check if this was supposed to be a key
            if self._isIncompleteKey(stripped):
                return self._removeLastKey(stripped)

        return jsonStr

    def _completeIncompleteKeywords(self, jsonStr: str) -> str:
        """
        Complete incomplete JSON keywords at the end of the string.

        Checks the last element for incomplete keywords after colon:
        - ": t*" or ": f*" or ": n*" -> complete to true/false/null
        - ": " or ":" (without keyword) -> set to null
        """
        result = jsonStr.rstrip()

        # Find the last colon (not in string)
        in_string = False
        escaped = False
        last_colon_pos = -1

        for i in range(len(result) - 1, -1, -1):
            char = result[i]

            if escaped:
                escaped = False
                continue

            if char == '\\' and in_string:
                escaped = True
                continue

            if char == '"':
                in_string = not in_string
                continue

            if not in_string and char == ':':
                last_colon_pos = i
                break

        if last_colon_pos < 0:
            return result

        # Get text after the last colon
        after_colon = result[last_colon_pos + 1:].strip()

        # Check for incomplete keyword patterns
        if after_colon.startswith('t') or after_colon.startswith('T'):
            # Incomplete true
            keyword_start = last_colon_pos + 1
            # Skip whitespace
            while keyword_start < len(result) and result[keyword_start] in ' \t\n\r':
                keyword_start += 1
            # Remove partial keyword
            keyword_end = keyword_start + 1
            while keyword_end < len(result) and result[keyword_end].isalpha():
                keyword_end += 1
            return result[:keyword_start] + 'true' + result[keyword_end:]

        elif after_colon.startswith('f') or after_colon.startswith('F'):
            # Incomplete false
            keyword_start = last_colon_pos + 1
            while keyword_start < len(result) and result[keyword_start] in ' \t\n\r':
                keyword_start += 1
            keyword_end = keyword_start + 1
            while keyword_end < len(result) and result[keyword_end].isalpha():
                keyword_end += 1
            return result[:keyword_start] + 'false' + result[keyword_end:]

        elif after_colon.startswith('n') or after_colon.startswith('N'):
            # Incomplete null
            keyword_start = last_colon_pos + 1
            while keyword_start < len(result) and result[keyword_start] in ' \t\n\r':
                keyword_start += 1
            keyword_end = keyword_start + 1
            while keyword_end < len(result) and result[keyword_end].isalpha():
                keyword_end += 1
            return result[:keyword_start] + 'null' + result[keyword_end:]

        elif not after_colon or after_colon == '':
            # No keyword after colon -> set to null
            return result + 'null'

        return result

    def _isIncompleteKey(self, jsonStr: str) -> bool:
        """
        Check if the last string in the JSON is an incomplete key in an object.
        This happens when truncation occurred in the middle of a key name.
        Only applies to objects, not arrays.
        """
        # Find the last complete string
        pos = len(jsonStr) - 1
        if jsonStr[pos] != '"':
            return False

        # Find the opening quote of this string
        stringStart = pos - 1
        while stringStart >= 0:
            if jsonStr[stringStart] == '"':
                # Check it's not escaped
                numBackslashes = 0
                checkPos = stringStart - 1
                while checkPos >= 0 and jsonStr[checkPos] == '\\':
                    numBackslashes += 1
                    checkPos -= 1
                if numBackslashes % 2 == 0:
                    break
            stringStart -= 1

        if stringStart < 0:
            return False

        # Now stringStart points to opening quote
        # Check what's before it (skip whitespace)
        beforePos = stringStart - 1
        while beforePos >= 0 and jsonStr[beforePos] in ' \t\n\r':
            beforePos -= 1

        if beforePos < 0:
            return False

        # For this to be an incomplete key, it must be preceded by { or ,
        # AND we must be inside an object (not an array)
        if jsonStr[beforePos] not in ',{':
            return False

        # Now check if we're in an object context (not array)
        # Count open braces/brackets to determine context
        braceCount = 0
        bracketCount = 0
        inString = False

        for i in range(beforePos + 1):
            char = jsonStr[i]
            if char == '"' and (i == 0 or jsonStr[i-1] != '\\'):
                inString = not inString
            elif not inString:
                if char == '{':
                    braceCount += 1
                elif char == '}':
                    braceCount -= 1
                elif char == '[':
                    bracketCount += 1
                elif char == ']':
                    bracketCount -= 1

        # If we have more open braces than brackets at this point,
        # we're in an object context
        # Actually, we need to check the innermost container
        # Let's track the stack properly
        stack = []
        inString = False

        for i in range(beforePos + 1):
            char = jsonStr[i]
            if char == '"' and (i == 0 or jsonStr[i-1] != '\\'):
                inString = not inString
            elif not inString:
                if char == '{':
                    stack.append('object')
                elif char == '[':
                    stack.append('array')
                elif char == '}':
                    if stack and stack[-1] == 'object':
                        stack.pop()
                elif char == ']':
                    if stack and stack[-1] == 'array':
                        stack.pop()

        # If innermost container is an object, this is an incomplete key
        return len(stack) > 0 and stack[-1] == 'object'

    def _removeLastKey(self, jsonStr: str) -> str:
        """Remove the last incomplete key-value pair from the JSON string."""
        stripped = jsonStr.rstrip()

        # Find the last comma or opening bracket before the incomplete key
        pos = len(stripped) - 1

        # Skip past the current string/key
        in_string = False
        while pos >= 0:
            char = stripped[pos]
            if char == '"' and (pos == 0 or stripped[pos-1] != '\\'):
                in_string = not in_string
            if not in_string and char in ',{':
                break
            pos -= 1

        if pos < 0:
            return stripped

        if stripped[pos] == ',':
            # Remove from comma onwards
            return stripped[:pos]
        elif stripped[pos] == '{':
            # Keep the opening brace
            return stripped[:pos+1]

        return stripped

    def _findLastCompletePosition(self) -> int:
        """Find the position of the last complete value in the JSON."""
        tokenizer = JsonTokenizer(self.jsonStr)
        last_complete_pos = 0
        stack_depth = 0
        last_value_end = 0
        in_value = False

        while True:
            token = tokenizer.nextToken()

            if token.type == TokenType.EOF:
                break

            if token.type == TokenType.TRUNCATED:
                # Return position before the truncated part
                break

            if token.type in (TokenType.OBJECT_START, TokenType.ARRAY_START):
                stack_depth += 1
                in_value = True

            elif token.type in (TokenType.OBJECT_END, TokenType.ARRAY_END):
                stack_depth -= 1
                last_value_end = token.end_pos
                in_value = False

            elif token.type == TokenType.STRING:
                # Check if this is a key or a value
                saved_pos = tokenizer.pos
                tokenizer.skipWhitespace()
                next_char = tokenizer.peek()
                tokenizer.pos = saved_pos

                if next_char != ':':
                    # It's a value
                    last_value_end = token.end_pos
                    in_value = False

            elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL):
                last_value_end = token.end_pos
                in_value = False

            elif token.type == TokenType.COMMA:
                # After a comma, we've completed a value
                last_complete_pos = last_value_end

        # Return the last complete position
        return last_value_end if last_value_end > 0 else len(self.jsonStr)

    def _getClosingBrackets(self, jsonStr: str) -> str:
        """Determine what closing brackets are needed."""
        stack = []
        in_string = False
        escaped = False

        for char in jsonStr:
            if escaped:
                escaped = False
                continue

            if char == '\\' and in_string:
                escaped = True
                continue

            if char == '"':
                in_string = not in_string
                continue

            if in_string:
                continue

            if char == '{':
                stack.append('}')
            elif char == '[':
                stack.append(']')
            elif char == '}':
                if stack and stack[-1] == '}':
                    stack.pop()
            elif char == ']':
                if stack and stack[-1] == ']':
                    stack.pop()

        # Return closing brackets in reverse order
        return ''.join(reversed(stack))

    def _parseStructure(self):
        """Parse the JSON structure and track hierarchy"""
        tokenizer = JsonTokenizer(self.jsonStr)

        while True:
            token = tokenizer.nextToken()

            if token.type == TokenType.EOF or token.type == TokenType.TRUNCATED:
                break

            if token.type == TokenType.OBJECT_START:
                frame = StackFrame(
                    type="object",
                    start_pos=token.start_pos,
                    keys_seen=[]
                )
                self.stack.append(frame)

            elif token.type == TokenType.ARRAY_START:
                frame = StackFrame(
                    type="array",
                    start_pos=token.start_pos,
                    index=0
                )
                self.stack.append(frame)

            elif token.type == TokenType.OBJECT_END:
                if self.stack and self.stack[-1].type == "object":
                    self.stack.pop()

            elif token.type == TokenType.ARRAY_END:
                if self.stack and self.stack[-1].type == "array":
                    self.stack.pop()

            elif token.type == TokenType.STRING:
                # Could be a key or a value
                self._handleStringToken(token, tokenizer)

            elif token.type == TokenType.COMMA:
                # Increment array index
                if self.stack and self.stack[-1].type == "array":
                    self.stack[-1].index += 1

    def _handleStringToken(self, token: Token, tokenizer: JsonTokenizer):
        """Handle a string token (could be key or value)"""
        if self.stack and self.stack[-1].type == "object":
            # Check if this is a key (followed by colon)
            saved_pos = tokenizer.pos
            tokenizer.skipWhitespace()
            next_char = tokenizer.peek()

            if next_char == ':':
                # This is a key
                self.stack[-1].key = token.value
                self.stack[-1].keys_seen.append(token.value)

            tokenizer.pos = saved_pos

    def _generateOverlapContext(self) -> str:
        """
        Generate the overlap context - the innermost object/array element containing the cut.

        Returns the raw string from the start of that element to the end of the truncated JSON.
        Dieser Kontext wird verwendet, um den abgeschnittenen Teil mit dem neuen Teil zu mergen.
        Exakt so wie im Original-String (für String-Matching beim Merge).

        SPECIAL CASE: If cut point is within a list item, return only:
        - The broken list item (containing the cut)
        - The list item before it (if available)
        This avoids returning the entire list when only a single item is broken.
        """
        if not self.stack:
            # No structure, return last overlap_max_chars characters
            return self.jsonStr[-self.overlapMaxChars:]

        # Find the innermost container that should be the overlap
        innermost = self.stack[-1]

        # SPECIAL CASE: If innermost is an array and cut is within a list item,
        # return only the broken item and the previous item (if available)
        if innermost.type == "array":
            overlap_start = self._findOverlapStartForArray(innermost)
        else:
            # For objects, use the standard logic
            overlap_start = self._findInnermostElementStart()

        overlap = self.jsonStr[overlap_start:]

        # Apply max chars limit
        if len(overlap) > self.overlapMaxChars:
            overlap = self.jsonStr[-self.overlapMaxChars:]

        return overlap

    def _findOverlapStartForArray(self, arrayFrame: StackFrame) -> int:
        """
        Find overlap start for array: return only the broken list item and previous item.

        If cut point is within a list item, returns start of previous item (if available),
        otherwise returns start of current (broken) item.
        """
        # Find all element start positions in the array
        element_starts = self._findAllArrayElementStarts(arrayFrame)

        if not element_starts:
            # No elements found, fall back to array start
            return arrayFrame.start_pos

        current_index = arrayFrame.index

        # If we're at index 0, there's no previous item - return current item start
        if current_index == 0:
            return element_starts[0]

        # If current_index is beyond known elements, use last known element
        if current_index >= len(element_starts):
            # Return start of second-to-last element (previous to last)
            if len(element_starts) >= 2:
                return element_starts[-2]  # Previous item
            else:
                return element_starts[0]  # Only one item, return it

        # Return start of previous item (current_index - 1)
        return element_starts[current_index - 1]

    def _findAllArrayElementStarts(self, arrayFrame: StackFrame) -> List[int]:
        """Find all element start positions in an array"""
        arrayContent = self.jsonStr[arrayFrame.start_pos:]

        # Skip the opening bracket and whitespace
        pos = 1
        while pos < len(arrayContent) and arrayContent[pos] in ' \t\n\r':
            pos += 1

        elementStarts = [arrayFrame.start_pos + pos]
        depth = 0
        inString = False
        escaped = False

        i = pos
        while i < len(arrayContent):
            char = arrayContent[i]

            if escaped:
                escaped = False
                i += 1
                continue

            if char == '\\' and inString:
                escaped = True
                i += 1
                continue

            if char == '"':
                inString = not inString
                i += 1
                continue

            if inString:
                i += 1
                continue

            if char in '{[':
                depth += 1
            elif char in '}]':
                depth -= 1
            elif char == ',' and depth == 0:
                # Found element boundary
                i += 1
                # Skip whitespace
                while i < len(arrayContent) and arrayContent[i] in ' \t\n\r':
                    i += 1
                elementStarts.append(arrayFrame.start_pos + i)

            i += 1

        return elementStarts

    def _findInnermostElementStart(self) -> int:
        """Find the start position of the innermost element for overlap"""
        if not self.stack:
            return max(0, len(self.jsonStr) - self.overlapMaxChars)

        # Walk through stack to find the innermost array element or object
        # We want the innermost "atomic" unit that contains the cut

        # Strategy:
        # - If innermost is an object: return its start
        # - If innermost is an array:
        #   - If current element is an object/array: return start of that element
        #   - If current element is a primitive: return start of array or last N chars

        innermost = self.stack[-1]

        if innermost.type == "object":
            return innermost.start_pos
        else:
            # It's an array - find the start of the current element
            element_start = self._findArrayElementStart(innermost)

            # Check if the element is a primitive or complex type
            element_content = self.jsonStr[element_start:].strip()

            # If it starts with { or [ it's complex, return the element start
            if element_content and element_content[0] in '{[':
                return element_start
            else:
                # Primitive in array - check if there's a parent object
                # or return overlap_max_chars from end
                for i in range(len(self.stack) - 2, -1, -1):
                    if self.stack[i].type == "object":
                        return self.stack[i].start_pos

                # No parent object, return max chars from end
                return max(0, len(self.jsonStr) - self.overlapMaxChars)

    def _findArrayElementStart(self, arrayFrame: StackFrame) -> int:
        """Find the start position of the current array element"""
        # We need to find the start of the current element in the array
        # Parse from array start to find element boundaries

        arrayContent = self.jsonStr[arrayFrame.start_pos:]

        # Skip the opening bracket and whitespace
        pos = 1
        while pos < len(arrayContent) and arrayContent[pos] in ' \t\n\r':
            pos += 1

        elementStarts = [arrayFrame.start_pos + pos]
        depth = 0
        inString = False
        escaped = False

        i = pos
        while i < len(arrayContent):
            char = arrayContent[i]

            if escaped:
                escaped = False
                i += 1
                continue

            if char == '\\' and inString:
                escaped = True
                i += 1
                continue

            if char == '"':
                inString = not inString
                i += 1
                continue

            if inString:
                i += 1
                continue

            if char in '{[':
                depth += 1
            elif char in '}]':
                depth -= 1
            elif char == ',' and depth == 0:
                # Found element boundary
                i += 1
                # Skip whitespace
                while i < len(arrayContent) and arrayContent[i] in ' \t\n\r':
                    i += 1
                elementStarts.append(arrayFrame.start_pos + i)

            i += 1

        # Return the start of the current element
        if arrayFrame.index < len(elementStarts):
            return elementStarts[arrayFrame.index]
        elif elementStarts:
            return elementStarts[-1]
        else:
            return arrayFrame.start_pos

    def _generateHierarchyContext(self) -> str:
        """
        Generate the hierarchy context with budget logic.
        Shows structure from root to cut point with data values limited by budget.
        """
        if not self.stack:
            # No structure
            return self.jsonStr[-self.overlapMaxChars:]

        # We need to rebuild the JSON with budget logic
        # Priority: elements closer to cut get full values, distant ones get "..."

        return self._rebuildWithBudget()

    def _rebuildWithBudget(self) -> str:
        """Rebuild JSON from root to cut with budget constraints"""

        # Strategy:
        # 1. Parse the JSON structure tracking all values
        # 2. Calculate total value size
        # 3. Apply budget from cut backwards
        # 4. Render with "..." for values outside budget

        # First, get a structured representation
        structure = self._parseForHierarchy()

        # Now render with budget
        return self._renderWithBudget(structure)

    def _parseForHierarchy(self) -> dict:
        """
        Parse ``self.jsonStr`` into a tree of plain dict nodes for rendering.

        Tokens are read from a ``JsonTokenizer`` until EOF or a TRUNCATED
        token is seen. Node kinds produced:

        - ``root``: the returned top-level node; holds ``children``
        - ``object`` / ``array``: ``key``, ``children``, ``start_pos``; once
          the matching end token is seen also ``end_pos`` and ``complete``
        - ``value``: ``key``, ``value``, ``raw``, ``start_pos``, ``end_pos``,
          ``value_type``
        - ``truncated_value``: ``raw`` (input from the token start to the end
          of the string), ``start_pos`` and, inside an object, ``key``

        Returns:
            The root node of the parsed tree.
        """

        # NOTE: 'raw_positions' is initialised here but never written in this method.
        result = {
            'type': 'root',
            'children': [],
            'raw_positions': []
        }

        tokenizer = JsonTokenizer(self.jsonStr)
        # Stack of open container nodes; the root stays at the bottom because
        # the end-token handlers below only pop when len(stack) > 1.
        stack = [result]
        # Key seen most recently inside an object that has not yet been
        # attached to a value/container node.
        current_key = None

        while True:
            token = tokenizer.nextToken()

            if token.type == TokenType.EOF:
                break

            if token.type == TokenType.TRUNCATED:
                # Mark the truncation point, then stop parsing.
                if stack:
                    current = stack[-1]
                    if current.get('type') == 'object':
                        # Inside an object a truncated token is recorded only
                        # when a key is pending; with no pending key nothing
                        # is added.
                        if current_key:
                            current['children'].append({
                                'type': 'truncated_value',
                                'key': current_key,
                                'raw': self.jsonStr[token.start_pos:],
                                'start_pos': token.start_pos
                            })
                    elif current.get('type') == 'array':
                        current['children'].append({
                            'type': 'truncated_value',
                            'raw': self.jsonStr[token.start_pos:],
                            'start_pos': token.start_pos
                        })
                    # At root level (no open container) a truncated token is
                    # not recorded.
                break

            if token.type == TokenType.OBJECT_START:
                # Open a new object: attach it to its parent, then descend.
                obj = {
                    'type': 'object',
                    'key': current_key,
                    'children': [],
                    'start_pos': token.start_pos
                }
                if stack:
                    stack[-1]['children'].append(obj)
                stack.append(obj)
                current_key = None

            elif token.type == TokenType.ARRAY_START:
                # Open a new array: attach it to its parent, then descend.
                arr = {
                    'type': 'array',
                    'key': current_key,
                    'children': [],
                    'start_pos': token.start_pos
                }
                if stack:
                    stack[-1]['children'].append(arr)
                stack.append(arr)
                current_key = None

            elif token.type == TokenType.OBJECT_END:
                # Close only when the innermost open container is an object;
                # a mismatched end token is ignored.
                if len(stack) > 1 and stack[-1].get('type') == 'object':
                    stack[-1]['end_pos'] = token.end_pos
                    stack[-1]['complete'] = True
                    stack.pop()

            elif token.type == TokenType.ARRAY_END:
                # Close only when the innermost open container is an array.
                if len(stack) > 1 and stack[-1].get('type') == 'array':
                    stack[-1]['end_pos'] = token.end_pos
                    stack[-1]['complete'] = True
                    stack.pop()

            elif token.type == TokenType.STRING:
                # Decide whether this string is an object key or a value by
                # looking ahead for ':'. The tokenizer position is saved here
                # and restored afterwards.
                # NOTE(review): presumably peek() returns the next character
                # without consuming it - confirm against JsonTokenizer.
                saved_pos = tokenizer.pos
                tokenizer.skipWhitespace()
                next_char = tokenizer.peek()

                if next_char == ':' and stack and stack[-1].get('type') == 'object':
                    # Key: remember it for the next value/container node.
                    current_key = token.value
                else:
                    # It's a value
                    value_node = {
                        'type': 'value',
                        'key': current_key,
                        'value': token.value,
                        'raw': token.raw,
                        'start_pos': token.start_pos,
                        'end_pos': token.end_pos,
                        'value_type': 'string'
                    }
                    if stack:
                        stack[-1]['children'].append(value_node)
                    current_key = None

                # Undo the lookahead so the next nextToken() call resumes
                # right after the string.
                tokenizer.pos = saved_pos

            elif token.type in (TokenType.NUMBER, TokenType.BOOLEAN, TokenType.NULL):
                # Non-string primitive: always a value; value_type is the
                # string form of the token type's enum value.
                value_node = {
                    'type': 'value',
                    'key': current_key,
                    'value': token.value,
                    'raw': token.raw,
                    'start_pos': token.start_pos,
                    'end_pos': token.end_pos,
                    'value_type': str(token.type.value)
                }
                if stack:
                    stack[-1]['children'].append(value_node)
                current_key = None

        return result

    def _renderWithBudget(self, structure: dict) -> str:
        """Render the structure with budget constraints"""

        # First, collect all value nodes with their distances from cut
        cutPos = len(self.jsonStr)
        allValues = self._collectValuesWithDistance(structure, cutPos)

        # Sort by distance (closest to cut first)
        allValues.sort(key=lambda x: x['distance'])

        # Determine which values get full rendering
        budgetRemaining = self.budgetLimit
        valuesWithBudget = set()

        for valInfo in allValues:
            valSize = len(str(valInfo['raw']))
            if budgetRemaining >= valSize:
                valuesWithBudget.add(valInfo['id'])
                budgetRemaining -= valSize

        # Now render the structure
        return self._renderNode(structure, valuesWithBudget, indent=0)

    def _collectValuesWithDistance(self, node: dict, cutPos: int, depth: int = 0) -> list:
        """Collect all value nodes with their distance from cut point"""
        values = []

        if node.get('type') == 'value':
            endPos = node.get('end_pos', cutPos)
            distance = cutPos - endPos
            values.append({
                'id': id(node),
                'node': node,
                'distance': distance,
                'raw': node.get('raw', ''),
                'depth': depth
            })
        elif node.get('type') == 'truncated_value':
            values.append({
                'id': id(node),
                'node': node,
                'distance': 0,  # Truncated values are at the cut
                'raw': node.get('raw', ''),
                'depth': depth
            })

        for child in node.get('children', []):
            values.extend(self._collectValuesWithDistance(child, cutPos, depth + 1))

        return values

    def _renderNode(self, node: dict, valuesWithBudget: set, indent: int = 0) -> str:
        """Render a node with budget constraints"""
        indent_str = "  " * indent

        node_type = node.get('type')

        if node_type == 'root':
            parts = []
            for child in node.get('children', []):
                parts.append(self._renderNode(child, valuesWithBudget, indent))
            return '\n'.join(parts)

        elif node_type == 'object':
            return self._renderObject(node, valuesWithBudget, indent)

        elif node_type == 'array':
            return self._renderArray(node, valuesWithBudget, indent)

        elif node_type == 'value':
            return self._renderValue(node, valuesWithBudget, indent)

        elif node_type == 'truncated_value':
            return node.get('raw', '')

        return ''

    def _renderObject(self, node: dict, valuesWithBudget: set, indent: int) -> str:
        """Render an object node"""
        indent_str = "  " * indent
        inner_indent = "  " * (indent + 1)

        key_prefix = ""
        if node.get('key'):
            key_prefix = f'"{node["key"]}": '

        if not node.get('children'):
            if node.get('complete'):
                return f"{key_prefix}{{}}"
            else:
                return f"{key_prefix}{{"

        parts = [f"{key_prefix}{{"]

        children = node.get('children', [])
        for i, child in enumerate(children):
            child_rendered = self._renderNode(child, valuesWithBudget, indent + 1)

            # Add comma if not last and next sibling exists
            if i < len(children) - 1:
                if child.get('type') != 'truncated_value':
                    parts.append(f"{inner_indent}{child_rendered},")
                else:
                    parts.append(f"{inner_indent}{child_rendered}")
            else:
                parts.append(f"{inner_indent}{child_rendered}")

        if node.get('complete'):
            parts.append(f"{indent_str}}}")

        return '\n'.join(parts)

    def _renderArray(self, node: dict, valuesWithBudget: set, indent: int) -> str:
        """Render an array node"""
        indent_str = "  " * indent
        inner_indent = "  " * (indent + 1)

        key_prefix = ""
        if node.get('key'):
            key_prefix = f'"{node["key"]}": '

        if not node.get('children'):
            if node.get('complete'):
                return f"{key_prefix}[]"
            else:
                return f"{key_prefix}["

        parts = [f"{key_prefix}["]

        children = node.get('children', [])
        for i, child in enumerate(children):
            child_rendered = self._renderNode(child, valuesWithBudget, indent + 1)

            if i < len(children) - 1:
                if child.get('type') != 'truncated_value':
                    parts.append(f"{inner_indent}{child_rendered},")
                else:
                    parts.append(f"{inner_indent}{child_rendered}")
            else:
                parts.append(f"{inner_indent}{child_rendered}")

        if node.get('complete'):
            parts.append(f"{indent_str}]")

        return '\n'.join(parts)

    def _renderValue(self, node: dict, valuesWithBudget: set, indent: int) -> str:
        """Render a value node"""
        key_prefix = ""
        if node.get('key'):
            key_prefix = f'"{node["key"]}": '

        if id(node) in valuesWithBudget:
            # Full value
            default_raw = '"...\"'
            raw_value = node.get('raw', default_raw)
            return f"{key_prefix}{raw_value}"
        else:
            # Placeholder
            return f'{key_prefix}"..."'

    def _renderWithBudgetV2(self) -> str:
        """
        Generate hierarchy context with budget logic.

        Alternative rendering that stays closer to the original truncated string.
        Shows full context near the cut, replaces distant values with "...".

        Budget-Logik:
        1. Sammeln: Alle String-Werte werden mit ihrer Position gesammelt
        2. Sortieren: Nach Entfernung zum Cut-Punkt (näher = höhere Priorität)
        3. Zuweisen: Budget wird von hinten nach vorne aufgebraucht
        4. Ersetzen: Werte außerhalb des Budgets werden durch "..." ersetzt
        """
        # Parse to understand structure, but render from original string with modifications
        structure = self._parseForHierarchy()

        # Collect all complete value nodes with positions
        allValues = self._collectCompleteValues(structure)

        # Sort by end position (furthest from cut = first to be truncated)
        allValues.sort(key=lambda x: x['end_pos'])

        # Apply budget: replace values from the start until budget exhausted
        budgetUsed = 0
        totalAvailable = sum(len(v['raw']) for v in allValues)

        valuesToReplace = []

        for val in allValues:
            valSize = len(val['raw'])
            if totalAvailable - budgetUsed > self.budgetLimit:
                # This value should be replaced with "..."
                valuesToReplace.append(val)
                budgetUsed += valSize
            else:
                break

        # Build the modified string
        result = self.jsonStr

        # Replace from end to start to preserve positions
        valuesToReplace.sort(key=lambda x: x['start_pos'], reverse=True)

        for val in valuesToReplace:
            start = val['start_pos']
            end = val['end_pos']
            result = result[:start] + '"..."' + result[end:]

        return result

    def _collectCompleteValues(self, node: dict) -> list:
        """Collect all complete (non-truncated) value nodes"""
        values = []

        if node.get('type') == 'value' and node.get('value_type') == 'string':
            values.append({
                'start_pos': node['start_pos'],
                'end_pos': node['end_pos'],
                'raw': node['raw'],
                'key': node.get('key')
            })

        for child in node.get('children', []):
            values.extend(self._collectCompleteValues(child))

        return values


def extractContinuationContexts(
    truncatedJson: str
) -> Tuple[str, str, str]:
    """
    Main entry point: extract all three continuation contexts from a truncated JSON.

    Three contexts are generated for a cut-off JSON string:

    1. Overlap context: the innermost object/array element containing the cut.
       - Used to merge the truncated part with the newly generated part.
       - Kept exactly as in the original string (for string matching on merge).

    2. Hierarchy context: the hierarchical structure from the root to the cut.
       - Budget logic: values near the cut are shown in full, values farther
         away become "..." placeholders.
       - Gives the AI the context of the overall JSON structure.

    3. Complete part: the complete, valid JSON up to the cut.
       - All open structures are closed (}, ], ").
       - Incomplete keys are removed.
       - Can be parsed directly as valid JSON.

    Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS.

    Args:
        truncatedJson: The truncated JSON string

    Returns:
        Tuple of (overlapContext, hierarchyContext, completePart):
        - overlapContext: The innermost object/element containing the cut (for merging)
        - hierarchyContext: Full structure from root to cut with budget-limited values
        - completePart: Valid JSON with all structures properly closed

    Example:
        >>> jsonStr = '{"users": [{"name": "John", "bio": "Hello Wor'
        >>> overlap, hierarchy, complete = extractContinuationContexts(jsonStr)
        >>> import json
        >>> parsed = json.loads(complete)  # works
    """
    # Thin public alias: the actual work happens in getJsonContinuationContext.
    contexts = getJsonContinuationContext(truncatedJson)
    return contexts


# Convenience function with named results
def getContexts(
    truncatedJson: str
) -> JsonContinuationContexts:
    """
    Return the three continuation contexts wrapped in a Pydantic model.

    Calls ``extractContinuationContexts`` and maps the resulting tuple onto
    named fields.

    Uses module constants BUDGET_LIMIT and OVERLAP_MAX_CHARS.

    Args:
        truncatedJson: The truncated JSON string

    Returns:
        JsonContinuationContexts Pydantic model with:
        - overlapContext: The innermost object/element containing the cut
        - hierarchyContext: Full structure with budget-limited values
        - completePart: Valid JSON with all structures properly closed

    Example:
        >>> json_str = '{"users": [{"name": "John", "bio": "Hello Wor'
        >>> contexts = getContexts(json_str)
        >>> print(contexts.overlapContext)
        >>> print(contexts.hierarchyContext)
        >>> print(contexts.completePart)
    """
    overlapText, hierarchyText, closedJson = extractContinuationContexts(truncatedJson)
    model = JsonContinuationContexts(
        overlapContext=overlapText,
        hierarchyContext=hierarchyText,
        completePart=closedJson,
    )
    return model