fixed json looping context strings for cut point
This commit is contained in:
parent
c20e65ad4e
commit
879a3c0eff
9 changed files with 2036 additions and 370 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -262,7 +262,11 @@ class JsonMergeLogger:
|
|||
JsonMergeLogger._log(f" {line}")
|
||||
JsonMergeLogger._log(" " + "="*76)
|
||||
else:
|
||||
JsonMergeLogger._log(f" Accumulated suffix (COMPLETE): {accSuffix}")
|
||||
# For lists/arrays, only log summary to avoid log flooding
|
||||
if isinstance(accSuffix, list):
|
||||
JsonMergeLogger._log(f" Accumulated suffix: list with {len(accSuffix)} items")
|
||||
else:
|
||||
JsonMergeLogger._log(f" Accumulated suffix: {type(accSuffix).__name__}")
|
||||
if fragPrefix is not None:
|
||||
if isinstance(fragPrefix, str):
|
||||
prefixLines = fragPrefix.split('\n')
|
||||
|
|
@ -278,7 +282,11 @@ class JsonMergeLogger:
|
|||
for line in prefixLines:
|
||||
JsonMergeLogger._log(f" {line}")
|
||||
else:
|
||||
JsonMergeLogger._log(f" Fragment prefix (COMPLETE): {fragPrefix}")
|
||||
# For lists/arrays, only log summary to avoid log flooding
|
||||
if isinstance(fragPrefix, list):
|
||||
JsonMergeLogger._log(f" Fragment prefix: list with {len(fragPrefix)} items")
|
||||
else:
|
||||
JsonMergeLogger._log(f" Fragment prefix: {type(fragPrefix).__name__}")
|
||||
else:
|
||||
JsonMergeLogger._log(f" ⚠️ No overlap detected - appending all")
|
||||
|
||||
|
|
@ -1903,13 +1911,32 @@ class ModularJsonMerger:
|
|||
def _mergeStrings(accStr: str, fragStr: str, overlapLength: int) -> str:
|
||||
"""
|
||||
Merge two JSON strings together, removing the overlap.
|
||||
Handles whitespace at cut points properly for seamless merging.
|
||||
"""
|
||||
if overlapLength > 0:
|
||||
# Remove overlap from fragment and append
|
||||
# CRITICAL: Handle whitespace properly - if accumulated ends with whitespace
|
||||
# and fragment starts with the same content, we need to preserve whitespace structure
|
||||
merged = accStr + fragStr[overlapLength:]
|
||||
else:
|
||||
# No overlap - just concatenate (might need comma or other separator)
|
||||
# Try to add comma if needed
|
||||
# CRITICAL: Preserve whitespace structure when merging
|
||||
|
||||
# Get trailing whitespace from accumulated (spaces, tabs, but not newlines)
|
||||
accTrailingWs = ""
|
||||
i = len(accStr) - 1
|
||||
while i >= 0 and accStr[i] in [' ', '\t']:
|
||||
accTrailingWs = accStr[i] + accTrailingWs
|
||||
i -= 1
|
||||
|
||||
# Get leading whitespace from fragment (spaces, tabs, but not newlines)
|
||||
fragLeadingWs = ""
|
||||
i = 0
|
||||
while i < len(fragStr) and fragStr[i] in [' ', '\t']:
|
||||
fragLeadingWs += fragStr[i]
|
||||
i += 1
|
||||
|
||||
# Trim for content detection but preserve whitespace structure
|
||||
accTrimmed = accStr.rstrip().rstrip(',')
|
||||
fragTrimmed = fragStr.lstrip().lstrip(',')
|
||||
|
||||
|
|
@ -1917,10 +1944,14 @@ class ModularJsonMerger:
|
|||
if accTrimmed and fragTrimmed:
|
||||
# If accumulated ends with } or ] and fragment starts with { or [, we might need comma
|
||||
if (accTrimmed[-1] in '}]' and fragTrimmed[0] in '{['):
|
||||
merged = accTrimmed + ',' + fragTrimmed
|
||||
# Add comma with appropriate whitespace
|
||||
merged = accTrimmed + ',' + fragLeadingWs + fragTrimmed
|
||||
else:
|
||||
merged = accTrimmed + fragTrimmed
|
||||
# Merge with preserved whitespace structure
|
||||
# Use the whitespace from fragment (it knows the proper spacing)
|
||||
merged = accTrimmed + accTrailingWs + fragLeadingWs + fragTrimmed
|
||||
else:
|
||||
# One is empty - just concatenate with preserved whitespace
|
||||
merged = accStr + fragStr
|
||||
|
||||
return merged
|
||||
|
|
|
|||
|
|
@ -2198,16 +2198,66 @@ Output requirements:
|
|||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
||||
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||
overlapContext = ""
|
||||
if lastRawJson:
|
||||
overlapContext = lastRawJson[-100:].strip()
|
||||
# Find break position in raw JSON
|
||||
lastCompletePart = continuationContext.last_complete_part
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
|
||||
if lastCompletePart:
|
||||
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
|
||||
normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
|
||||
normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
|
||||
|
||||
# Find where normalizedComplete ends in normalizedRaw
|
||||
pos = normalizedRaw.find(normalizedComplete)
|
||||
if pos >= 0:
|
||||
breakPos = pos + len(normalizedComplete)
|
||||
else:
|
||||
pos = lastRawJson.find(lastCompletePart)
|
||||
if pos >= 0:
|
||||
breakPos = pos + len(lastCompletePart)
|
||||
elif incompletePart:
|
||||
pos = lastRawJson.find(incompletePart)
|
||||
if pos >= 0:
|
||||
breakPos = pos
|
||||
|
||||
# Extract cut part and full part before (same level)
|
||||
overlapContext = self._extractOverlapContext(lastRawJson, breakPos)
|
||||
|
||||
# Build unified context showing structure hierarchy with cut point
|
||||
unifiedContext = ""
|
||||
if lastRawJson:
|
||||
# Find break position in raw JSON
|
||||
if incompletePart:
|
||||
# Use last_complete_part length to find where complete part ends
|
||||
lastCompletePart = continuationContext.last_complete_part
|
||||
if lastCompletePart:
|
||||
# Break position is where the complete part ends
|
||||
# Normalize lastRawJson to match the normalized lastCompletePart
|
||||
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
|
||||
normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
|
||||
normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
|
||||
|
||||
# Find where normalizedComplete ends in normalizedRaw
|
||||
breakPos = normalizedRaw.find(normalizedComplete)
|
||||
if breakPos >= 0:
|
||||
breakPos = breakPos + len(normalizedComplete)
|
||||
else:
|
||||
# Fallback: use length of lastCompletePart in original string
|
||||
breakPos = lastRawJson.find(lastCompletePart)
|
||||
if breakPos >= 0:
|
||||
breakPos = breakPos + len(lastCompletePart)
|
||||
else:
|
||||
# Last resort: use incompletePart position
|
||||
if incompletePart:
|
||||
breakPos = lastRawJson.find(incompletePart)
|
||||
if breakPos == -1:
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
else:
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
elif incompletePart:
|
||||
# If no complete part, find where incomplete part starts
|
||||
breakPos = lastRawJson.find(incompletePart)
|
||||
if breakPos == -1:
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
|
|
@ -2215,8 +2265,8 @@ Output requirements:
|
|||
breakPos = len(lastRawJson.rstrip())
|
||||
|
||||
# Build intelligent context showing hierarchy
|
||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
||||
from modules.shared.jsonUtils import buildIncompleteContext
|
||||
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
|
|
@ -2229,29 +2279,43 @@ Output requirements:
|
|||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
JSON Structure Template:
|
||||
```json
|
||||
{templateStructure}
|
||||
```
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
||||
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||
|
||||
Last ~100 characters from previous response (repeat these at the start):
|
||||
Overlap context (cut part and full part before at same level):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
||||
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||
3. Continue generating the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response must be valid JSON matching the structure template above
|
||||
- Start with overlap (~100 chars) then continue seamlessly
|
||||
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||
- Complete the incomplete element and continue with remaining elements"""
|
||||
return continuationPrompt
|
||||
|
||||
def _extractOverlapContext(self, jsonContent: str, breakPosition: int) -> str:
|
||||
"""
|
||||
Extract overlap context: cut part and full part before (same level).
|
||||
Delegates to shared function in jsonUtils for consistency.
|
||||
"""
|
||||
from modules.shared.jsonUtils import extractOverlapContext
|
||||
return extractOverlapContext(jsonContent, breakPosition)
|
||||
|
||||
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract multiple JSON blocks from response and merge them appropriately.
|
||||
|
|
|
|||
|
|
@ -128,10 +128,18 @@ class StructureGenerator:
|
|||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
||||
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||
overlapContext = ""
|
||||
if lastRawJson:
|
||||
overlapContext = lastRawJson[-100:].strip()
|
||||
# Find break position
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
if incompletePart:
|
||||
pos = lastRawJson.find(incompletePart)
|
||||
if pos >= 0:
|
||||
breakPos = pos
|
||||
|
||||
# Extract cut part and full part before (same level)
|
||||
overlapContext = StructureGenerator._extractOverlapContext(lastRawJson, breakPos)
|
||||
|
||||
# Build unified context showing structure hierarchy with cut point
|
||||
unifiedContext = ""
|
||||
|
|
@ -145,8 +153,8 @@ class StructureGenerator:
|
|||
breakPos = len(lastRawJson.rstrip())
|
||||
|
||||
# Build intelligent context showing hierarchy
|
||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
||||
from modules.shared.jsonUtils import buildIncompleteContext
|
||||
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
|
|
@ -159,28 +167,172 @@ class StructureGenerator:
|
|||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
JSON Structure Template:
|
||||
```json
|
||||
{templateStructure}
|
||||
```
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
||||
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||
|
||||
Last ~100 characters from previous response (repeat these at the start):
|
||||
Overlap context (cut part and full part before at same level):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
||||
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||
3. Continue generating the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response must be valid JSON matching the structure template above
|
||||
- Start with overlap (~100 chars) then continue seamlessly
|
||||
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||
- Complete the incomplete element and continue with remaining elements"""
|
||||
return continuationPrompt
|
||||
"""
|
||||
Extract overlap context: cut part and full part before (same level).
|
||||
|
||||
Returns a string showing:
|
||||
1. The last complete element at the same level before the cut point
|
||||
2. The cut part (incomplete element at the cut point)
|
||||
"""
|
||||
if not jsonContent or breakPosition <= 0:
|
||||
return jsonContent[-200:].strip() if jsonContent else ""
|
||||
|
||||
from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece
|
||||
|
||||
# Find structure hierarchy
|
||||
hierarchy = findStructureHierarchy(jsonContent, breakPosition)
|
||||
if not hierarchy:
|
||||
# Fallback: show last 200 chars before break
|
||||
start = max(0, breakPosition - 200)
|
||||
return jsonContent[start:breakPosition + 100].strip()
|
||||
|
||||
# Get cut level (the array/object containing the cut piece)
|
||||
cutLevel = hierarchy[-1]
|
||||
cutLevelStart = cutLevel['start_pos']
|
||||
cutLevelType = cutLevel['type']
|
||||
|
||||
# Extract cut piece (incomplete element)
|
||||
cutPiece = extractCutPiece(jsonContent, breakPosition)
|
||||
|
||||
# Find the last complete element at the same level before the cut point
|
||||
overlapParts = []
|
||||
|
||||
if cutLevelType == 'array':
|
||||
# Find the last complete array element before breakPosition
|
||||
i = breakPosition - 1
|
||||
depth = 0
|
||||
inString = False
|
||||
escapeNext = False
|
||||
elementStart = breakPosition
|
||||
|
||||
# Find the start of the incomplete element (or last complete element)
|
||||
while i >= cutLevelStart:
|
||||
char = jsonContent[i]
|
||||
|
||||
if escapeNext:
|
||||
escapeNext = False
|
||||
i -= 1
|
||||
continue
|
||||
|
||||
if char == '\\':
|
||||
escapeNext = True
|
||||
i -= 1
|
||||
continue
|
||||
|
||||
if char == '"':
|
||||
inString = not inString
|
||||
i -= 1
|
||||
continue
|
||||
|
||||
if not inString:
|
||||
if char == ']':
|
||||
depth += 1
|
||||
elif char == '[':
|
||||
depth -= 1
|
||||
if depth < 0:
|
||||
elementStart = i + 1
|
||||
break
|
||||
elif char == ',' and depth == 0:
|
||||
elementStart = i + 1
|
||||
break
|
||||
|
||||
i -= 1
|
||||
|
||||
# Extract the last complete element (if exists) and the cut part
|
||||
if elementStart < breakPosition:
|
||||
contentBeforeBreak = jsonContent[max(cutLevelStart, elementStart - 500):breakPosition].strip()
|
||||
|
||||
# Find the last complete element by looking for balanced brackets/braces
|
||||
lastCompleteEnd = breakPosition
|
||||
braceCount = 0
|
||||
bracketCount = 0
|
||||
inString = False
|
||||
escapeNext = False
|
||||
|
||||
# Go backwards from breakPosition to find where last complete element ends
|
||||
for j in range(breakPosition - 1, max(cutLevelStart, breakPosition - 1000), -1):
|
||||
char = jsonContent[j]
|
||||
|
||||
if escapeNext:
|
||||
escapeNext = False
|
||||
continue
|
||||
|
||||
if char == '\\':
|
||||
escapeNext = True
|
||||
continue
|
||||
|
||||
if char == '"':
|
||||
inString = not inString
|
||||
continue
|
||||
|
||||
if not inString:
|
||||
if char == '}':
|
||||
braceCount += 1
|
||||
elif char == '{':
|
||||
braceCount -= 1
|
||||
if braceCount == 0 and bracketCount == 0:
|
||||
lastCompleteEnd = j
|
||||
break
|
||||
elif char == ']':
|
||||
bracketCount += 1
|
||||
elif char == '[':
|
||||
bracketCount -= 1
|
||||
if bracketCount == 0 and braceCount == 0:
|
||||
lastCompleteEnd = j + 1
|
||||
break
|
||||
elif char == ',' and braceCount == 0 and bracketCount == 0:
|
||||
lastCompleteEnd = j + 1
|
||||
break
|
||||
|
||||
# Extract last complete element and cut part
|
||||
if lastCompleteEnd < breakPosition:
|
||||
lastCompleteElement = jsonContent[max(cutLevelStart, lastCompleteEnd - 300):lastCompleteEnd].strip()
|
||||
cutPart = jsonContent[lastCompleteEnd:breakPosition + len(cutPiece)].strip()
|
||||
|
||||
if lastCompleteElement:
|
||||
overlapParts.append(f"Last complete element at same level:\n{lastCompleteElement}")
|
||||
if cutPart:
|
||||
overlapParts.append(f"Cut part (incomplete):\n{cutPart}")
|
||||
else:
|
||||
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||
else:
|
||||
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||
else:
|
||||
# For objects or other types, show context around break point
|
||||
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||
|
||||
return "\n\n".join(overlapParts) if overlapParts else jsonContent[max(0, breakPosition - 200):breakPosition + 100].strip()
|
||||
|
||||
# Call AI with looping support
|
||||
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
|
||||
|
|
@ -304,6 +456,15 @@ CRITICAL:
|
|||
logger.error(f"Error in generateStructure: {str(e)}")
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
|
||||
"""
|
||||
Extract overlap context: cut part and full part before (same level).
|
||||
Delegates to shared function in jsonUtils for consistency.
|
||||
"""
|
||||
from modules.shared.jsonUtils import extractOverlapContext
|
||||
return extractOverlapContext(jsonContent, breakPosition)
|
||||
|
||||
def _buildChapterStructurePrompt(
|
||||
self,
|
||||
userPrompt: str,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,15 @@ class CodeGenerationPath:
|
|||
def __init__(self, services):
|
||||
self.services = services
|
||||
|
||||
@staticmethod
|
||||
def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
|
||||
"""
|
||||
Extract overlap context: cut part and full part before (same level).
|
||||
Delegates to shared function in jsonUtils for consistency.
|
||||
"""
|
||||
from modules.shared.jsonUtils import extractOverlapContext
|
||||
return extractOverlapContext(jsonContent, breakPosition)
|
||||
|
||||
async def generateCode(
|
||||
self,
|
||||
userPrompt: str,
|
||||
|
|
@ -354,8 +363,8 @@ Return ONLY valid JSON matching the request above.
|
|||
breakPos = len(lastRawJson.rstrip())
|
||||
|
||||
# Build intelligent context showing hierarchy
|
||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
||||
from modules.shared.jsonUtils import buildIncompleteContext
|
||||
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
|
|
@ -368,26 +377,32 @@ Return ONLY valid JSON matching the request above.
|
|||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
JSON Structure Template:
|
||||
```json
|
||||
{templateStructure}
|
||||
```
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
||||
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||
|
||||
Last ~100 characters from previous response (repeat these at the start):
|
||||
Overlap context (cut part and full part before at same level):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
||||
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||
3. Continue generating the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response must be valid JSON matching the structure template above
|
||||
- Start with overlap (~100 chars) then continue seamlessly
|
||||
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||
- Complete the incomplete element and continue with remaining elements"""
|
||||
return continuationPrompt
|
||||
|
||||
|
|
@ -793,10 +808,18 @@ Return ONLY valid JSON in this format:
|
|||
incompletePart = continuationContext.incomplete_part
|
||||
lastRawJson = continuationContext.last_raw_json
|
||||
|
||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
||||
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||
overlapContext = ""
|
||||
if lastRawJson:
|
||||
overlapContext = lastRawJson[-100:].strip()
|
||||
# Find break position
|
||||
breakPos = len(lastRawJson.rstrip())
|
||||
if incompletePart:
|
||||
pos = lastRawJson.find(incompletePart)
|
||||
if pos >= 0:
|
||||
breakPos = pos
|
||||
|
||||
# Extract cut part and full part before (same level)
|
||||
overlapContext = CodeGenerationPath._extractOverlapContext(lastRawJson, breakPos)
|
||||
|
||||
# Build unified context showing structure hierarchy with cut point
|
||||
unifiedContext = ""
|
||||
|
|
@ -810,8 +833,8 @@ Return ONLY valid JSON in this format:
|
|||
breakPos = len(lastRawJson.rstrip())
|
||||
|
||||
# Build intelligent context showing hierarchy
|
||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
||||
from modules.shared.jsonUtils import buildIncompleteContext
|
||||
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||
elif incompletePart:
|
||||
unifiedContext = incompletePart
|
||||
else:
|
||||
|
|
@ -824,26 +847,32 @@ Return ONLY valid JSON in this format:
|
|||
The previous JSON response was incomplete. Continue from where it stopped.
|
||||
|
||||
JSON Structure Template:
|
||||
```json
|
||||
{templateStructure}
|
||||
```
|
||||
|
||||
Context showing structure hierarchy with cut point:
|
||||
```
|
||||
{unifiedContext}
|
||||
```
|
||||
|
||||
Overlap Requirement:
|
||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
||||
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||
|
||||
Last ~100 characters from previous response (repeat these at the start):
|
||||
Overlap context (cut part and full part before at same level):
|
||||
```json
|
||||
{overlapContext if overlapContext else "No overlap context available"}
|
||||
```
|
||||
|
||||
TASK:
|
||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
||||
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||
3. Continue generating the remaining content following the JSON structure template above
|
||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||
|
||||
CRITICAL:
|
||||
- Your response must be valid JSON matching the structure template above
|
||||
- Start with overlap (~100 chars) then continue seamlessly
|
||||
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||
- Complete the incomplete element and continue with remaining elements"""
|
||||
return continuationPrompt
|
||||
|
||||
|
|
|
|||
|
|
@ -346,9 +346,18 @@ class BaseRenderer(ABC):
|
|||
|
||||
response = await aiService.callAi(request)
|
||||
|
||||
# Save styling prompt and response to debug
|
||||
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
|
||||
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
||||
# Save styling prompt and response to debug (fire and forget - don't block on slow file I/O)
|
||||
# The writeDebugFile calls os.listdir() which can be slow with many files
|
||||
# Run in background thread to avoid blocking rendering
|
||||
import threading
|
||||
def _writeDebugFiles():
|
||||
try:
|
||||
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
|
||||
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
||||
except Exception:
|
||||
pass # Silently fail - debug writing should never block rendering
|
||||
|
||||
threading.Thread(target=_writeDebugFiles, daemon=True).start()
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip() if response and response.content else ""
|
||||
|
|
|
|||
|
|
@ -116,24 +116,37 @@ class RendererDocx(BaseRenderer):
|
|||
|
||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
"""Generate DOCX content from structured JSON document."""
|
||||
import time
|
||||
start_time = time.time()
|
||||
try:
|
||||
self.logger.debug("_generateDocxFromJson: Starting document generation")
|
||||
# Create new document
|
||||
doc = Document()
|
||||
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
|
||||
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
style_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: About to get style set")
|
||||
styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
|
||||
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
|
||||
|
||||
# Setup basic document styles and create all styles from style set
|
||||
setup_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: Setting up document styles")
|
||||
self._setupBasicDocumentStyles(doc)
|
||||
self._setupDocumentStyles(doc, styleSet)
|
||||
self.logger.debug(f"_generateDocxFromJson: Document styles setup in {time.time() - setup_start:.2f}s")
|
||||
|
||||
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
|
||||
if not self._validateJsonStructure(json_content):
|
||||
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
|
||||
|
||||
# Extract sections and metadata from standardized schema
|
||||
extract_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: Extracting sections and metadata")
|
||||
sections = self._extractSections(json_content)
|
||||
metadata = self._extractMetadata(json_content)
|
||||
self.logger.debug(f"_generateDocxFromJson: Extracted {len(sections)} sections in {time.time() - extract_start:.2f}s")
|
||||
|
||||
# Use provided title (which comes from documents[].title) as primary source
|
||||
# Fallback to metadata.title only if title parameter is empty
|
||||
|
|
@ -144,18 +157,32 @@ class RendererDocx(BaseRenderer):
|
|||
doc.add_paragraph(document_title, style='Title')
|
||||
|
||||
# Process each section in order
|
||||
for section in sections:
|
||||
render_start = time.time()
|
||||
self.logger.debug(f"_generateDocxFromJson: Starting to render {len(sections)} sections")
|
||||
for idx, section in enumerate(sections):
|
||||
section_start = time.time()
|
||||
self.logger.debug(f"_generateDocxFromJson: Rendering section {idx + 1}/{len(sections)}")
|
||||
self._renderJsonSection(doc, section, styleSet)
|
||||
self.logger.debug(f"_generateDocxFromJson: Section {idx + 1} rendered in {time.time() - section_start:.2f}s")
|
||||
self.logger.debug(f"_generateDocxFromJson: All sections rendered in {time.time() - render_start:.2f}s")
|
||||
|
||||
# Save to buffer
|
||||
save_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: Starting to save document to buffer")
|
||||
buffer = io.BytesIO()
|
||||
doc.save(buffer)
|
||||
buffer.seek(0)
|
||||
self.logger.debug(f"_generateDocxFromJson: Document saved to buffer in {time.time() - save_start:.2f}s")
|
||||
|
||||
# Convert to base64
|
||||
encode_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: Converting to base64")
|
||||
docx_bytes = buffer.getvalue()
|
||||
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
||||
self.logger.debug(f"_generateDocxFromJson: Converted to base64 in {time.time() - encode_start:.2f}s (document size: {len(docx_bytes)} bytes)")
|
||||
|
||||
total_time = time.time() - start_time
|
||||
self.logger.info(f"_generateDocxFromJson: Document generation completed in {total_time:.2f}s")
|
||||
return docx_base64
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -381,6 +408,8 @@ class RendererDocx(BaseRenderer):
|
|||
|
||||
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a JSON table to DOCX using AI-generated styles."""
|
||||
import time
|
||||
table_start = time.time()
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = table_data.get("content", {})
|
||||
|
|
@ -392,19 +421,26 @@ class RendererDocx(BaseRenderer):
|
|||
if not headers or not rows:
|
||||
return
|
||||
|
||||
self.logger.debug(f"_renderJsonTable: Starting table render - {len(rows)} rows × {len(headers)} columns = {len(rows) * len(headers)} cells")
|
||||
|
||||
# Create table
|
||||
create_start = time.time()
|
||||
table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
|
||||
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
||||
self.logger.debug(f"_renderJsonTable: Table created in {time.time() - create_start:.2f}s")
|
||||
|
||||
# Apply table borders based on AI style
|
||||
border_start = time.time()
|
||||
border_style = styles["table_border"]["style"]
|
||||
if border_style == "horizontal_only":
|
||||
self._applyHorizontalBordersOnly(table)
|
||||
elif border_style == "grid":
|
||||
table.style = 'Table Grid'
|
||||
# else: no borders
|
||||
self.logger.debug(f"_renderJsonTable: Borders applied in {time.time() - border_start:.2f}s")
|
||||
|
||||
# Add headers with AI-generated styling
|
||||
header_start = time.time()
|
||||
header_row = table.rows[0]
|
||||
header_style = styles["table_header"]
|
||||
for i, header in enumerate(headers):
|
||||
|
|
@ -424,9 +460,14 @@ class RendererDocx(BaseRenderer):
|
|||
run.font.size = Pt(11)
|
||||
text_color = header_style["text_color"].lstrip('#')
|
||||
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||
self.logger.debug(f"_renderJsonTable: Headers rendered in {time.time() - header_start:.2f}s")
|
||||
|
||||
# Add data rows with AI-generated styling
|
||||
rows_start = time.time()
|
||||
cell_style = styles["table_cell"]
|
||||
total_cells = len(rows) * len(headers)
|
||||
log_interval = max(1, total_cells // 20) # Log every 5% progress
|
||||
|
||||
for row_idx, row_data in enumerate(rows):
|
||||
if row_idx + 1 < len(table.rows):
|
||||
table_row = table.rows[row_idx + 1]
|
||||
|
|
@ -435,16 +476,30 @@ class RendererDocx(BaseRenderer):
|
|||
cell = table_row.cells[col_idx]
|
||||
cell.text = str(cell_data)
|
||||
|
||||
# Apply text styling
|
||||
for paragraph in cell.paragraphs:
|
||||
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||
for run in paragraph.runs:
|
||||
run.font.size = Pt(10)
|
||||
text_color = cell_style["text_color"].lstrip('#')
|
||||
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||
# Apply text styling - OPTIMIZED: Only style if needed
|
||||
# For large tables, styling every cell can be very slow
|
||||
# Check if we need to apply styling (only if style differs from default)
|
||||
if cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10:
|
||||
for paragraph in cell.paragraphs:
|
||||
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||
for run in paragraph.runs:
|
||||
run.font.size = Pt(10)
|
||||
text_color = cell_style["text_color"].lstrip('#')
|
||||
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||
|
||||
# Log progress for large tables
|
||||
if (row_idx + 1) % log_interval == 0 or row_idx == len(rows) - 1:
|
||||
elapsed = time.time() - rows_start
|
||||
progress = ((row_idx + 1) / len(rows)) * 100
|
||||
cells_processed = (row_idx + 1) * len(headers)
|
||||
rate = cells_processed / elapsed if elapsed > 0 else 0
|
||||
self.logger.debug(f"_renderJsonTable: Progress {progress:.1f}% ({row_idx + 1}/{len(rows)} rows, {cells_processed}/{total_cells} cells) - Rate: {rate:.1f} cells/s, Elapsed: {elapsed:.2f}s")
|
||||
|
||||
total_time = time.time() - table_start
|
||||
self.logger.info(f"_renderJsonTable: Table rendering completed in {total_time:.2f}s ({len(rows)} rows × {len(headers)} cols = {total_cells} cells)")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
|
||||
|
||||
def _applyHorizontalBordersOnly(self, table) -> None:
|
||||
"""Apply only horizontal borders to the table (no vertical borders)."""
|
||||
|
|
|
|||
|
|
@ -1300,16 +1300,16 @@ def _extractLastCompleteAndIncomplete(jsonContent: str) -> Tuple[str, str]:
|
|||
lastCompleteElement = _findLastCompleteElement(lastCompletePart)
|
||||
if lastCompleteElement:
|
||||
# Build context for incomplete part - show structure around the break
|
||||
incompleteWithContext = _buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||
incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||
return lastCompleteElement, incompleteWithContext
|
||||
else:
|
||||
# Build context for incomplete part
|
||||
incompleteWithContext = _buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||
incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||
return lastCompletePart, incompleteWithContext
|
||||
else:
|
||||
# No complete structure found - everything is incomplete
|
||||
# Still try to show context
|
||||
incompleteWithContext = _buildIncompleteContext(jsonContent, 0)
|
||||
incompleteWithContext = buildIncompleteContext(jsonContent, 0)
|
||||
return "", incompleteWithContext
|
||||
|
||||
|
||||
|
|
@ -1359,183 +1359,449 @@ def _findLastCompleteElement(jsonStr: str) -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def _buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
|
||||
def buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
|
||||
"""
|
||||
Build intelligent context showing the incomplete element with its parent structure hierarchy.
|
||||
Build hierarchical context showing incomplete JSON structure.
|
||||
|
||||
Logic (as per user instruction):
|
||||
1. Cut piece level: element of a list (the incomplete element at cut point)
|
||||
2. Parent of the cut element: the list/array containing the cut piece (with cut point shown)
|
||||
3. Last complete object on the same level like the cut object (if exists) PLUS further previous
|
||||
content from the json string (maximum 1000 characters)
|
||||
4. Next parent levels, until root. Further 1000 characters to show content (but only complete
|
||||
objects - if too big, not to show), then only showing metadata until root
|
||||
|
||||
Example output structure:
|
||||
{
|
||||
"elements": [
|
||||
{
|
||||
"content": {
|
||||
"rows": [
|
||||
[37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
|
||||
[37957, 37963, 37967, 37987, 37991, <-- CUT POINT (incomplete)
|
||||
Shows:
|
||||
- Full hierarchy structure (always shown)
|
||||
- Complete elements before cut (within 200 char DATA budget)
|
||||
- Cut piece marked with <-- CUT POINT (incomplete)
|
||||
- Does NOT close open structures
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
|
||||
if breakPosition <= 0 or breakPosition >= len(jsonContent):
|
||||
# Invalid break position - show last 500 chars
|
||||
return jsonContent[-500:] if len(jsonContent) > 500 else jsonContent
|
||||
|
||||
contextParts = []
|
||||
|
||||
# Find structure hierarchy backwards from break point
|
||||
hierarchy = _findStructureHierarchy(jsonContent, breakPosition)
|
||||
if breakPosition <= 0 or breakPosition > len(jsonContent):
|
||||
return jsonContent
|
||||
|
||||
hierarchy = findStructureHierarchy(jsonContent, breakPosition)
|
||||
if not hierarchy:
|
||||
# Fallback: show simple context
|
||||
contextParts.append("Cut point context:\n")
|
||||
contextStart = max(0, breakPosition - 500)
|
||||
contextParts.append(jsonContent[contextStart:breakPosition + 100])
|
||||
return "\n".join(contextParts)
|
||||
|
||||
# Step 1: Extract cut piece (incomplete element at cut point)
|
||||
cutPiece = _extractCutPiece(jsonContent, breakPosition)
|
||||
|
||||
# Step 2: Find the cut level (the array/object containing the cut piece)
|
||||
cutLevel = hierarchy[-1] if hierarchy else None
|
||||
|
||||
if not cutLevel:
|
||||
# Fallback
|
||||
contextParts.append("Cut point context:\n")
|
||||
contextStart = max(0, breakPosition - 500)
|
||||
contextParts.append(jsonContent[contextStart:breakPosition + 100])
|
||||
return "\n".join(contextParts)
|
||||
|
||||
# Build context following the exact structure requested
|
||||
# Show hierarchical structure from root to cut point
|
||||
|
||||
# Extract the actual JSON structure from root to cut point
|
||||
# Build the full hierarchical structure showing:
|
||||
# 4. Parent levels until root (with content/metadata limits)
|
||||
# 3. Last complete elements on same level + previous content (max 1000 chars)
|
||||
# 2. Parent container (the list) with cut piece
|
||||
# 1. Cut piece
|
||||
return jsonContent[:breakPosition]
|
||||
|
||||
cutPiece = extractCutPiece(jsonContent, breakPosition)
|
||||
resultLines = []
|
||||
DATA_BUDGET = 500
|
||||
|
||||
# Build structure from root to cut level
|
||||
# Extract actual JSON content for each level
|
||||
for i, level in enumerate(hierarchy):
|
||||
# Build hierarchy level by level - show actual JSON structure
|
||||
for levelIndex, level in enumerate(hierarchy):
|
||||
levelType = level['type']
|
||||
start = level['start_pos']
|
||||
end = level['end_pos'] if i < len(hierarchy) - 1 else breakPosition
|
||||
key = level.get('key')
|
||||
depth = level['depth']
|
||||
levelStart = level['start_pos']
|
||||
levelDepth = level['depth']
|
||||
indent = " " * levelDepth
|
||||
isCutLevel = (levelIndex == len(hierarchy) - 1)
|
||||
isParentOfCutLevel = (levelIndex == len(hierarchy) - 2)
|
||||
|
||||
indent = " " * depth
|
||||
|
||||
if i < len(hierarchy) - 1:
|
||||
# Parent levels - show opening structure
|
||||
levelContent = jsonContent[start:end]
|
||||
|
||||
# If content is too large, show only metadata
|
||||
if len(levelContent) > 1000:
|
||||
# Show opening with key
|
||||
opening = jsonContent[start:min(start + 100, end)]
|
||||
if key:
|
||||
resultLines.append(f'{indent}"{key}": {{')
|
||||
else:
|
||||
resultLines.append(f'{indent}{{')
|
||||
resultLines.append(f'{indent} ...')
|
||||
else:
|
||||
# Show opening structure
|
||||
if key:
|
||||
# Find where the key's value starts
|
||||
keyEnd = jsonContent.find(':', start)
|
||||
if keyEnd > 0:
|
||||
opening = jsonContent[start:min(keyEnd + 50, end)]
|
||||
resultLines.append(f'{indent}{opening}')
|
||||
else:
|
||||
opening = jsonContent[start:min(start + 50, end)]
|
||||
resultLines.append(f'{indent}{opening}')
|
||||
# Get next level info
|
||||
if levelIndex < len(hierarchy) - 1:
|
||||
nextLevel = hierarchy[levelIndex + 1]
|
||||
nextLevelStart = nextLevel['start_pos']
|
||||
else:
|
||||
# Cut level - show detailed context
|
||||
cutLevelType = levelType
|
||||
cutLevelStart = start
|
||||
cutLevelKey = key
|
||||
cutLevelDepth = depth
|
||||
nextLevelStart = breakPosition
|
||||
|
||||
# Show key if available
|
||||
if cutLevelKey:
|
||||
resultLines.append(f'{indent}"{cutLevelKey}": {{')
|
||||
indent += " "
|
||||
# Show opening structure for this level
|
||||
resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
|
||||
childIndent = indent + " "
|
||||
|
||||
if cutLevelType == 'array':
|
||||
# Show array opening
|
||||
arrayKey = _findKeyBefore(jsonContent, cutLevelStart)
|
||||
if arrayKey:
|
||||
resultLines.append(f'{indent}"{arrayKey}": [')
|
||||
else:
|
||||
resultLines.append(f'{indent}[')
|
||||
indent += " "
|
||||
|
||||
# 3. Show last complete elements on same level + previous content (max 1000 chars)
|
||||
contentBeforeBreak = jsonContent[cutLevelStart:breakPosition]
|
||||
lastCompleteElements = _extractLastCompleteArrayElementsWithContext(
|
||||
contentBeforeBreak, jsonContent, cutLevelStart, maxChars=1000
|
||||
)
|
||||
if lastCompleteElements:
|
||||
resultLines.append(lastCompleteElements)
|
||||
|
||||
# 2. Show parent container (the list) with cut piece
|
||||
cutArrayElement = _findCutArrayElement(jsonContent, breakPosition, cutLevelStart)
|
||||
if cutArrayElement:
|
||||
resultLines.append(f'{indent}{cutArrayElement} <-- CUT POINT (incomplete)')
|
||||
else:
|
||||
# Fallback: show what we have at break point
|
||||
cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
|
||||
resultLines.append(f'{indent}{cutPart} <-- CUT POINT (incomplete)')
|
||||
|
||||
# Close the array
|
||||
indent = indent[:-2] if len(indent) >= 2 else indent
|
||||
resultLines.append(f'{indent}]')
|
||||
if isCutLevel:
|
||||
# Cut level: show cut piece
|
||||
if cutPiece:
|
||||
for line in cutPiece.split('\n'):
|
||||
stripped = line.strip()
|
||||
if stripped:
|
||||
resultLines.append(f'{childIndent}{stripped}')
|
||||
resultLines[-1] += ' <-- CUT POINT (incomplete)'
|
||||
else:
|
||||
# Object at cut level
|
||||
cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
|
||||
preview = jsonContent[cutLevelStart:breakPosition]
|
||||
preview = preview[-500:] if len(preview) > 500 else preview
|
||||
resultLines.append(f'{indent}{preview}... {cutPart} <-- CUT POINT (incomplete)')
|
||||
resultLines.append(f'{childIndent}... <-- CUT POINT (incomplete)')
|
||||
|
||||
# Close all parent structures
|
||||
for i in range(len(hierarchy) - 2, -1, -1):
|
||||
level = hierarchy[i]
|
||||
depth = level['depth']
|
||||
indent = " " * depth
|
||||
resultLines.append(f'{indent}}}')
|
||||
elif isParentOfCutLevel and levelType == 'array':
|
||||
# Parent of cut level: show complete elements with budget
|
||||
completeElements = _findCompleteElementsAtLevel(
|
||||
jsonContent, levelStart, nextLevelStart, levelDepth
|
||||
)
|
||||
|
||||
contextParts.append("\n".join(resultLines))
|
||||
print(f"DEBUG: Found {len(completeElements)} complete elements")
|
||||
print(f"DEBUG: Budget = {DATA_BUDGET}")
|
||||
|
||||
return "\n".join(contextParts)
|
||||
dataBudget = DATA_BUDGET
|
||||
for elementStart, elementEnd in reversed(completeElements):
|
||||
elementData = jsonContent[elementStart:elementEnd].strip()
|
||||
elementSize = len(elementData)
|
||||
|
||||
print(f"DEBUG: Element size = {elementSize}, remaining budget = {dataBudget}")
|
||||
|
||||
if elementSize == 0:
|
||||
continue
|
||||
|
||||
if elementSize > dataBudget:
|
||||
print(f"DEBUG: Element too large, stopping")
|
||||
break
|
||||
|
||||
print(f"DEBUG: Adding element (size {elementSize})")
|
||||
for line in elementData.split('\n'):
|
||||
stripped = line.strip()
|
||||
if stripped:
|
||||
resultLines.append(f'{childIndent}{stripped}')
|
||||
if elementEnd < nextLevelStart:
|
||||
resultLines[-1] += ','
|
||||
|
||||
dataBudget -= elementSize
|
||||
print(f"DEBUG: Budget after decrement = {dataBudget}")
|
||||
|
||||
if dataBudget <= 0:
|
||||
print(f"DEBUG: Budget exhausted, stopping")
|
||||
break
|
||||
|
||||
else:
|
||||
# Other parent levels: show path content (keys and values) leading to next level
|
||||
pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
|
||||
if pathContent:
|
||||
# Show all path content (structure is always shown, not truncated)
|
||||
for line in pathContent.split('\n'):
|
||||
stripped = line.strip()
|
||||
if stripped:
|
||||
resultLines.append(f'{childIndent}{stripped}')
|
||||
|
||||
return "\n".join(resultLines)
|
||||
|
||||
|
||||
def _extractCutPiece(jsonContent: str, breakPosition: int) -> str:
|
||||
"""Extract the incomplete piece at the cut point."""
|
||||
# Get characters after break point (incomplete part)
|
||||
afterBreak = jsonContent[breakPosition:breakPosition + 200].strip()
|
||||
# Find where the incomplete piece ends (next comma, bracket, brace, or end)
|
||||
for i, char in enumerate(afterBreak):
|
||||
if char in [',', ']', '}', '\n']:
|
||||
return afterBreak[:i].strip()
|
||||
return afterBreak[:50].strip() # Limit to 50 chars if no delimiter found
|
||||
def _buildNestedHierarchy(
|
||||
resultLines: List[str],
|
||||
jsonContent: str,
|
||||
hierarchy: List[Dict[str, Any]],
|
||||
levelIndex: int,
|
||||
breakPosition: int,
|
||||
cutPiece: str,
|
||||
cutLevel: Dict[str, Any]
|
||||
) -> None:
|
||||
"""
|
||||
Recursively build nested hierarchy from root to cut level.
|
||||
This ensures proper nesting where each level contains the next level.
|
||||
"""
|
||||
if levelIndex >= len(hierarchy):
|
||||
return
|
||||
|
||||
level = hierarchy[levelIndex]
|
||||
levelType = level['type']
|
||||
levelStart = level['start_pos']
|
||||
levelKey = level.get('key')
|
||||
levelDepth = level['depth']
|
||||
indent = " " * levelDepth
|
||||
|
||||
isCutLevel = (levelIndex == len(hierarchy) - 1)
|
||||
|
||||
# Show opening structure for this level
|
||||
if levelKey:
|
||||
resultLines.append(f'{indent}"{levelKey}": {{' if levelType == 'object' else f'{indent}"{levelKey}": [')
|
||||
else:
|
||||
resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
|
||||
|
||||
childIndent = indent + " "
|
||||
|
||||
if isCutLevel:
|
||||
# Cut level - show content (complete elements + cut piece)
|
||||
if levelType == 'array':
|
||||
charBudget = 1000
|
||||
completeElements = _findCompleteElementsAtLevel(
|
||||
jsonContent, levelStart, breakPosition, levelDepth
|
||||
)
|
||||
|
||||
# Show complete elements (working backwards from the cut)
|
||||
for elementStart, elementEnd in reversed(completeElements):
|
||||
elementSize = elementEnd - elementStart
|
||||
if charBudget >= elementSize:
|
||||
element = jsonContent[elementStart:elementEnd].strip()
|
||||
if element:
|
||||
elementLines = element.split('\n')
|
||||
for line in elementLines:
|
||||
if line.strip():
|
||||
resultLines.append(f'{childIndent}{line}')
|
||||
if elementEnd < breakPosition:
|
||||
resultLines[-1] += ','
|
||||
charBudget -= elementSize
|
||||
else:
|
||||
break
|
||||
|
||||
# Show cut piece
|
||||
if cutPiece:
|
||||
cutPieceLines = cutPiece.split('\n')
|
||||
for line in cutPieceLines:
|
||||
if line.strip():
|
||||
resultLines.append(f'{childIndent}{line}')
|
||||
resultLines[-1] += ' <-- CUT POINT (incomplete)'
|
||||
else:
|
||||
cutPart = jsonContent[max(0, breakPosition-50):breakPosition]
|
||||
resultLines.append(f'{childIndent}{cutPart} <-- CUT POINT (incomplete)')
|
||||
|
||||
else:
|
||||
# Object at cut level
|
||||
previewSize = breakPosition - levelStart
|
||||
maxPreviewSize = 500
|
||||
if previewSize > maxPreviewSize:
|
||||
previewStart = breakPosition - maxPreviewSize
|
||||
preview = jsonContent[previewStart:breakPosition]
|
||||
else:
|
||||
preview = jsonContent[levelStart:breakPosition]
|
||||
|
||||
previewLines = preview.split('\n')
|
||||
for line in previewLines:
|
||||
if line.strip():
|
||||
resultLines.append(f'{childIndent}{line}')
|
||||
|
||||
cutPart = jsonContent[breakPosition:min(breakPosition + 50, len(jsonContent))]
|
||||
resultLines.append(f'{childIndent}... {cutPart} <-- CUT POINT (incomplete)')
|
||||
|
||||
else:
|
||||
# Parent level - show path to next level, then recursively build next level
|
||||
nextLevel = hierarchy[levelIndex + 1]
|
||||
nextLevelKey = nextLevel.get('key')
|
||||
nextLevelStart = nextLevel['start_pos']
|
||||
nextLevelType = nextLevel['type']
|
||||
|
||||
# Extract content between this level's opening and next level's start
|
||||
# This shows any keys/values that come before the next level
|
||||
pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
|
||||
|
||||
# Show the path content (keys/values before next level)
|
||||
if len(pathContent) > 0 and len(pathContent) <= 500:
|
||||
pathLines = pathContent.split('\n')
|
||||
nonEmptyLines = [line for line in pathLines if line.strip()]
|
||||
if nonEmptyLines:
|
||||
for line in nonEmptyLines[:20]: # Show more lines
|
||||
if line.strip():
|
||||
resultLines.append(f'{childIndent}{line}')
|
||||
if len(nonEmptyLines) > 20:
|
||||
resultLines.append(f'{childIndent}... ({len(nonEmptyLines) - 20} more lines) ...')
|
||||
elif len(pathContent) > 500:
|
||||
# Content too large - show placeholder
|
||||
resultLines.append(f'{childIndent}... (content too large, {len(pathContent)} chars) ...')
|
||||
|
||||
# Always show the key leading to next level if it exists
|
||||
# The recursive call will show the opening bracket/brace, so we just show the key here
|
||||
if nextLevelKey:
|
||||
# Show the key (the recursive call will add the opening bracket/brace)
|
||||
# Actually, the recursive call already shows the full opening with key,
|
||||
# so we don't need to show it here - just let the recursive call handle it
|
||||
pass
|
||||
|
||||
# Recursively build next level (this will show its opening structure and content)
|
||||
_buildNestedHierarchy(resultLines, jsonContent, hierarchy, levelIndex + 1, breakPosition, cutPiece, cutLevel)
|
||||
|
||||
# Close this level
|
||||
resultLines.append(f'{indent}}}' if levelType == 'object' else f'{indent}]')
|
||||
|
||||
|
||||
def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
|
||||
def _findCompleteElementsAtLevel(
|
||||
jsonContent: str,
|
||||
levelStart: int,
|
||||
breakPosition: int,
|
||||
targetDepth: int
|
||||
) -> List[Tuple[int, int]]:
|
||||
"""
|
||||
Find all complete elements at a specific depth level.
|
||||
|
||||
Elements inside the structure at targetDepth are at targetDepth + 1.
|
||||
We track depth relative to the start of the structure.
|
||||
|
||||
Returns list of (start, end) tuples for complete elements.
|
||||
"""
|
||||
completeElements = []
|
||||
|
||||
# Track depth relative to the level start
|
||||
# When we're at levelStart, we're at the opening bracket/brace (depth = targetDepth)
|
||||
# Elements inside are at depth = targetDepth + 1
|
||||
relativeDepth = 0 # Depth relative to level start (0 = at opening bracket/brace)
|
||||
inString = False
|
||||
escapeNext = False
|
||||
currentElementStart = None
|
||||
|
||||
# Find the first non-whitespace character after the opening bracket/brace
|
||||
for i in range(levelStart + 1, min(breakPosition, len(jsonContent))):
|
||||
if jsonContent[i] not in [' ', '\n', '\r', '\t']:
|
||||
currentElementStart = i
|
||||
break
|
||||
|
||||
if currentElementStart is None:
|
||||
return completeElements
|
||||
|
||||
for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
|
||||
char = jsonContent[i]
|
||||
|
||||
if escapeNext:
|
||||
escapeNext = False
|
||||
continue
|
||||
|
||||
if char == '\\':
|
||||
escapeNext = True
|
||||
continue
|
||||
|
||||
if char == '"':
|
||||
inString = not inString
|
||||
continue
|
||||
|
||||
if not inString:
|
||||
if char == '{':
|
||||
relativeDepth += 1
|
||||
elif char == '}':
|
||||
relativeDepth -= 1
|
||||
# Element is complete when we return to the level's depth (relativeDepth == 0)
|
||||
if relativeDepth == 0:
|
||||
# Found end of complete element
|
||||
if currentElementStart is not None:
|
||||
completeElements.append((currentElementStart, i + 1))
|
||||
# Find start of next element
|
||||
j = i + 1
|
||||
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
|
||||
j += 1
|
||||
if j < breakPosition:
|
||||
currentElementStart = j
|
||||
else:
|
||||
currentElementStart = None
|
||||
elif char == '[':
|
||||
relativeDepth += 1
|
||||
elif char == ']':
|
||||
relativeDepth -= 1
|
||||
# Element is complete when we return to the level's depth (relativeDepth == 0)
|
||||
if relativeDepth == 0:
|
||||
# Found end of complete element
|
||||
if currentElementStart is not None:
|
||||
completeElements.append((currentElementStart, i + 1))
|
||||
# Find start of next element
|
||||
j = i + 1
|
||||
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
|
||||
j += 1
|
||||
if j < breakPosition:
|
||||
currentElementStart = j
|
||||
else:
|
||||
currentElementStart = None
|
||||
elif char == ',':
|
||||
# Comma at relativeDepth == 0 means we're between elements at the cut level
|
||||
if relativeDepth == 0:
|
||||
# Element boundary - check if we have a complete element
|
||||
if currentElementStart is not None and currentElementStart < i:
|
||||
# Simple value (string, number, boolean, null) - complete at comma
|
||||
completeElements.append((currentElementStart, i))
|
||||
# Find start of next element
|
||||
j = i + 1
|
||||
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
|
||||
j += 1
|
||||
if j < breakPosition:
|
||||
currentElementStart = j
|
||||
else:
|
||||
currentElementStart = None
|
||||
|
||||
return completeElements
|
||||
|
||||
|
||||
def extractCutPiece(jsonContent: str, breakPosition: int) -> str:
    """
    Return the unfinished fragment that ends at ``breakPosition``.

    The innermost level reported by ``findStructureHierarchy`` is scanned
    from its ``start_pos`` up to the break. Commas and colons found outside
    strings at the level's recorded ``depth`` move the start of the current
    element forward; the text from that start to the break is returned.

    Args:
        jsonContent: The (truncated) JSON text.
        breakPosition: Index at which the text was cut.

    Returns:
        The fragment with leading whitespace removed and trailing whitespace
        kept. Returns ``""`` when ``breakPosition`` is not within
        ``1..len(jsonContent)``. When no hierarchy is found, returns up to
        the last 200 characters before the break, left-stripped.
    """
    if not 0 < breakPosition <= len(jsonContent):
        return ""

    levels = findStructureHierarchy(jsonContent, breakPosition)
    if not levels:
        # Fallback: plain window of text before the break.
        windowStart = max(0, breakPosition - 200)
        return jsonContent[windowStart:breakPosition].lstrip()

    innermost = levels[-1]
    scanFrom = innermost['start_pos']
    levelDepth = innermost.get('depth', 0)
    scanTo = min(breakPosition, len(jsonContent))
    blanks = (' ', '\n', '\r', '\t')

    def skipBlanks(pos: int) -> int:
        # Advance over whitespace without running past the break or the text.
        while pos < breakPosition and pos < len(jsonContent) and jsonContent[pos] in blanks:
            pos += 1
        return pos

    # Combined brace + bracket nesting, counted from zero at scanFrom.
    # NOTE(review): this counter starts at 0 at the level's start_pos while
    # levelDepth is whatever findStructureHierarchy recorded - confirm that
    # both use the same origin.
    nesting = 0
    insideString = False
    skipNext = False
    elementStart = scanFrom

    for idx in range(scanFrom, scanTo):
        ch = jsonContent[idx]
        if skipNext:
            skipNext = False
        elif ch == '\\':
            skipNext = True
        elif ch == '"':
            insideString = not insideString
        elif insideString:
            continue
        elif ch in ('{', '['):
            nesting += 1
        elif ch in ('}', ']'):
            nesting -= 1
        elif ch in (',', ':') and nesting == levelDepth:
            # Separator at the level's depth: the next element (or value)
            # begins after it, unless only whitespace remains before the break.
            candidate = skipBlanks(idx + 1)
            if candidate < breakPosition:
                elementStart = candidate

    # First non-blank character of the element, if there is one.
    firstSolid = next(
        (pos for pos in range(elementStart, scanTo) if jsonContent[pos] not in blanks),
        elementStart,
    )

    # Leading whitespace is dropped; trailing whitespace is kept for merging.
    piece = jsonContent[firstSolid:breakPosition]
    return piece.lstrip() or piece
|
||||
|
||||
|
||||
def findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Find the structure hierarchy backwards from break point to root.
|
||||
|
||||
Returns list of level info dicts, from root to cut level.
|
||||
Each level has: type, start_pos, end_pos, parent_start, content_preview
|
||||
Each level has: type, start_pos, end_pos, depth, key
|
||||
|
||||
CRITICAL: Returns the path from root to cut point.
|
||||
- For closed structures: uses actual end position
|
||||
- For open structures: uses breakPosition
|
||||
"""
|
||||
hierarchy = []
|
||||
|
||||
|
|
@ -1545,8 +1811,11 @@ def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[s
|
|||
inString = False
|
||||
escapeNext = False
|
||||
|
||||
# Find all structure boundaries before break point
|
||||
structureStack = [] # Stack of (type, start_pos, depth)
|
||||
# Track ALL structures (both closed and open) to get correct end positions
|
||||
# Stack of (type, start_pos, depth, end_pos)
|
||||
# end_pos is None until structure is closed
|
||||
structureStack = [] # Stack of (type, start_pos, depth, end_pos)
|
||||
closedStructures = [] # List of closed structures with their end positions
|
||||
|
||||
for i in range(breakPosition):
|
||||
if i >= len(jsonContent):
|
||||
|
|
@ -1568,52 +1837,179 @@ def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[s
|
|||
|
||||
if not inString:
|
||||
if char == '{':
|
||||
structureStack.append(('object', i, braceDepth + bracketDepth))
|
||||
# Store depth BEFORE incrementing (this is the level of the structure being opened)
|
||||
currentDepth = braceDepth + bracketDepth
|
||||
structureStack.append(('object', i, currentDepth, None))
|
||||
braceDepth += 1
|
||||
elif char == '}':
|
||||
# When closing, record the end position and move to closed structures
|
||||
if structureStack and structureStack[-1][0] == 'object':
|
||||
_, start, depth = structureStack.pop()
|
||||
hierarchy.append({
|
||||
'type': 'object',
|
||||
structType, start, depth, _ = structureStack.pop()
|
||||
closedStructures.append({
|
||||
'type': structType,
|
||||
'start_pos': start,
|
||||
'end_pos': i + 1,
|
||||
'end_pos': i + 1, # Actual end position
|
||||
'depth': depth,
|
||||
'key': _findKeyBefore(jsonContent, start)
|
||||
'key': findKeyBefore(jsonContent, start)
|
||||
})
|
||||
braceDepth -= 1
|
||||
elif char == '[':
|
||||
structureStack.append(('array', i, braceDepth + bracketDepth))
|
||||
# Store depth BEFORE incrementing
|
||||
currentDepth = braceDepth + bracketDepth
|
||||
structureStack.append(('array', i, currentDepth, None))
|
||||
bracketDepth += 1
|
||||
elif char == ']':
|
||||
# When closing, record the end position
|
||||
if structureStack and structureStack[-1][0] == 'array':
|
||||
_, start, depth = structureStack.pop()
|
||||
hierarchy.append({
|
||||
'type': 'array',
|
||||
structType, start, depth, _ = structureStack.pop()
|
||||
closedStructures.append({
|
||||
'type': structType,
|
||||
'start_pos': start,
|
||||
'end_pos': i + 1,
|
||||
'end_pos': i + 1, # Actual end position
|
||||
'depth': depth,
|
||||
'key': _findKeyBefore(jsonContent, start)
|
||||
'key': findKeyBefore(jsonContent, start)
|
||||
})
|
||||
bracketDepth -= 1
|
||||
|
||||
# Sort by depth (root first) and filter to get hierarchy from root to cut
|
||||
hierarchy.sort(key=lambda x: x['depth'])
|
||||
# Build hierarchy: we need the actual path from root to cut level
|
||||
# CRITICAL: Only include structures that are actually on the path
|
||||
# A structure is on the path if it contains the next level's start position
|
||||
|
||||
# Find which level contains the break point
|
||||
cutLevelIndex = -1
|
||||
for i, level in enumerate(hierarchy):
|
||||
if level['start_pos'] < breakPosition <= level['end_pos']:
|
||||
cutLevelIndex = i
|
||||
if not structureStack:
|
||||
# No open structures - all were closed before break
|
||||
# Return path to deepest closed structure
|
||||
if closedStructures:
|
||||
maxDepth = max(s['depth'] for s in closedStructures)
|
||||
# Build path: each level must contain the next level
|
||||
path = []
|
||||
for depth in range(maxDepth + 1):
|
||||
candidates = [s for s in closedStructures if s['depth'] == depth]
|
||||
if candidates:
|
||||
# If multiple at same depth, use the one that contains structures at deeper depths
|
||||
if depth < maxDepth:
|
||||
# Find the one that contains a structure at depth + 1
|
||||
nextDepthCandidates = [s for s in closedStructures if s['depth'] == depth + 1]
|
||||
if nextDepthCandidates:
|
||||
nextStart = min(s['start_pos'] for s in nextDepthCandidates)
|
||||
# Find candidate that contains nextStart
|
||||
for candidate in candidates:
|
||||
if candidate['start_pos'] < nextStart < candidate['end_pos']:
|
||||
path.append(candidate)
|
||||
break
|
||||
else:
|
||||
# Fallback: use first candidate
|
||||
path.append(candidates[0])
|
||||
else:
|
||||
path.append(candidates[0])
|
||||
else:
|
||||
path.append(candidates[0])
|
||||
return path
|
||||
return []
|
||||
|
||||
# We have open structures - build path from root to deepest open structure
|
||||
# Strategy: Start from deepest open structure and work backwards to root,
|
||||
# ensuring each level contains the next level
|
||||
|
||||
openByDepth = {}
|
||||
for structType, start, depth, _ in structureStack:
|
||||
openByDepth[depth] = {
|
||||
'type': structType,
|
||||
'start_pos': start,
|
||||
'end_pos': breakPosition,
|
||||
'depth': depth,
|
||||
'key': findKeyBefore(jsonContent, start)
|
||||
}
|
||||
|
||||
maxOpenDepth = max(openByDepth.keys())
|
||||
|
||||
# Build path backwards from deepest to root
|
||||
path = []
|
||||
currentDepth = maxOpenDepth
|
||||
currentStart = openByDepth[maxOpenDepth]['start_pos']
|
||||
|
||||
while currentDepth >= 0:
|
||||
# Look for structure at currentDepth that contains currentStart
|
||||
# First check open structures
|
||||
if currentDepth in openByDepth:
|
||||
struct = openByDepth[currentDepth]
|
||||
if struct['start_pos'] <= currentStart:
|
||||
path.insert(0, struct)
|
||||
currentStart = struct['start_pos']
|
||||
currentDepth -= 1
|
||||
continue
|
||||
|
||||
# Check closed structures
|
||||
candidates = [s for s in closedStructures if s['depth'] == currentDepth and s['start_pos'] <= currentStart < s['end_pos']]
|
||||
if candidates:
|
||||
# Use the one that ends latest (most recent)
|
||||
struct = max(candidates, key=lambda x: x['end_pos'])
|
||||
path.insert(0, struct)
|
||||
currentStart = struct['start_pos']
|
||||
currentDepth -= 1
|
||||
else:
|
||||
# No structure found at this depth - break
|
||||
break
|
||||
|
||||
if cutLevelIndex >= 0:
|
||||
# Return hierarchy from root to cut level
|
||||
return hierarchy[:cutLevelIndex + 1]
|
||||
return path
|
||||
|
||||
# Return the hierarchy (path from root to cut level)
|
||||
if hierarchy:
|
||||
return hierarchy
|
||||
|
||||
# Fallback: if JSON starts with { or [, create a root level
|
||||
if jsonContent and jsonContent.strip():
|
||||
firstChar = jsonContent.strip()[0]
|
||||
if firstChar == '{':
|
||||
return [{
|
||||
'type': 'object',
|
||||
'start_pos': 0,
|
||||
'end_pos': breakPosition,
|
||||
'depth': 0,
|
||||
'key': None
|
||||
}]
|
||||
elif firstChar == '[':
|
||||
return [{
|
||||
'type': 'array',
|
||||
'start_pos': 0,
|
||||
'end_pos': breakPosition,
|
||||
'depth': 0,
|
||||
'key': None
|
||||
}]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def _findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
|
||||
def extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
    """
    Extract overlap context: the object containing the cut element.

    Returns ONLY the object containing the cut element (the incomplete element
    itself). This is what the continuation should start with for proper merging.

    CRITICAL: Preserves trailing whitespace for proper merging. No return path
    trims the end of the returned text; the two fallback windows only drop
    their leading whitespace.

    Args:
        jsonContent: The incomplete JSON string
        breakPosition: Position where JSON was cut

    Returns:
        The cut piece reported by extractCutPiece when it is non-empty.
        Otherwise a window of at most 200 characters: the end of jsonContent
        when breakPosition is not positive, or the text just before
        breakPosition when no cut piece was found. Empty string for empty
        input.
    """
    if not jsonContent:
        return ""

    if breakPosition <= 0:
        # No usable cut position: fall back to the tail of the content.
        # lstrip (not strip) so whitespace at the cut is kept, matching the
        # fallback at the end of this function.
        return jsonContent[-200:].lstrip()

    # Extract cut piece (incomplete element) - the object containing the cut element
    cutPiece = extractCutPiece(jsonContent, breakPosition)
    if cutPiece:
        return cutPiece

    # Fallback: show content before break
    return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()
|
||||
|
||||
|
||||
def findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
|
||||
"""Find the key name before a structure start position."""
|
||||
# Look backwards for "key": pattern
|
||||
before = jsonContent[max(0, pos - 100):pos]
|
||||
|
|
@ -1832,10 +2228,13 @@ def _extractLastCompleteArrayElementsWithContext(
|
|||
break
|
||||
|
||||
if formattedElements:
|
||||
# Format as JSON array rows
|
||||
# Format as JSON array rows (without hardcoded indentation - caller will add it)
|
||||
result = []
|
||||
for elem in formattedElements:
|
||||
result.append(f" {elem},")
|
||||
# Remove leading comma if present (from mid-element extraction)
|
||||
cleanElem = elem.lstrip(',').strip()
|
||||
if cleanElem:
|
||||
result.append(f"{cleanElem},")
|
||||
return "\n".join(result)
|
||||
|
||||
return ""
|
||||
|
|
|
|||
216
tests/test_overlap_context.py
Normal file
216
tests/test_overlap_context.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""
|
||||
Test function to verify structure hierarchy and overlap context generation.
|
||||
Tests the functions used to generate continuation prompts for incomplete JSON.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def testOverlapContext():
    """
    Run the continuation-context helpers against one truncated JSON response.

    Reads a saved, cut-off response file, takes the end of the last run of
    digits as the break position, then prints and collects the output of:
    1. findStructureHierarchy
    2. extractCutPiece
    3. buildIncompleteContext
    4. CodeGenerationPath._extractOverlapContext

    Returns:
        Dict with the keys "hierarchy", "cutPiece", "incompleteContext",
        "overlapContext", "breakPosition", "json1Length" and "json1Content".
    """
    import re
    import sys

    heavyRule = "=" * 80
    lightRule = "-" * 80

    # Saved response that was cut off mid-array
    promptsDir = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts"
    responseFile = promptsDir / "20260104-220716-032-chapter_2_section_section_2_response.txt"
    json1Content = responseFile.read_text(encoding='utf-8').strip()

    # The content ends inside an unfinished number array, so the break sits
    # right after the last number; without any digits, use the end of the text.
    numberEnds = [found.end() for found in re.finditer(r'\d+', json1Content)]
    breakPosition = numberEnds[-1] if numberEnds else len(json1Content.rstrip())

    print(f"Break position determined: {breakPosition}")
    print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'")

    # Make the project packages importable before pulling in the functions under test
    sys.path.insert(0, str(Path(__file__).parent.parent))

    from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext
    from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath

    # Test 1: structure hierarchy
    contentLength = len(json1Content)
    print(heavyRule)
    print("TEST 1: Structure Hierarchy")
    print(heavyRule)
    print(f"Break position: {breakPosition}")
    print(f"JSON length: {contentLength}")
    print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'")
    hierarchy = findStructureHierarchy(json1Content, breakPosition)
    levelCount = len(hierarchy) if hierarchy else 0
    print(f"\nHierarchy levels found: {levelCount}")
    if hierarchy:
        print("\nHierarchy details (from root to cut level):")
        for index, entry in enumerate(hierarchy):
            kind = entry['type']
            keyName = entry.get('key', 'N/A')
            depth = entry['depth']
            startPos = entry['start_pos']
            endPos = entry['end_pos']
            print(f" Level {index}: {kind:6s} depth={depth} key='{keyName}' start={startPos} end={endPos}")
            if startPos >= contentLength:
                continue
            # Preview at most 50 characters of this level
            previewEnd = min(startPos + 50, endPos, contentLength)
            print(f" Content: {json1Content[startPos:previewEnd]!r}")
    else:
        print("WARNING: No hierarchy found! This suggests the function isn't working correctly.")

    # Test 2: cut piece
    print("\n" + heavyRule)
    print("TEST 2: Extract Cut Piece")
    print(heavyRule)
    cutPiece = extractCutPiece(json1Content, breakPosition)
    print(f"\nCut piece extracted (length: {len(cutPiece)}):")
    if cutPiece:
        print(cutPiece[:500])
    else:
        print("WARNING: Cut piece is empty! This suggests the function isn't working correctly.")
        # Manual probe: nearest '[', ',' or newline before the break; only an
        # opening bracket counts as the start of the unfinished array.
        nearestDelimiter = max(json1Content.rfind(mark, 0, breakPosition) for mark in ('[', ',', '\n'))
        if nearestDelimiter >= 0 and json1Content[nearestDelimiter] == '[':
            manualCutPiece = json1Content[nearestDelimiter:breakPosition]
            print(f"\nManually found cut piece: {manualCutPiece[:200]}")

    # Test 3: incomplete context (structure hierarchy with cut point)
    print("\n" + heavyRule)
    print("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)")
    print(heavyRule)
    print("Expected: Should show complete hierarchy from root to cut point")
    print(" with complete elements before cut and cut piece marked")
    incompleteContext = buildIncompleteContext(json1Content, breakPosition)
    print(f"\nIncomplete context (length: {len(incompleteContext)} chars):")
    print(lightRule)
    print(incompleteContext)
    print(lightRule)

    if not incompleteContext:
        print("WARNING: Incomplete context is empty!")
    else:
        # Distinct indentation widths across non-blank lines hint at nesting
        indentLevels = {
            len(row) - len(row.lstrip())
            for row in incompleteContext.split('\n')
            if row.strip()
        }
        print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)")

        if "<-- CUT POINT" in incompleteContext:
            print("Validation: Cut point marker found ✓")
        else:
            print("Validation: WARNING - Cut point marker NOT found!")

        if incompleteContext.strip().startswith(('{', '[')):
            print("Validation: Root structure opening found ✓")
        else:
            print("Validation: WARNING - Root structure opening NOT found!")

    # Test 4: overlap context
    print("\n" + heavyRule)
    print("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)")
    print(heavyRule)
    overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition)
    print("\nOverlap context:")
    print(overlapContext)

    # Everything gathered above, for the caller to inspect
    return {
        "hierarchy": hierarchy,
        "cutPiece": cutPiece,
        "incompleteContext": incompleteContext,
        "overlapContext": overlapContext,
        "breakPosition": breakPosition,
        "json1Length": contentLength,
        "json1Content": json1Content,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry: run the checks, print a summary, then write a full report file.
    heavyRule = "=" * 80
    lightRule = "-" * 80

    def writeSectionHeader(out, title):
        """Write the blank-line gap, heavy rule, title and light rule that open a report section."""
        out.write("\n\n" + heavyRule + "\n")
        out.write(title + "\n")
        out.write(lightRule + "\n")

    print("Testing Overlap Context Generation")
    print(heavyRule)
    results = testOverlapContext()

    print("\n" + heavyRule)
    print("SUMMARY")
    print(heavyRule)
    print(f"\nBreak position: {results['breakPosition']}")
    print(f"JSON1 length: {results['json1Length']}")
    print(f"Hierarchy levels: {len(results['hierarchy']) if results['hierarchy'] else 0}")
    print(f"Cut piece length: {len(results['cutPiece'])}")
    print(f"Incomplete context length: {len(results['incompleteContext'])}")
    print(f"Overlap context length: {len(results['overlapContext'])}")

    # Save results to file for inspection
    outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt"
    outputPath.parent.mkdir(parents=True, exist_ok=True)

    with open(outputPath, 'w', encoding='utf-8') as report:
        report.write(heavyRule + "\n")
        report.write("OVERLAP CONTEXT TEST RESULTS\n")
        report.write(heavyRule + "\n\n")

        cutAt = results['breakPosition']
        fullText = results['json1Content']
        report.write("FIRST JSON (CUT/INCOMPLETE):\n")
        report.write(lightRule + "\n")
        report.write(f"Break position: {cutAt}\n")
        report.write(f"JSON length: {results['json1Length']}\n")
        report.write(f"Content around break: '{fullText[max(0, cutAt-100):cutAt+20]}'\n\n")
        report.write("Full JSON1 content:\n")
        report.write(fullText)

        writeSectionHeader(report, "STRUCTURE HIERARCHY:")
        levels = results['hierarchy']
        if not levels:
            report.write("No hierarchy found\n")
        else:
            report.write(f"Hierarchy levels found: {len(levels)}\n\n")
            report.write("Hierarchy details (from root to cut level):\n")
            for index, entry in enumerate(levels):
                kind = entry['type']
                keyName = entry.get('key', 'N/A')
                depth = entry['depth']
                startPos = entry['start_pos']
                endPos = entry['end_pos']
                report.write(f" Level {index}: {kind:6s} depth={depth} key='{keyName}' start={startPos} end={endPos}\n")

        writeSectionHeader(report, "INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):")
        report.write(results['incompleteContext'])

        writeSectionHeader(report, "OVERLAP CONTEXT (Object containing the cut element):")
        report.write(results['overlapContext'])

    print(f"\n\nFull results saved to: {outputPath}")
|
||||
Loading…
Reference in a new issue