fixed json looping context strings for cut point
This commit is contained in:
parent
c20e65ad4e
commit
879a3c0eff
9 changed files with 2036 additions and 370 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -262,7 +262,11 @@ class JsonMergeLogger:
|
||||||
JsonMergeLogger._log(f" {line}")
|
JsonMergeLogger._log(f" {line}")
|
||||||
JsonMergeLogger._log(" " + "="*76)
|
JsonMergeLogger._log(" " + "="*76)
|
||||||
else:
|
else:
|
||||||
JsonMergeLogger._log(f" Accumulated suffix (COMPLETE): {accSuffix}")
|
# For lists/arrays, only log summary to avoid log flooding
|
||||||
|
if isinstance(accSuffix, list):
|
||||||
|
JsonMergeLogger._log(f" Accumulated suffix: list with {len(accSuffix)} items")
|
||||||
|
else:
|
||||||
|
JsonMergeLogger._log(f" Accumulated suffix: {type(accSuffix).__name__}")
|
||||||
if fragPrefix is not None:
|
if fragPrefix is not None:
|
||||||
if isinstance(fragPrefix, str):
|
if isinstance(fragPrefix, str):
|
||||||
prefixLines = fragPrefix.split('\n')
|
prefixLines = fragPrefix.split('\n')
|
||||||
|
|
@ -278,7 +282,11 @@ class JsonMergeLogger:
|
||||||
for line in prefixLines:
|
for line in prefixLines:
|
||||||
JsonMergeLogger._log(f" {line}")
|
JsonMergeLogger._log(f" {line}")
|
||||||
else:
|
else:
|
||||||
JsonMergeLogger._log(f" Fragment prefix (COMPLETE): {fragPrefix}")
|
# For lists/arrays, only log summary to avoid log flooding
|
||||||
|
if isinstance(fragPrefix, list):
|
||||||
|
JsonMergeLogger._log(f" Fragment prefix: list with {len(fragPrefix)} items")
|
||||||
|
else:
|
||||||
|
JsonMergeLogger._log(f" Fragment prefix: {type(fragPrefix).__name__}")
|
||||||
else:
|
else:
|
||||||
JsonMergeLogger._log(f" ⚠️ No overlap detected - appending all")
|
JsonMergeLogger._log(f" ⚠️ No overlap detected - appending all")
|
||||||
|
|
||||||
|
|
@ -1903,13 +1911,32 @@ class ModularJsonMerger:
|
||||||
def _mergeStrings(accStr: str, fragStr: str, overlapLength: int) -> str:
|
def _mergeStrings(accStr: str, fragStr: str, overlapLength: int) -> str:
|
||||||
"""
|
"""
|
||||||
Merge two JSON strings together, removing the overlap.
|
Merge two JSON strings together, removing the overlap.
|
||||||
|
Handles whitespace at cut points properly for seamless merging.
|
||||||
"""
|
"""
|
||||||
if overlapLength > 0:
|
if overlapLength > 0:
|
||||||
# Remove overlap from fragment and append
|
# Remove overlap from fragment and append
|
||||||
|
# CRITICAL: Handle whitespace properly - if accumulated ends with whitespace
|
||||||
|
# and fragment starts with the same content, we need to preserve whitespace structure
|
||||||
merged = accStr + fragStr[overlapLength:]
|
merged = accStr + fragStr[overlapLength:]
|
||||||
else:
|
else:
|
||||||
# No overlap - just concatenate (might need comma or other separator)
|
# No overlap - just concatenate (might need comma or other separator)
|
||||||
# Try to add comma if needed
|
# CRITICAL: Preserve whitespace structure when merging
|
||||||
|
|
||||||
|
# Get trailing whitespace from accumulated (spaces, tabs, but not newlines)
|
||||||
|
accTrailingWs = ""
|
||||||
|
i = len(accStr) - 1
|
||||||
|
while i >= 0 and accStr[i] in [' ', '\t']:
|
||||||
|
accTrailingWs = accStr[i] + accTrailingWs
|
||||||
|
i -= 1
|
||||||
|
|
||||||
|
# Get leading whitespace from fragment (spaces, tabs, but not newlines)
|
||||||
|
fragLeadingWs = ""
|
||||||
|
i = 0
|
||||||
|
while i < len(fragStr) and fragStr[i] in [' ', '\t']:
|
||||||
|
fragLeadingWs += fragStr[i]
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# Trim for content detection but preserve whitespace structure
|
||||||
accTrimmed = accStr.rstrip().rstrip(',')
|
accTrimmed = accStr.rstrip().rstrip(',')
|
||||||
fragTrimmed = fragStr.lstrip().lstrip(',')
|
fragTrimmed = fragStr.lstrip().lstrip(',')
|
||||||
|
|
||||||
|
|
@ -1917,10 +1944,14 @@ class ModularJsonMerger:
|
||||||
if accTrimmed and fragTrimmed:
|
if accTrimmed and fragTrimmed:
|
||||||
# If accumulated ends with } or ] and fragment starts with { or [, we might need comma
|
# If accumulated ends with } or ] and fragment starts with { or [, we might need comma
|
||||||
if (accTrimmed[-1] in '}]' and fragTrimmed[0] in '{['):
|
if (accTrimmed[-1] in '}]' and fragTrimmed[0] in '{['):
|
||||||
merged = accTrimmed + ',' + fragTrimmed
|
# Add comma with appropriate whitespace
|
||||||
|
merged = accTrimmed + ',' + fragLeadingWs + fragTrimmed
|
||||||
else:
|
else:
|
||||||
merged = accTrimmed + fragTrimmed
|
# Merge with preserved whitespace structure
|
||||||
|
# Use the whitespace from fragment (it knows the proper spacing)
|
||||||
|
merged = accTrimmed + accTrailingWs + fragLeadingWs + fragTrimmed
|
||||||
else:
|
else:
|
||||||
|
# One is empty - just concatenate with preserved whitespace
|
||||||
merged = accStr + fragStr
|
merged = accStr + fragStr
|
||||||
|
|
||||||
return merged
|
return merged
|
||||||
|
|
|
||||||
|
|
@ -2198,16 +2198,66 @@ Output requirements:
|
||||||
incompletePart = continuationContext.incomplete_part
|
incompletePart = continuationContext.incomplete_part
|
||||||
lastRawJson = continuationContext.last_raw_json
|
lastRawJson = continuationContext.last_raw_json
|
||||||
|
|
||||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||||
overlapContext = ""
|
overlapContext = ""
|
||||||
if lastRawJson:
|
if lastRawJson:
|
||||||
overlapContext = lastRawJson[-100:].strip()
|
# Find break position in raw JSON
|
||||||
|
lastCompletePart = continuationContext.last_complete_part
|
||||||
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
||||||
|
if lastCompletePart:
|
||||||
|
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
|
||||||
|
normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
|
||||||
|
normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
|
||||||
|
|
||||||
|
# Find where normalizedComplete ends in normalizedRaw
|
||||||
|
pos = normalizedRaw.find(normalizedComplete)
|
||||||
|
if pos >= 0:
|
||||||
|
breakPos = pos + len(normalizedComplete)
|
||||||
|
else:
|
||||||
|
pos = lastRawJson.find(lastCompletePart)
|
||||||
|
if pos >= 0:
|
||||||
|
breakPos = pos + len(lastCompletePart)
|
||||||
|
elif incompletePart:
|
||||||
|
pos = lastRawJson.find(incompletePart)
|
||||||
|
if pos >= 0:
|
||||||
|
breakPos = pos
|
||||||
|
|
||||||
|
# Extract cut part and full part before (same level)
|
||||||
|
overlapContext = self._extractOverlapContext(lastRawJson, breakPos)
|
||||||
|
|
||||||
# Build unified context showing structure hierarchy with cut point
|
# Build unified context showing structure hierarchy with cut point
|
||||||
unifiedContext = ""
|
unifiedContext = ""
|
||||||
if lastRawJson:
|
if lastRawJson:
|
||||||
# Find break position in raw JSON
|
# Find break position in raw JSON
|
||||||
if incompletePart:
|
# Use last_complete_part length to find where complete part ends
|
||||||
|
lastCompletePart = continuationContext.last_complete_part
|
||||||
|
if lastCompletePart:
|
||||||
|
# Break position is where the complete part ends
|
||||||
|
# Normalize lastRawJson to match the normalized lastCompletePart
|
||||||
|
from modules.shared.jsonUtils import stripCodeFences, normalizeJsonText
|
||||||
|
normalizedRaw = stripCodeFences(normalizeJsonText(lastRawJson)).strip()
|
||||||
|
normalizedComplete = stripCodeFences(normalizeJsonText(lastCompletePart)).strip()
|
||||||
|
|
||||||
|
# Find where normalizedComplete ends in normalizedRaw
|
||||||
|
breakPos = normalizedRaw.find(normalizedComplete)
|
||||||
|
if breakPos >= 0:
|
||||||
|
breakPos = breakPos + len(normalizedComplete)
|
||||||
|
else:
|
||||||
|
# Fallback: use length of lastCompletePart in original string
|
||||||
|
breakPos = lastRawJson.find(lastCompletePart)
|
||||||
|
if breakPos >= 0:
|
||||||
|
breakPos = breakPos + len(lastCompletePart)
|
||||||
|
else:
|
||||||
|
# Last resort: use incompletePart position
|
||||||
|
if incompletePart:
|
||||||
|
breakPos = lastRawJson.find(incompletePart)
|
||||||
|
if breakPos == -1:
|
||||||
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
else:
|
||||||
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
elif incompletePart:
|
||||||
|
# If no complete part, find where incomplete part starts
|
||||||
breakPos = lastRawJson.find(incompletePart)
|
breakPos = lastRawJson.find(incompletePart)
|
||||||
if breakPos == -1:
|
if breakPos == -1:
|
||||||
breakPos = len(lastRawJson.rstrip())
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
@ -2215,8 +2265,8 @@ Output requirements:
|
||||||
breakPos = len(lastRawJson.rstrip())
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
||||||
# Build intelligent context showing hierarchy
|
# Build intelligent context showing hierarchy
|
||||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
from modules.shared.jsonUtils import buildIncompleteContext
|
||||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||||
elif incompletePart:
|
elif incompletePart:
|
||||||
unifiedContext = incompletePart
|
unifiedContext = incompletePart
|
||||||
else:
|
else:
|
||||||
|
|
@ -2229,29 +2279,43 @@ Output requirements:
|
||||||
The previous JSON response was incomplete. Continue from where it stopped.
|
The previous JSON response was incomplete. Continue from where it stopped.
|
||||||
|
|
||||||
JSON Structure Template:
|
JSON Structure Template:
|
||||||
|
```json
|
||||||
{templateStructure}
|
{templateStructure}
|
||||||
|
```
|
||||||
|
|
||||||
Context showing structure hierarchy with cut point:
|
Context showing structure hierarchy with cut point:
|
||||||
|
```
|
||||||
{unifiedContext}
|
{unifiedContext}
|
||||||
|
```
|
||||||
|
|
||||||
Overlap Requirement:
|
Overlap Requirement:
|
||||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||||
|
|
||||||
Last ~100 characters from previous response (repeat these at the start):
|
Overlap context (cut part and full part before at same level):
|
||||||
|
```json
|
||||||
{overlapContext if overlapContext else "No overlap context available"}
|
{overlapContext if overlapContext else "No overlap context available"}
|
||||||
|
```
|
||||||
|
|
||||||
TASK:
|
TASK:
|
||||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||||
3. Continue generating the remaining content following the JSON structure template above
|
3. Continue generating the remaining content following the JSON structure template above
|
||||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||||
|
|
||||||
CRITICAL:
|
CRITICAL:
|
||||||
- Your response must be valid JSON matching the structure template above
|
- Your response must be valid JSON matching the structure template above
|
||||||
- Start with overlap (~100 chars) then continue seamlessly
|
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||||
- Complete the incomplete element and continue with remaining elements"""
|
- Complete the incomplete element and continue with remaining elements"""
|
||||||
return continuationPrompt
|
return continuationPrompt
|
||||||
|
|
||||||
|
def _extractOverlapContext(self, jsonContent: str, breakPosition: int) -> str:
|
||||||
|
"""
|
||||||
|
Extract overlap context: cut part and full part before (same level).
|
||||||
|
Delegates to shared function in jsonUtils for consistency.
|
||||||
|
"""
|
||||||
|
from modules.shared.jsonUtils import extractOverlapContext
|
||||||
|
return extractOverlapContext(jsonContent, breakPosition)
|
||||||
|
|
||||||
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
|
def _extractAndMergeMultipleJsonBlocks(self, responseText: str, contentType: str, sectionId: str) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Extract multiple JSON blocks from response and merge them appropriately.
|
Extract multiple JSON blocks from response and merge them appropriately.
|
||||||
|
|
|
||||||
|
|
@ -128,10 +128,18 @@ class StructureGenerator:
|
||||||
incompletePart = continuationContext.incomplete_part
|
incompletePart = continuationContext.incomplete_part
|
||||||
lastRawJson = continuationContext.last_raw_json
|
lastRawJson = continuationContext.last_raw_json
|
||||||
|
|
||||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||||
overlapContext = ""
|
overlapContext = ""
|
||||||
if lastRawJson:
|
if lastRawJson:
|
||||||
overlapContext = lastRawJson[-100:].strip()
|
# Find break position
|
||||||
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
if incompletePart:
|
||||||
|
pos = lastRawJson.find(incompletePart)
|
||||||
|
if pos >= 0:
|
||||||
|
breakPos = pos
|
||||||
|
|
||||||
|
# Extract cut part and full part before (same level)
|
||||||
|
overlapContext = StructureGenerator._extractOverlapContext(lastRawJson, breakPos)
|
||||||
|
|
||||||
# Build unified context showing structure hierarchy with cut point
|
# Build unified context showing structure hierarchy with cut point
|
||||||
unifiedContext = ""
|
unifiedContext = ""
|
||||||
|
|
@ -145,8 +153,8 @@ class StructureGenerator:
|
||||||
breakPos = len(lastRawJson.rstrip())
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
||||||
# Build intelligent context showing hierarchy
|
# Build intelligent context showing hierarchy
|
||||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
from modules.shared.jsonUtils import buildIncompleteContext
|
||||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||||
elif incompletePart:
|
elif incompletePart:
|
||||||
unifiedContext = incompletePart
|
unifiedContext = incompletePart
|
||||||
else:
|
else:
|
||||||
|
|
@ -159,28 +167,172 @@ class StructureGenerator:
|
||||||
The previous JSON response was incomplete. Continue from where it stopped.
|
The previous JSON response was incomplete. Continue from where it stopped.
|
||||||
|
|
||||||
JSON Structure Template:
|
JSON Structure Template:
|
||||||
|
```json
|
||||||
{templateStructure}
|
{templateStructure}
|
||||||
|
```
|
||||||
|
|
||||||
Context showing structure hierarchy with cut point:
|
Context showing structure hierarchy with cut point:
|
||||||
|
```
|
||||||
{unifiedContext}
|
{unifiedContext}
|
||||||
|
```
|
||||||
|
|
||||||
Overlap Requirement:
|
Overlap Requirement:
|
||||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||||
|
|
||||||
Last ~100 characters from previous response (repeat these at the start):
|
Overlap context (cut part and full part before at same level):
|
||||||
|
```json
|
||||||
{overlapContext if overlapContext else "No overlap context available"}
|
{overlapContext if overlapContext else "No overlap context available"}
|
||||||
|
```
|
||||||
|
|
||||||
TASK:
|
TASK:
|
||||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||||
3. Continue generating the remaining content following the JSON structure template above
|
3. Continue generating the remaining content following the JSON structure template above
|
||||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||||
|
|
||||||
CRITICAL:
|
CRITICAL:
|
||||||
- Your response must be valid JSON matching the structure template above
|
- Your response must be valid JSON matching the structure template above
|
||||||
- Start with overlap (~100 chars) then continue seamlessly
|
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||||
- Complete the incomplete element and continue with remaining elements"""
|
- Complete the incomplete element and continue with remaining elements"""
|
||||||
return continuationPrompt
|
return continuationPrompt
|
||||||
|
"""
|
||||||
|
Extract overlap context: cut part and full part before (same level).
|
||||||
|
|
||||||
|
Returns a string showing:
|
||||||
|
1. The last complete element at the same level before the cut point
|
||||||
|
2. The cut part (incomplete element at the cut point)
|
||||||
|
"""
|
||||||
|
if not jsonContent or breakPosition <= 0:
|
||||||
|
return jsonContent[-200:].strip() if jsonContent else ""
|
||||||
|
|
||||||
|
from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece
|
||||||
|
|
||||||
|
# Find structure hierarchy
|
||||||
|
hierarchy = findStructureHierarchy(jsonContent, breakPosition)
|
||||||
|
if not hierarchy:
|
||||||
|
# Fallback: show last 200 chars before break
|
||||||
|
start = max(0, breakPosition - 200)
|
||||||
|
return jsonContent[start:breakPosition + 100].strip()
|
||||||
|
|
||||||
|
# Get cut level (the array/object containing the cut piece)
|
||||||
|
cutLevel = hierarchy[-1]
|
||||||
|
cutLevelStart = cutLevel['start_pos']
|
||||||
|
cutLevelType = cutLevel['type']
|
||||||
|
|
||||||
|
# Extract cut piece (incomplete element)
|
||||||
|
cutPiece = extractCutPiece(jsonContent, breakPosition)
|
||||||
|
|
||||||
|
# Find the last complete element at the same level before the cut point
|
||||||
|
overlapParts = []
|
||||||
|
|
||||||
|
if cutLevelType == 'array':
|
||||||
|
# Find the last complete array element before breakPosition
|
||||||
|
i = breakPosition - 1
|
||||||
|
depth = 0
|
||||||
|
inString = False
|
||||||
|
escapeNext = False
|
||||||
|
elementStart = breakPosition
|
||||||
|
|
||||||
|
# Find the start of the incomplete element (or last complete element)
|
||||||
|
while i >= cutLevelStart:
|
||||||
|
char = jsonContent[i]
|
||||||
|
|
||||||
|
if escapeNext:
|
||||||
|
escapeNext = False
|
||||||
|
i -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '\\':
|
||||||
|
escapeNext = True
|
||||||
|
i -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '"':
|
||||||
|
inString = not inString
|
||||||
|
i -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not inString:
|
||||||
|
if char == ']':
|
||||||
|
depth += 1
|
||||||
|
elif char == '[':
|
||||||
|
depth -= 1
|
||||||
|
if depth < 0:
|
||||||
|
elementStart = i + 1
|
||||||
|
break
|
||||||
|
elif char == ',' and depth == 0:
|
||||||
|
elementStart = i + 1
|
||||||
|
break
|
||||||
|
|
||||||
|
i -= 1
|
||||||
|
|
||||||
|
# Extract the last complete element (if exists) and the cut part
|
||||||
|
if elementStart < breakPosition:
|
||||||
|
contentBeforeBreak = jsonContent[max(cutLevelStart, elementStart - 500):breakPosition].strip()
|
||||||
|
|
||||||
|
# Find the last complete element by looking for balanced brackets/braces
|
||||||
|
lastCompleteEnd = breakPosition
|
||||||
|
braceCount = 0
|
||||||
|
bracketCount = 0
|
||||||
|
inString = False
|
||||||
|
escapeNext = False
|
||||||
|
|
||||||
|
# Go backwards from breakPosition to find where last complete element ends
|
||||||
|
for j in range(breakPosition - 1, max(cutLevelStart, breakPosition - 1000), -1):
|
||||||
|
char = jsonContent[j]
|
||||||
|
|
||||||
|
if escapeNext:
|
||||||
|
escapeNext = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '\\':
|
||||||
|
escapeNext = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '"':
|
||||||
|
inString = not inString
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not inString:
|
||||||
|
if char == '}':
|
||||||
|
braceCount += 1
|
||||||
|
elif char == '{':
|
||||||
|
braceCount -= 1
|
||||||
|
if braceCount == 0 and bracketCount == 0:
|
||||||
|
lastCompleteEnd = j
|
||||||
|
break
|
||||||
|
elif char == ']':
|
||||||
|
bracketCount += 1
|
||||||
|
elif char == '[':
|
||||||
|
bracketCount -= 1
|
||||||
|
if bracketCount == 0 and braceCount == 0:
|
||||||
|
lastCompleteEnd = j + 1
|
||||||
|
break
|
||||||
|
elif char == ',' and braceCount == 0 and bracketCount == 0:
|
||||||
|
lastCompleteEnd = j + 1
|
||||||
|
break
|
||||||
|
|
||||||
|
# Extract last complete element and cut part
|
||||||
|
if lastCompleteEnd < breakPosition:
|
||||||
|
lastCompleteElement = jsonContent[max(cutLevelStart, lastCompleteEnd - 300):lastCompleteEnd].strip()
|
||||||
|
cutPart = jsonContent[lastCompleteEnd:breakPosition + len(cutPiece)].strip()
|
||||||
|
|
||||||
|
if lastCompleteElement:
|
||||||
|
overlapParts.append(f"Last complete element at same level:\n{lastCompleteElement}")
|
||||||
|
if cutPart:
|
||||||
|
overlapParts.append(f"Cut part (incomplete):\n{cutPart}")
|
||||||
|
else:
|
||||||
|
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||||
|
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||||
|
else:
|
||||||
|
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||||
|
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||||
|
else:
|
||||||
|
# For objects or other types, show context around break point
|
||||||
|
contextStart = max(cutLevelStart, breakPosition - 300)
|
||||||
|
overlapParts.append(jsonContent[contextStart:breakPosition + len(cutPiece)].strip())
|
||||||
|
|
||||||
|
return "\n\n".join(overlapParts) if overlapParts else jsonContent[max(0, breakPosition - 200):breakPosition + 100].strip()
|
||||||
|
|
||||||
# Call AI with looping support
|
# Call AI with looping support
|
||||||
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
|
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
|
||||||
|
|
@ -304,6 +456,15 @@ CRITICAL:
|
||||||
logger.error(f"Error in generateStructure: {str(e)}")
|
logger.error(f"Error in generateStructure: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
|
||||||
|
"""
|
||||||
|
Extract overlap context: cut part and full part before (same level).
|
||||||
|
Delegates to shared function in jsonUtils for consistency.
|
||||||
|
"""
|
||||||
|
from modules.shared.jsonUtils import extractOverlapContext
|
||||||
|
return extractOverlapContext(jsonContent, breakPosition)
|
||||||
|
|
||||||
def _buildChapterStructurePrompt(
|
def _buildChapterStructurePrompt(
|
||||||
self,
|
self,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,15 @@ class CodeGenerationPath:
|
||||||
def __init__(self, services):
|
def __init__(self, services):
|
||||||
self.services = services
|
self.services = services
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
|
||||||
|
"""
|
||||||
|
Extract overlap context: cut part and full part before (same level).
|
||||||
|
Delegates to shared function in jsonUtils for consistency.
|
||||||
|
"""
|
||||||
|
from modules.shared.jsonUtils import extractOverlapContext
|
||||||
|
return extractOverlapContext(jsonContent, breakPosition)
|
||||||
|
|
||||||
async def generateCode(
|
async def generateCode(
|
||||||
self,
|
self,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
|
|
@ -354,8 +363,8 @@ Return ONLY valid JSON matching the request above.
|
||||||
breakPos = len(lastRawJson.rstrip())
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
||||||
# Build intelligent context showing hierarchy
|
# Build intelligent context showing hierarchy
|
||||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
from modules.shared.jsonUtils import buildIncompleteContext
|
||||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||||
elif incompletePart:
|
elif incompletePart:
|
||||||
unifiedContext = incompletePart
|
unifiedContext = incompletePart
|
||||||
else:
|
else:
|
||||||
|
|
@ -368,26 +377,32 @@ Return ONLY valid JSON matching the request above.
|
||||||
The previous JSON response was incomplete. Continue from where it stopped.
|
The previous JSON response was incomplete. Continue from where it stopped.
|
||||||
|
|
||||||
JSON Structure Template:
|
JSON Structure Template:
|
||||||
|
```json
|
||||||
{templateStructure}
|
{templateStructure}
|
||||||
|
```
|
||||||
|
|
||||||
Context showing structure hierarchy with cut point:
|
Context showing structure hierarchy with cut point:
|
||||||
|
```
|
||||||
{unifiedContext}
|
{unifiedContext}
|
||||||
|
```
|
||||||
|
|
||||||
Overlap Requirement:
|
Overlap Requirement:
|
||||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||||
|
|
||||||
Last ~100 characters from previous response (repeat these at the start):
|
Overlap context (cut part and full part before at same level):
|
||||||
|
```json
|
||||||
{overlapContext if overlapContext else "No overlap context available"}
|
{overlapContext if overlapContext else "No overlap context available"}
|
||||||
|
```
|
||||||
|
|
||||||
TASK:
|
TASK:
|
||||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||||
3. Continue generating the remaining content following the JSON structure template above
|
3. Continue generating the remaining content following the JSON structure template above
|
||||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||||
|
|
||||||
CRITICAL:
|
CRITICAL:
|
||||||
- Your response must be valid JSON matching the structure template above
|
- Your response must be valid JSON matching the structure template above
|
||||||
- Start with overlap (~100 chars) then continue seamlessly
|
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||||
- Complete the incomplete element and continue with remaining elements"""
|
- Complete the incomplete element and continue with remaining elements"""
|
||||||
return continuationPrompt
|
return continuationPrompt
|
||||||
|
|
||||||
|
|
@ -793,10 +808,18 @@ Return ONLY valid JSON in this format:
|
||||||
incompletePart = continuationContext.incomplete_part
|
incompletePart = continuationContext.incomplete_part
|
||||||
lastRawJson = continuationContext.last_raw_json
|
lastRawJson = continuationContext.last_raw_json
|
||||||
|
|
||||||
# Build overlap context: extract last ~100 characters from the response for overlap
|
# Build overlap context: extract cut part and full part before (same level) for overlap
|
||||||
overlapContext = ""
|
overlapContext = ""
|
||||||
if lastRawJson:
|
if lastRawJson:
|
||||||
overlapContext = lastRawJson[-100:].strip()
|
# Find break position
|
||||||
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
if incompletePart:
|
||||||
|
pos = lastRawJson.find(incompletePart)
|
||||||
|
if pos >= 0:
|
||||||
|
breakPos = pos
|
||||||
|
|
||||||
|
# Extract cut part and full part before (same level)
|
||||||
|
overlapContext = CodeGenerationPath._extractOverlapContext(lastRawJson, breakPos)
|
||||||
|
|
||||||
# Build unified context showing structure hierarchy with cut point
|
# Build unified context showing structure hierarchy with cut point
|
||||||
unifiedContext = ""
|
unifiedContext = ""
|
||||||
|
|
@ -810,8 +833,8 @@ Return ONLY valid JSON in this format:
|
||||||
breakPos = len(lastRawJson.rstrip())
|
breakPos = len(lastRawJson.rstrip())
|
||||||
|
|
||||||
# Build intelligent context showing hierarchy
|
# Build intelligent context showing hierarchy
|
||||||
from modules.shared.jsonUtils import _buildIncompleteContext
|
from modules.shared.jsonUtils import buildIncompleteContext
|
||||||
unifiedContext = _buildIncompleteContext(lastRawJson, breakPos)
|
unifiedContext = buildIncompleteContext(lastRawJson, breakPos)
|
||||||
elif incompletePart:
|
elif incompletePart:
|
||||||
unifiedContext = incompletePart
|
unifiedContext = incompletePart
|
||||||
else:
|
else:
|
||||||
|
|
@ -824,26 +847,32 @@ Return ONLY valid JSON in this format:
|
||||||
The previous JSON response was incomplete. Continue from where it stopped.
|
The previous JSON response was incomplete. Continue from where it stopped.
|
||||||
|
|
||||||
JSON Structure Template:
|
JSON Structure Template:
|
||||||
|
```json
|
||||||
{templateStructure}
|
{templateStructure}
|
||||||
|
```
|
||||||
|
|
||||||
Context showing structure hierarchy with cut point:
|
Context showing structure hierarchy with cut point:
|
||||||
|
```
|
||||||
{unifiedContext}
|
{unifiedContext}
|
||||||
|
```
|
||||||
|
|
||||||
Overlap Requirement:
|
Overlap Requirement:
|
||||||
To ensure proper merging, your response MUST start by repeating approximately the last 100 characters from the previous response, then continue with new content.
|
To ensure proper merging, your response MUST start by repeating the cut part and the full part before (same level) shown below, then continue with new content.
|
||||||
|
|
||||||
Last ~100 characters from previous response (repeat these at the start):
|
Overlap context (cut part and full part before at same level):
|
||||||
|
```json
|
||||||
{overlapContext if overlapContext else "No overlap context available"}
|
{overlapContext if overlapContext else "No overlap context available"}
|
||||||
|
```
|
||||||
|
|
||||||
TASK:
|
TASK:
|
||||||
1. Start your response by repeating the last ~100 characters shown above (for overlap/merging)
|
1. Start your response by repeating the overlap context shown above (cut part and full part before at same level)
|
||||||
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
2. Complete the incomplete element shown in the context above (marked with CUT POINT)
|
||||||
3. Continue generating the remaining content following the JSON structure template above
|
3. Continue generating the remaining content following the JSON structure template above
|
||||||
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
4. Return ONLY valid JSON following the structure template - no overlap/continuation wrapper objects
|
||||||
|
|
||||||
CRITICAL:
|
CRITICAL:
|
||||||
- Your response must be valid JSON matching the structure template above
|
- Your response must be valid JSON matching the structure template above
|
||||||
- Start with overlap (~100 chars) then continue seamlessly
|
- Start with overlap context (cut part and full part before at same level) then continue seamlessly
|
||||||
- Complete the incomplete element and continue with remaining elements"""
|
- Complete the incomplete element and continue with remaining elements"""
|
||||||
return continuationPrompt
|
return continuationPrompt
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -346,9 +346,18 @@ class BaseRenderer(ABC):
|
||||||
|
|
||||||
response = await aiService.callAi(request)
|
response = await aiService.callAi(request)
|
||||||
|
|
||||||
# Save styling prompt and response to debug
|
# Save styling prompt and response to debug (fire and forget - don't block on slow file I/O)
|
||||||
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
|
# writeDebugFile calls os.listdir(), which can be slow when the directory holds many files
|
||||||
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
# Run in background thread to avoid blocking rendering
|
||||||
|
import threading
|
||||||
|
def _writeDebugFiles():
|
||||||
|
try:
|
||||||
|
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
|
||||||
|
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
||||||
|
except Exception:
|
||||||
|
pass # Silently fail - debug writing should never block rendering
|
||||||
|
|
||||||
|
threading.Thread(target=_writeDebugFiles, daemon=True).start()
|
||||||
|
|
||||||
# Clean and parse JSON
|
# Clean and parse JSON
|
||||||
result = response.content.strip() if response and response.content else ""
|
result = response.content.strip() if response and response.content else ""
|
||||||
|
|
|
||||||
|
|
@ -116,24 +116,37 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate DOCX content from structured JSON document."""
|
"""Generate DOCX content from structured JSON document."""
|
||||||
|
import time
|
||||||
|
start_time = time.time()
|
||||||
try:
|
try:
|
||||||
|
self.logger.debug("_generateDocxFromJson: Starting document generation")
|
||||||
# Create new document
|
# Create new document
|
||||||
doc = Document()
|
doc = Document()
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
|
||||||
|
|
||||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||||
|
style_start = time.time()
|
||||||
|
self.logger.debug("_generateDocxFromJson: About to get style set")
|
||||||
styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
|
styleSet = await self._getStyleSet(json_content, userPrompt, aiService)
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
|
||||||
|
|
||||||
# Setup basic document styles and create all styles from style set
|
# Setup basic document styles and create all styles from style set
|
||||||
|
setup_start = time.time()
|
||||||
|
self.logger.debug("_generateDocxFromJson: Setting up document styles")
|
||||||
self._setupBasicDocumentStyles(doc)
|
self._setupBasicDocumentStyles(doc)
|
||||||
self._setupDocumentStyles(doc, styleSet)
|
self._setupDocumentStyles(doc, styleSet)
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Document styles setup in {time.time() - setup_start:.2f}s")
|
||||||
|
|
||||||
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
|
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
|
||||||
if not self._validateJsonStructure(json_content):
|
if not self._validateJsonStructure(json_content):
|
||||||
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
|
raise ValueError("JSON content must follow standardized schema: {metadata: {...}, documents: [{sections: [...]}]}")
|
||||||
|
|
||||||
# Extract sections and metadata from standardized schema
|
# Extract sections and metadata from standardized schema
|
||||||
|
extract_start = time.time()
|
||||||
|
self.logger.debug("_generateDocxFromJson: Extracting sections and metadata")
|
||||||
sections = self._extractSections(json_content)
|
sections = self._extractSections(json_content)
|
||||||
metadata = self._extractMetadata(json_content)
|
metadata = self._extractMetadata(json_content)
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Extracted {len(sections)} sections in {time.time() - extract_start:.2f}s")
|
||||||
|
|
||||||
# Use provided title (which comes from documents[].title) as primary source
|
# Use provided title (which comes from documents[].title) as primary source
|
||||||
# Fallback to metadata.title only if title parameter is empty
|
# Fallback to metadata.title only if title parameter is empty
|
||||||
|
|
@ -144,18 +157,32 @@ class RendererDocx(BaseRenderer):
|
||||||
doc.add_paragraph(document_title, style='Title')
|
doc.add_paragraph(document_title, style='Title')
|
||||||
|
|
||||||
# Process each section in order
|
# Process each section in order
|
||||||
for section in sections:
|
render_start = time.time()
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Starting to render {len(sections)} sections")
|
||||||
|
for idx, section in enumerate(sections):
|
||||||
|
section_start = time.time()
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Rendering section {idx + 1}/{len(sections)}")
|
||||||
self._renderJsonSection(doc, section, styleSet)
|
self._renderJsonSection(doc, section, styleSet)
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Section {idx + 1} rendered in {time.time() - section_start:.2f}s")
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: All sections rendered in {time.time() - render_start:.2f}s")
|
||||||
|
|
||||||
# Save to buffer
|
# Save to buffer
|
||||||
|
save_start = time.time()
|
||||||
|
self.logger.debug("_generateDocxFromJson: Starting to save document to buffer")
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
doc.save(buffer)
|
doc.save(buffer)
|
||||||
buffer.seek(0)
|
buffer.seek(0)
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Document saved to buffer in {time.time() - save_start:.2f}s")
|
||||||
|
|
||||||
# Convert to base64
|
# Convert to base64
|
||||||
|
encode_start = time.time()
|
||||||
|
self.logger.debug("_generateDocxFromJson: Converting to base64")
|
||||||
docx_bytes = buffer.getvalue()
|
docx_bytes = buffer.getvalue()
|
||||||
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
||||||
|
self.logger.debug(f"_generateDocxFromJson: Converted to base64 in {time.time() - encode_start:.2f}s (document size: {len(docx_bytes)} bytes)")
|
||||||
|
|
||||||
|
total_time = time.time() - start_time
|
||||||
|
self.logger.info(f"_generateDocxFromJson: Document generation completed in {total_time:.2f}s")
|
||||||
return docx_base64
|
return docx_base64
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -381,6 +408,8 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON table to DOCX using AI-generated styles."""
|
"""Render a JSON table to DOCX using AI-generated styles."""
|
||||||
|
import time
|
||||||
|
table_start = time.time()
|
||||||
try:
|
try:
|
||||||
# Extract from nested content structure
|
# Extract from nested content structure
|
||||||
content = table_data.get("content", {})
|
content = table_data.get("content", {})
|
||||||
|
|
@ -392,19 +421,26 @@ class RendererDocx(BaseRenderer):
|
||||||
if not headers or not rows:
|
if not headers or not rows:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
self.logger.debug(f"_renderJsonTable: Starting table render - {len(rows)} rows × {len(headers)} columns = {len(rows) * len(headers)} cells")
|
||||||
|
|
||||||
# Create table
|
# Create table
|
||||||
|
create_start = time.time()
|
||||||
table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
|
table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
|
||||||
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
||||||
|
self.logger.debug(f"_renderJsonTable: Table created in {time.time() - create_start:.2f}s")
|
||||||
|
|
||||||
# Apply table borders based on AI style
|
# Apply table borders based on AI style
|
||||||
|
border_start = time.time()
|
||||||
border_style = styles["table_border"]["style"]
|
border_style = styles["table_border"]["style"]
|
||||||
if border_style == "horizontal_only":
|
if border_style == "horizontal_only":
|
||||||
self._applyHorizontalBordersOnly(table)
|
self._applyHorizontalBordersOnly(table)
|
||||||
elif border_style == "grid":
|
elif border_style == "grid":
|
||||||
table.style = 'Table Grid'
|
table.style = 'Table Grid'
|
||||||
# else: no borders
|
# else: no borders
|
||||||
|
self.logger.debug(f"_renderJsonTable: Borders applied in {time.time() - border_start:.2f}s")
|
||||||
|
|
||||||
# Add headers with AI-generated styling
|
# Add headers with AI-generated styling
|
||||||
|
header_start = time.time()
|
||||||
header_row = table.rows[0]
|
header_row = table.rows[0]
|
||||||
header_style = styles["table_header"]
|
header_style = styles["table_header"]
|
||||||
for i, header in enumerate(headers):
|
for i, header in enumerate(headers):
|
||||||
|
|
@ -424,9 +460,14 @@ class RendererDocx(BaseRenderer):
|
||||||
run.font.size = Pt(11)
|
run.font.size = Pt(11)
|
||||||
text_color = header_style["text_color"].lstrip('#')
|
text_color = header_style["text_color"].lstrip('#')
|
||||||
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||||
|
self.logger.debug(f"_renderJsonTable: Headers rendered in {time.time() - header_start:.2f}s")
|
||||||
|
|
||||||
# Add data rows with AI-generated styling
|
# Add data rows with AI-generated styling
|
||||||
|
rows_start = time.time()
|
||||||
cell_style = styles["table_cell"]
|
cell_style = styles["table_cell"]
|
||||||
|
total_cells = len(rows) * len(headers)
|
||||||
|
log_interval = max(1, total_cells // 20) # Log every 5% progress
|
||||||
|
|
||||||
for row_idx, row_data in enumerate(rows):
|
for row_idx, row_data in enumerate(rows):
|
||||||
if row_idx + 1 < len(table.rows):
|
if row_idx + 1 < len(table.rows):
|
||||||
table_row = table.rows[row_idx + 1]
|
table_row = table.rows[row_idx + 1]
|
||||||
|
|
@ -435,16 +476,30 @@ class RendererDocx(BaseRenderer):
|
||||||
cell = table_row.cells[col_idx]
|
cell = table_row.cells[col_idx]
|
||||||
cell.text = str(cell_data)
|
cell.text = str(cell_data)
|
||||||
|
|
||||||
# Apply text styling
|
# Apply text styling - OPTIMIZED: Only style if needed
|
||||||
for paragraph in cell.paragraphs:
|
# For large tables, styling every cell can be very slow
|
||||||
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
# Check if we need to apply styling (only if style differs from default)
|
||||||
for run in paragraph.runs:
|
if cell_style.get("text_color") != "#2F2F2F" or cell_style.get("font_size") != 10:
|
||||||
run.font.size = Pt(10)
|
for paragraph in cell.paragraphs:
|
||||||
text_color = cell_style["text_color"].lstrip('#')
|
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||||
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
for run in paragraph.runs:
|
||||||
|
run.font.size = Pt(10)
|
||||||
|
text_color = cell_style["text_color"].lstrip('#')
|
||||||
|
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||||
|
|
||||||
|
# Log progress for large tables
|
||||||
|
if (row_idx + 1) % log_interval == 0 or row_idx == len(rows) - 1:
|
||||||
|
elapsed = time.time() - rows_start
|
||||||
|
progress = ((row_idx + 1) / len(rows)) * 100
|
||||||
|
cells_processed = (row_idx + 1) * len(headers)
|
||||||
|
rate = cells_processed / elapsed if elapsed > 0 else 0
|
||||||
|
self.logger.debug(f"_renderJsonTable: Progress {progress:.1f}% ({row_idx + 1}/{len(rows)} rows, {cells_processed}/{total_cells} cells) - Rate: {rate:.1f} cells/s, Elapsed: {elapsed:.2f}s")
|
||||||
|
|
||||||
|
total_time = time.time() - table_start
|
||||||
|
self.logger.info(f"_renderJsonTable: Table rendering completed in {total_time:.2f}s ({len(rows)} rows × {len(headers)} cols = {total_cells} cells)")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
|
||||||
|
|
||||||
def _applyHorizontalBordersOnly(self, table) -> None:
|
def _applyHorizontalBordersOnly(self, table) -> None:
|
||||||
"""Apply only horizontal borders to the table (no vertical borders)."""
|
"""Apply only horizontal borders to the table (no vertical borders)."""
|
||||||
|
|
|
||||||
|
|
@ -1300,16 +1300,16 @@ def _extractLastCompleteAndIncomplete(jsonContent: str) -> Tuple[str, str]:
|
||||||
lastCompleteElement = _findLastCompleteElement(lastCompletePart)
|
lastCompleteElement = _findLastCompleteElement(lastCompletePart)
|
||||||
if lastCompleteElement:
|
if lastCompleteElement:
|
||||||
# Build context for incomplete part - show structure around the break
|
# Build context for incomplete part - show structure around the break
|
||||||
incompleteWithContext = _buildIncompleteContext(jsonContent, lastCompleteEnd)
|
incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||||
return lastCompleteElement, incompleteWithContext
|
return lastCompleteElement, incompleteWithContext
|
||||||
else:
|
else:
|
||||||
# Build context for incomplete part
|
# Build context for incomplete part
|
||||||
incompleteWithContext = _buildIncompleteContext(jsonContent, lastCompleteEnd)
|
incompleteWithContext = buildIncompleteContext(jsonContent, lastCompleteEnd)
|
||||||
return lastCompletePart, incompleteWithContext
|
return lastCompletePart, incompleteWithContext
|
||||||
else:
|
else:
|
||||||
# No complete structure found - everything is incomplete
|
# No complete structure found - everything is incomplete
|
||||||
# Still try to show context
|
# Still try to show context
|
||||||
incompleteWithContext = _buildIncompleteContext(jsonContent, 0)
|
incompleteWithContext = buildIncompleteContext(jsonContent, 0)
|
||||||
return "", incompleteWithContext
|
return "", incompleteWithContext
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1359,183 +1359,449 @@ def _findLastCompleteElement(jsonStr: str) -> str:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def _buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
|
def buildIncompleteContext(jsonContent: str, breakPosition: int) -> str:
|
||||||
"""
|
"""
|
||||||
Build intelligent context showing the incomplete element with its parent structure hierarchy.
|
Build hierarchical context showing incomplete JSON structure.
|
||||||
|
|
||||||
Logic (as per user instruction):
|
Shows:
|
||||||
1. Cut piece level: element of a list (the incomplete element at cut point)
|
- Full hierarchy structure (always shown)
|
||||||
2. Parent of the cut element: the list/array containing the cut piece (with cut point shown)
|
- Complete elements before cut (within the 500-character DATA_BUDGET)
|
||||||
3. Last complete object on the same level like the cut object (if exists) PLUS further previous
|
- Cut piece marked with <-- CUT POINT (incomplete)
|
||||||
content from the json string (maximum 1000 characters)
|
- Does NOT close open structures
|
||||||
4. Next parent levels, until root. Further 1000 characters to show content (but only complete
|
|
||||||
objects - if too big, not to show), then only showing metadata until root
|
|
||||||
|
|
||||||
Example output structure:
|
|
||||||
{
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"content": {
|
|
||||||
"rows": [
|
|
||||||
[37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957],
|
|
||||||
[37957, 37963, 37967, 37987, 37991, <-- CUT POINT (incomplete)
|
|
||||||
"""
|
"""
|
||||||
import json
|
if breakPosition <= 0 or breakPosition > len(jsonContent):
|
||||||
import re
|
return jsonContent
|
||||||
|
|
||||||
if breakPosition <= 0 or breakPosition >= len(jsonContent):
|
|
||||||
# Invalid break position - show last 500 chars
|
|
||||||
return jsonContent[-500:] if len(jsonContent) > 500 else jsonContent
|
|
||||||
|
|
||||||
contextParts = []
|
|
||||||
|
|
||||||
# Find structure hierarchy backwards from break point
|
|
||||||
hierarchy = _findStructureHierarchy(jsonContent, breakPosition)
|
|
||||||
|
|
||||||
|
hierarchy = findStructureHierarchy(jsonContent, breakPosition)
|
||||||
if not hierarchy:
|
if not hierarchy:
|
||||||
# Fallback: show simple context
|
return jsonContent[:breakPosition]
|
||||||
contextParts.append("Cut point context:\n")
|
|
||||||
contextStart = max(0, breakPosition - 500)
|
|
||||||
contextParts.append(jsonContent[contextStart:breakPosition + 100])
|
|
||||||
return "\n".join(contextParts)
|
|
||||||
|
|
||||||
# Step 1: Extract cut piece (incomplete element at cut point)
|
|
||||||
cutPiece = _extractCutPiece(jsonContent, breakPosition)
|
|
||||||
|
|
||||||
# Step 2: Find the cut level (the array/object containing the cut piece)
|
|
||||||
cutLevel = hierarchy[-1] if hierarchy else None
|
|
||||||
|
|
||||||
if not cutLevel:
|
|
||||||
# Fallback
|
|
||||||
contextParts.append("Cut point context:\n")
|
|
||||||
contextStart = max(0, breakPosition - 500)
|
|
||||||
contextParts.append(jsonContent[contextStart:breakPosition + 100])
|
|
||||||
return "\n".join(contextParts)
|
|
||||||
|
|
||||||
# Build context following the exact structure requested
|
|
||||||
# Show hierarchical structure from root to cut point
|
|
||||||
|
|
||||||
# Extract the actual JSON structure from root to cut point
|
|
||||||
# Build the full hierarchical structure showing:
|
|
||||||
# 4. Parent levels until root (with content/metadata limits)
|
|
||||||
# 3. Last complete elements on same level + previous content (max 1000 chars)
|
|
||||||
# 2. Parent container (the list) with cut piece
|
|
||||||
# 1. Cut piece
|
|
||||||
|
|
||||||
|
cutPiece = extractCutPiece(jsonContent, breakPosition)
|
||||||
resultLines = []
|
resultLines = []
|
||||||
|
DATA_BUDGET = 500
|
||||||
|
|
||||||
# Build structure from root to cut level
|
# Build hierarchy level by level - show actual JSON structure
|
||||||
# Extract actual JSON content for each level
|
for levelIndex, level in enumerate(hierarchy):
|
||||||
for i, level in enumerate(hierarchy):
|
|
||||||
levelType = level['type']
|
levelType = level['type']
|
||||||
start = level['start_pos']
|
levelStart = level['start_pos']
|
||||||
end = level['end_pos'] if i < len(hierarchy) - 1 else breakPosition
|
levelDepth = level['depth']
|
||||||
key = level.get('key')
|
indent = " " * levelDepth
|
||||||
depth = level['depth']
|
isCutLevel = (levelIndex == len(hierarchy) - 1)
|
||||||
|
isParentOfCutLevel = (levelIndex == len(hierarchy) - 2)
|
||||||
|
|
||||||
indent = " " * depth
|
# Get next level info
|
||||||
|
if levelIndex < len(hierarchy) - 1:
|
||||||
if i < len(hierarchy) - 1:
|
nextLevel = hierarchy[levelIndex + 1]
|
||||||
# Parent levels - show opening structure
|
nextLevelStart = nextLevel['start_pos']
|
||||||
levelContent = jsonContent[start:end]
|
|
||||||
|
|
||||||
# If content is too large, show only metadata
|
|
||||||
if len(levelContent) > 1000:
|
|
||||||
# Show opening with key
|
|
||||||
opening = jsonContent[start:min(start + 100, end)]
|
|
||||||
if key:
|
|
||||||
resultLines.append(f'{indent}"{key}": {{')
|
|
||||||
else:
|
|
||||||
resultLines.append(f'{indent}{{')
|
|
||||||
resultLines.append(f'{indent} ...')
|
|
||||||
else:
|
|
||||||
# Show opening structure
|
|
||||||
if key:
|
|
||||||
# Find where the key's value starts
|
|
||||||
keyEnd = jsonContent.find(':', start)
|
|
||||||
if keyEnd > 0:
|
|
||||||
opening = jsonContent[start:min(keyEnd + 50, end)]
|
|
||||||
resultLines.append(f'{indent}{opening}')
|
|
||||||
else:
|
|
||||||
opening = jsonContent[start:min(start + 50, end)]
|
|
||||||
resultLines.append(f'{indent}{opening}')
|
|
||||||
else:
|
else:
|
||||||
# Cut level - show detailed context
|
nextLevelStart = breakPosition
|
||||||
cutLevelType = levelType
|
|
||||||
cutLevelStart = start
|
# Show opening structure for this level
|
||||||
cutLevelKey = key
|
resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
|
||||||
cutLevelDepth = depth
|
childIndent = indent + " "
|
||||||
|
|
||||||
# Show key if available
|
if isCutLevel:
|
||||||
if cutLevelKey:
|
# Cut level: show cut piece
|
||||||
resultLines.append(f'{indent}"{cutLevelKey}": {{')
|
if cutPiece:
|
||||||
indent += " "
|
for line in cutPiece.split('\n'):
|
||||||
|
stripped = line.strip()
|
||||||
if cutLevelType == 'array':
|
if stripped:
|
||||||
# Show array opening
|
resultLines.append(f'{childIndent}{stripped}')
|
||||||
arrayKey = _findKeyBefore(jsonContent, cutLevelStart)
|
resultLines[-1] += ' <-- CUT POINT (incomplete)'
|
||||||
if arrayKey:
|
|
||||||
resultLines.append(f'{indent}"{arrayKey}": [')
|
|
||||||
else:
|
|
||||||
resultLines.append(f'{indent}[')
|
|
||||||
indent += " "
|
|
||||||
|
|
||||||
# 3. Show last complete elements on same level + previous content (max 1000 chars)
|
|
||||||
contentBeforeBreak = jsonContent[cutLevelStart:breakPosition]
|
|
||||||
lastCompleteElements = _extractLastCompleteArrayElementsWithContext(
|
|
||||||
contentBeforeBreak, jsonContent, cutLevelStart, maxChars=1000
|
|
||||||
)
|
|
||||||
if lastCompleteElements:
|
|
||||||
resultLines.append(lastCompleteElements)
|
|
||||||
|
|
||||||
# 2. Show parent container (the list) with cut piece
|
|
||||||
cutArrayElement = _findCutArrayElement(jsonContent, breakPosition, cutLevelStart)
|
|
||||||
if cutArrayElement:
|
|
||||||
resultLines.append(f'{indent}{cutArrayElement} <-- CUT POINT (incomplete)')
|
|
||||||
else:
|
|
||||||
# Fallback: show what we have at break point
|
|
||||||
cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
|
|
||||||
resultLines.append(f'{indent}{cutPart} <-- CUT POINT (incomplete)')
|
|
||||||
|
|
||||||
# Close the array
|
|
||||||
indent = indent[:-2] if len(indent) >= 2 else indent
|
|
||||||
resultLines.append(f'{indent}]')
|
|
||||||
else:
|
else:
|
||||||
# Object at cut level
|
resultLines.append(f'{childIndent}... <-- CUT POINT (incomplete)')
|
||||||
cutPart = jsonContent[breakPosition:breakPosition + 200].strip()
|
|
||||||
preview = jsonContent[cutLevelStart:breakPosition]
|
elif isParentOfCutLevel and levelType == 'array':
|
||||||
preview = preview[-500:] if len(preview) > 500 else preview
|
# Parent of cut level: show complete elements with budget
|
||||||
resultLines.append(f'{indent}{preview}... {cutPart} <-- CUT POINT (incomplete)')
|
completeElements = _findCompleteElementsAtLevel(
|
||||||
|
jsonContent, levelStart, nextLevelStart, levelDepth
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"DEBUG: Found {len(completeElements)} complete elements")
|
||||||
|
print(f"DEBUG: Budget = {DATA_BUDGET}")
|
||||||
|
|
||||||
|
dataBudget = DATA_BUDGET
|
||||||
|
for elementStart, elementEnd in reversed(completeElements):
|
||||||
|
elementData = jsonContent[elementStart:elementEnd].strip()
|
||||||
|
elementSize = len(elementData)
|
||||||
|
|
||||||
|
print(f"DEBUG: Element size = {elementSize}, remaining budget = {dataBudget}")
|
||||||
|
|
||||||
|
if elementSize == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if elementSize > dataBudget:
|
||||||
|
print(f"DEBUG: Element too large, stopping")
|
||||||
|
break
|
||||||
|
|
||||||
|
print(f"DEBUG: Adding element (size {elementSize})")
|
||||||
|
for line in elementData.split('\n'):
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped:
|
||||||
|
resultLines.append(f'{childIndent}{stripped}')
|
||||||
|
if elementEnd < nextLevelStart:
|
||||||
|
resultLines[-1] += ','
|
||||||
|
|
||||||
|
dataBudget -= elementSize
|
||||||
|
print(f"DEBUG: Budget after decrement = {dataBudget}")
|
||||||
|
|
||||||
|
if dataBudget <= 0:
|
||||||
|
print(f"DEBUG: Budget exhausted, stopping")
|
||||||
|
break
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Other parent levels: show path content (keys and values) leading to next level
|
||||||
|
pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
|
||||||
|
if pathContent:
|
||||||
|
# Show all path content (structure is always shown, not truncated)
|
||||||
|
for line in pathContent.split('\n'):
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped:
|
||||||
|
resultLines.append(f'{childIndent}{stripped}')
|
||||||
|
|
||||||
# Close all parent structures
|
return "\n".join(resultLines)
|
||||||
for i in range(len(hierarchy) - 2, -1, -1):
|
|
||||||
level = hierarchy[i]
|
|
||||||
depth = level['depth']
|
def _buildNestedHierarchy(
|
||||||
indent = " " * depth
|
resultLines: List[str],
|
||||||
resultLines.append(f'{indent}}}')
|
jsonContent: str,
|
||||||
|
hierarchy: List[Dict[str, Any]],
|
||||||
|
levelIndex: int,
|
||||||
|
breakPosition: int,
|
||||||
|
cutPiece: str,
|
||||||
|
cutLevel: Dict[str, Any]
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Recursively build nested hierarchy from root to cut level.
|
||||||
|
This ensures proper nesting where each level contains the next level.
|
||||||
|
"""
|
||||||
|
if levelIndex >= len(hierarchy):
|
||||||
|
return
|
||||||
|
|
||||||
contextParts.append("\n".join(resultLines))
|
level = hierarchy[levelIndex]
|
||||||
|
levelType = level['type']
|
||||||
|
levelStart = level['start_pos']
|
||||||
|
levelKey = level.get('key')
|
||||||
|
levelDepth = level['depth']
|
||||||
|
indent = " " * levelDepth
|
||||||
|
|
||||||
return "\n".join(contextParts)
|
isCutLevel = (levelIndex == len(hierarchy) - 1)
|
||||||
|
|
||||||
|
# Show opening structure for this level
|
||||||
|
if levelKey:
|
||||||
|
resultLines.append(f'{indent}"{levelKey}": {{' if levelType == 'object' else f'{indent}"{levelKey}": [')
|
||||||
|
else:
|
||||||
|
resultLines.append(f'{indent}{{' if levelType == 'object' else f'{indent}[')
|
||||||
|
|
||||||
|
childIndent = indent + " "
|
||||||
|
|
||||||
|
if isCutLevel:
|
||||||
|
# Cut level - show content (complete elements + cut piece)
|
||||||
|
if levelType == 'array':
|
||||||
|
charBudget = 1000
|
||||||
|
completeElements = _findCompleteElementsAtLevel(
|
||||||
|
jsonContent, levelStart, breakPosition, levelDepth
|
||||||
|
)
|
||||||
|
|
||||||
|
# Show complete elements (working backwards from the cut)
|
||||||
|
for elementStart, elementEnd in reversed(completeElements):
|
||||||
|
elementSize = elementEnd - elementStart
|
||||||
|
if charBudget >= elementSize:
|
||||||
|
element = jsonContent[elementStart:elementEnd].strip()
|
||||||
|
if element:
|
||||||
|
elementLines = element.split('\n')
|
||||||
|
for line in elementLines:
|
||||||
|
if line.strip():
|
||||||
|
resultLines.append(f'{childIndent}{line}')
|
||||||
|
if elementEnd < breakPosition:
|
||||||
|
resultLines[-1] += ','
|
||||||
|
charBudget -= elementSize
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Show cut piece
|
||||||
|
if cutPiece:
|
||||||
|
cutPieceLines = cutPiece.split('\n')
|
||||||
|
for line in cutPieceLines:
|
||||||
|
if line.strip():
|
||||||
|
resultLines.append(f'{childIndent}{line}')
|
||||||
|
resultLines[-1] += ' <-- CUT POINT (incomplete)'
|
||||||
|
else:
|
||||||
|
cutPart = jsonContent[max(0, breakPosition-50):breakPosition]
|
||||||
|
resultLines.append(f'{childIndent}{cutPart} <-- CUT POINT (incomplete)')
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Object at cut level
|
||||||
|
previewSize = breakPosition - levelStart
|
||||||
|
maxPreviewSize = 500
|
||||||
|
if previewSize > maxPreviewSize:
|
||||||
|
previewStart = breakPosition - maxPreviewSize
|
||||||
|
preview = jsonContent[previewStart:breakPosition]
|
||||||
|
else:
|
||||||
|
preview = jsonContent[levelStart:breakPosition]
|
||||||
|
|
||||||
|
previewLines = preview.split('\n')
|
||||||
|
for line in previewLines:
|
||||||
|
if line.strip():
|
||||||
|
resultLines.append(f'{childIndent}{line}')
|
||||||
|
|
||||||
|
cutPart = jsonContent[breakPosition:min(breakPosition + 50, len(jsonContent))]
|
||||||
|
resultLines.append(f'{childIndent}... {cutPart} <-- CUT POINT (incomplete)')
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Parent level - show path to next level, then recursively build next level
|
||||||
|
nextLevel = hierarchy[levelIndex + 1]
|
||||||
|
nextLevelKey = nextLevel.get('key')
|
||||||
|
nextLevelStart = nextLevel['start_pos']
|
||||||
|
nextLevelType = nextLevel['type']
|
||||||
|
|
||||||
|
# Extract content between this level's opening and next level's start
|
||||||
|
# This shows any keys/values that come before the next level
|
||||||
|
pathContent = jsonContent[levelStart + 1:nextLevelStart].strip()
|
||||||
|
|
||||||
|
# Show the path content (keys/values before next level)
|
||||||
|
if len(pathContent) > 0 and len(pathContent) <= 500:
|
||||||
|
pathLines = pathContent.split('\n')
|
||||||
|
nonEmptyLines = [line for line in pathLines if line.strip()]
|
||||||
|
if nonEmptyLines:
|
||||||
|
for line in nonEmptyLines[:20]: # Show more lines
|
||||||
|
if line.strip():
|
||||||
|
resultLines.append(f'{childIndent}{line}')
|
||||||
|
if len(nonEmptyLines) > 20:
|
||||||
|
resultLines.append(f'{childIndent}... ({len(nonEmptyLines) - 20} more lines) ...')
|
||||||
|
elif len(pathContent) > 500:
|
||||||
|
# Content too large - show placeholder
|
||||||
|
resultLines.append(f'{childIndent}... (content too large, {len(pathContent)} chars) ...')
|
||||||
|
|
||||||
|
# Always show the key leading to next level if it exists
|
||||||
|
# The recursive call will show the opening bracket/brace, so we just show the key here
|
||||||
|
if nextLevelKey:
|
||||||
|
# Show the key (the recursive call will add the opening bracket/brace)
|
||||||
|
# Actually, the recursive call already shows the full opening with key,
|
||||||
|
# so we don't need to show it here - just let the recursive call handle it
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Recursively build next level (this will show its opening structure and content)
|
||||||
|
_buildNestedHierarchy(resultLines, jsonContent, hierarchy, levelIndex + 1, breakPosition, cutPiece, cutLevel)
|
||||||
|
|
||||||
|
# Close this level
|
||||||
|
resultLines.append(f'{indent}}}' if levelType == 'object' else f'{indent}]')
|
||||||
|
|
||||||
|
|
||||||
def _extractCutPiece(jsonContent: str, breakPosition: int) -> str:
|
def _findCompleteElementsAtLevel(
|
||||||
"""Extract the incomplete piece at the cut point."""
|
jsonContent: str,
|
||||||
# Get characters after break point (incomplete part)
|
levelStart: int,
|
||||||
afterBreak = jsonContent[breakPosition:breakPosition + 200].strip()
|
breakPosition: int,
|
||||||
# Find where the incomplete piece ends (next comma, bracket, brace, or end)
|
targetDepth: int
|
||||||
for i, char in enumerate(afterBreak):
|
) -> List[Tuple[int, int]]:
|
||||||
if char in [',', ']', '}', '\n']:
|
"""
|
||||||
return afterBreak[:i].strip()
|
Find all complete elements at a specific depth level.
|
||||||
return afterBreak[:50].strip() # Limit to 50 chars if no delimiter found
|
|
||||||
|
Elements inside the structure at targetDepth are at targetDepth + 1.
|
||||||
|
We track depth relative to the start of the structure.
|
||||||
|
|
||||||
|
Returns list of (start, end) tuples for complete elements.
|
||||||
|
"""
|
||||||
|
completeElements = []
|
||||||
|
|
||||||
|
# Track depth relative to the level start
|
||||||
|
# When we're at levelStart, we're at the opening bracket/brace (depth = targetDepth)
|
||||||
|
# Elements inside are at depth = targetDepth + 1
|
||||||
|
relativeDepth = 0 # Depth relative to level start (0 = at opening bracket/brace)
|
||||||
|
inString = False
|
||||||
|
escapeNext = False
|
||||||
|
currentElementStart = None
|
||||||
|
|
||||||
|
# Find the first non-whitespace character after the opening bracket/brace
|
||||||
|
for i in range(levelStart + 1, min(breakPosition, len(jsonContent))):
|
||||||
|
if jsonContent[i] not in [' ', '\n', '\r', '\t']:
|
||||||
|
currentElementStart = i
|
||||||
|
break
|
||||||
|
|
||||||
|
if currentElementStart is None:
|
||||||
|
return completeElements
|
||||||
|
|
||||||
|
for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
|
||||||
|
char = jsonContent[i]
|
||||||
|
|
||||||
|
if escapeNext:
|
||||||
|
escapeNext = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '\\':
|
||||||
|
escapeNext = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '"':
|
||||||
|
inString = not inString
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not inString:
|
||||||
|
if char == '{':
|
||||||
|
relativeDepth += 1
|
||||||
|
elif char == '}':
|
||||||
|
relativeDepth -= 1
|
||||||
|
# Element is complete when we return to the level's depth (relativeDepth == 0)
|
||||||
|
if relativeDepth == 0:
|
||||||
|
# Found end of complete element
|
||||||
|
if currentElementStart is not None:
|
||||||
|
completeElements.append((currentElementStart, i + 1))
|
||||||
|
# Find start of next element
|
||||||
|
j = i + 1
|
||||||
|
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
|
||||||
|
j += 1
|
||||||
|
if j < breakPosition:
|
||||||
|
currentElementStart = j
|
||||||
|
else:
|
||||||
|
currentElementStart = None
|
||||||
|
elif char == '[':
|
||||||
|
relativeDepth += 1
|
||||||
|
elif char == ']':
|
||||||
|
relativeDepth -= 1
|
||||||
|
# Element is complete when we return to the level's depth (relativeDepth == 0)
|
||||||
|
if relativeDepth == 0:
|
||||||
|
# Found end of complete element
|
||||||
|
if currentElementStart is not None:
|
||||||
|
completeElements.append((currentElementStart, i + 1))
|
||||||
|
# Find start of next element
|
||||||
|
j = i + 1
|
||||||
|
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t', ',']:
|
||||||
|
j += 1
|
||||||
|
if j < breakPosition:
|
||||||
|
currentElementStart = j
|
||||||
|
else:
|
||||||
|
currentElementStart = None
|
||||||
|
elif char == ',':
|
||||||
|
# Comma at relativeDepth == 0 means we're between elements at the cut level
|
||||||
|
if relativeDepth == 0:
|
||||||
|
# Element boundary - check if we have a complete element
|
||||||
|
if currentElementStart is not None and currentElementStart < i:
|
||||||
|
# Simple value (string, number, boolean, null) - complete at comma
|
||||||
|
completeElements.append((currentElementStart, i))
|
||||||
|
# Find start of next element
|
||||||
|
j = i + 1
|
||||||
|
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
|
||||||
|
j += 1
|
||||||
|
if j < breakPosition:
|
||||||
|
currentElementStart = j
|
||||||
|
else:
|
||||||
|
currentElementStart = None
|
||||||
|
|
||||||
|
return completeElements
|
||||||
|
|
||||||
|
|
||||||
def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
|
def extractCutPiece(jsonContent: str, breakPosition: int) -> str:
|
||||||
|
"""
|
||||||
|
Extract the incomplete piece at the cut point.
|
||||||
|
Generic function that works with all JSON types: arrays, objects, strings, numbers, booleans, null.
|
||||||
|
|
||||||
|
CRITICAL: Uses findStructureHierarchy to identify the cut level, then parses from the cut level start
|
||||||
|
to correctly identify which element contains the break position.
|
||||||
|
This approach handles all JSON structures generically, including:
|
||||||
|
- Nested objects and arrays
|
||||||
|
- Strings containing brackets, braces, commas
|
||||||
|
- Complex nested structures
|
||||||
|
|
||||||
|
Returns the incomplete element from its start to the break position.
|
||||||
|
"""
|
||||||
|
if breakPosition <= 0 or breakPosition > len(jsonContent):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# First, find the structure hierarchy to identify the cut level
|
||||||
|
hierarchy = findStructureHierarchy(jsonContent, breakPosition)
|
||||||
|
if not hierarchy:
|
||||||
|
# Fallback: return content before break
|
||||||
|
return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()
|
||||||
|
|
||||||
|
# Get the cut level (the structure containing the break position)
|
||||||
|
cutLevel = hierarchy[-1]
|
||||||
|
cutLevelStart = cutLevel['start_pos']
|
||||||
|
cutLevelDepth = cutLevel.get('depth', 0)
|
||||||
|
|
||||||
|
# Parse from cutLevelStart to breakPosition to find element boundaries
|
||||||
|
braceDepth = 0 # Absolute brace depth
|
||||||
|
bracketDepth = 0 # Absolute bracket depth
|
||||||
|
inString = False
|
||||||
|
escapeNext = False
|
||||||
|
|
||||||
|
# Track element start at the cut level
|
||||||
|
currentElementStart = cutLevelStart # Start of current element
|
||||||
|
|
||||||
|
# Parse from cut level start to break position
|
||||||
|
for i in range(cutLevelStart, min(breakPosition, len(jsonContent))):
|
||||||
|
char = jsonContent[i]
|
||||||
|
|
||||||
|
if escapeNext:
|
||||||
|
escapeNext = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '\\':
|
||||||
|
escapeNext = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
if char == '"':
|
||||||
|
inString = not inString
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not inString:
|
||||||
|
if char == '{':
|
||||||
|
braceDepth += 1
|
||||||
|
elif char == '}':
|
||||||
|
braceDepth -= 1
|
||||||
|
elif char == '[':
|
||||||
|
bracketDepth += 1
|
||||||
|
elif char == ']':
|
||||||
|
bracketDepth -= 1
|
||||||
|
elif char == ',':
|
||||||
|
# Comma at cut level separates elements
|
||||||
|
currentDepth = braceDepth + bracketDepth
|
||||||
|
if currentDepth == cutLevelDepth:
|
||||||
|
# This comma is at the cut level - next element starts after it
|
||||||
|
j = i + 1
|
||||||
|
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
|
||||||
|
j += 1
|
||||||
|
if j < breakPosition:
|
||||||
|
currentElementStart = j
|
||||||
|
elif char == ':':
|
||||||
|
# Colon at cut level separates key from value
|
||||||
|
currentDepth = braceDepth + bracketDepth
|
||||||
|
if currentDepth == cutLevelDepth:
|
||||||
|
# This colon is at the cut level - value starts after it
|
||||||
|
j = i + 1
|
||||||
|
while j < breakPosition and j < len(jsonContent) and jsonContent[j] in [' ', '\n', '\r', '\t']:
|
||||||
|
j += 1
|
||||||
|
if j < breakPosition:
|
||||||
|
currentElementStart = j
|
||||||
|
|
||||||
|
# The element containing breakPosition starts at currentElementStart
|
||||||
|
# Find the actual start by skipping leading whitespace
|
||||||
|
actualStart = currentElementStart
|
||||||
|
for i in range(currentElementStart, min(breakPosition, len(jsonContent))):
|
||||||
|
char = jsonContent[i]
|
||||||
|
if char not in [' ', '\n', '\r', '\t']:
|
||||||
|
actualStart = i
|
||||||
|
break
|
||||||
|
|
||||||
|
# Extract the incomplete piece from actualStart to breakPosition
|
||||||
|
# Preserve trailing whitespace - it's needed for merging
|
||||||
|
cutPiece = jsonContent[actualStart:breakPosition]
|
||||||
|
|
||||||
|
# Remove leading whitespace but preserve trailing whitespace
|
||||||
|
cutPiece = cutPiece.lstrip()
|
||||||
|
|
||||||
|
return cutPiece if cutPiece else jsonContent[actualStart:breakPosition]
|
||||||
|
|
||||||
|
|
||||||
|
def findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Find the structure hierarchy backwards from break point to root.
|
Find the structure hierarchy backwards from break point to root.
|
||||||
|
|
||||||
Returns list of level info dicts, from root to cut level.
|
Returns list of level info dicts, from root to cut level.
|
||||||
Each level has: type, start_pos, end_pos, parent_start, content_preview
|
Each level has: type, start_pos, end_pos, depth, key
|
||||||
|
|
||||||
|
CRITICAL: Returns the path from root to cut point.
|
||||||
|
- For closed structures: uses actual end position
|
||||||
|
- For open structures: uses breakPosition
|
||||||
"""
|
"""
|
||||||
hierarchy = []
|
hierarchy = []
|
||||||
|
|
||||||
|
|
@ -1545,8 +1811,11 @@ def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[s
|
||||||
inString = False
|
inString = False
|
||||||
escapeNext = False
|
escapeNext = False
|
||||||
|
|
||||||
# Find all structure boundaries before break point
|
# Track ALL structures (both closed and open) to get correct end positions
|
||||||
structureStack = [] # Stack of (type, start_pos, depth)
|
# Stack of (type, start_pos, depth, end_pos)
|
||||||
|
# end_pos is None until structure is closed
|
||||||
|
structureStack = [] # Stack of (type, start_pos, depth, end_pos)
|
||||||
|
closedStructures = [] # List of closed structures with their end positions
|
||||||
|
|
||||||
for i in range(breakPosition):
|
for i in range(breakPosition):
|
||||||
if i >= len(jsonContent):
|
if i >= len(jsonContent):
|
||||||
|
|
@ -1568,52 +1837,179 @@ def _findStructureHierarchy(jsonContent: str, breakPosition: int) -> List[Dict[s
|
||||||
|
|
||||||
if not inString:
|
if not inString:
|
||||||
if char == '{':
|
if char == '{':
|
||||||
structureStack.append(('object', i, braceDepth + bracketDepth))
|
# Store depth BEFORE incrementing (this is the level of the structure being opened)
|
||||||
|
currentDepth = braceDepth + bracketDepth
|
||||||
|
structureStack.append(('object', i, currentDepth, None))
|
||||||
braceDepth += 1
|
braceDepth += 1
|
||||||
elif char == '}':
|
elif char == '}':
|
||||||
|
# When closing, record the end position and move to closed structures
|
||||||
if structureStack and structureStack[-1][0] == 'object':
|
if structureStack and structureStack[-1][0] == 'object':
|
||||||
_, start, depth = structureStack.pop()
|
structType, start, depth, _ = structureStack.pop()
|
||||||
hierarchy.append({
|
closedStructures.append({
|
||||||
'type': 'object',
|
'type': structType,
|
||||||
'start_pos': start,
|
'start_pos': start,
|
||||||
'end_pos': i + 1,
|
'end_pos': i + 1, # Actual end position
|
||||||
'depth': depth,
|
'depth': depth,
|
||||||
'key': _findKeyBefore(jsonContent, start)
|
'key': findKeyBefore(jsonContent, start)
|
||||||
})
|
})
|
||||||
braceDepth -= 1
|
braceDepth -= 1
|
||||||
elif char == '[':
|
elif char == '[':
|
||||||
structureStack.append(('array', i, braceDepth + bracketDepth))
|
# Store depth BEFORE incrementing
|
||||||
|
currentDepth = braceDepth + bracketDepth
|
||||||
|
structureStack.append(('array', i, currentDepth, None))
|
||||||
bracketDepth += 1
|
bracketDepth += 1
|
||||||
elif char == ']':
|
elif char == ']':
|
||||||
|
# When closing, record the end position
|
||||||
if structureStack and structureStack[-1][0] == 'array':
|
if structureStack and structureStack[-1][0] == 'array':
|
||||||
_, start, depth = structureStack.pop()
|
structType, start, depth, _ = structureStack.pop()
|
||||||
hierarchy.append({
|
closedStructures.append({
|
||||||
'type': 'array',
|
'type': structType,
|
||||||
'start_pos': start,
|
'start_pos': start,
|
||||||
'end_pos': i + 1,
|
'end_pos': i + 1, # Actual end position
|
||||||
'depth': depth,
|
'depth': depth,
|
||||||
'key': _findKeyBefore(jsonContent, start)
|
'key': findKeyBefore(jsonContent, start)
|
||||||
})
|
})
|
||||||
bracketDepth -= 1
|
bracketDepth -= 1
|
||||||
|
|
||||||
# Sort by depth (root first) and filter to get hierarchy from root to cut
|
# Build hierarchy: we need the actual path from root to cut level
|
||||||
hierarchy.sort(key=lambda x: x['depth'])
|
# CRITICAL: Only include structures that are actually on the path
|
||||||
|
# A structure is on the path if it contains the next level's start position
|
||||||
|
|
||||||
# Find which level contains the break point
|
if not structureStack:
|
||||||
cutLevelIndex = -1
|
# No open structures - all were closed before break
|
||||||
for i, level in enumerate(hierarchy):
|
# Return path to deepest closed structure
|
||||||
if level['start_pos'] < breakPosition <= level['end_pos']:
|
if closedStructures:
|
||||||
cutLevelIndex = i
|
maxDepth = max(s['depth'] for s in closedStructures)
|
||||||
|
# Build path: each level must contain the next level
|
||||||
|
path = []
|
||||||
|
for depth in range(maxDepth + 1):
|
||||||
|
candidates = [s for s in closedStructures if s['depth'] == depth]
|
||||||
|
if candidates:
|
||||||
|
# If multiple at same depth, use the one that contains structures at deeper depths
|
||||||
|
if depth < maxDepth:
|
||||||
|
# Find the one that contains a structure at depth + 1
|
||||||
|
nextDepthCandidates = [s for s in closedStructures if s['depth'] == depth + 1]
|
||||||
|
if nextDepthCandidates:
|
||||||
|
nextStart = min(s['start_pos'] for s in nextDepthCandidates)
|
||||||
|
# Find candidate that contains nextStart
|
||||||
|
for candidate in candidates:
|
||||||
|
if candidate['start_pos'] < nextStart < candidate['end_pos']:
|
||||||
|
path.append(candidate)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Fallback: use first candidate
|
||||||
|
path.append(candidates[0])
|
||||||
|
else:
|
||||||
|
path.append(candidates[0])
|
||||||
|
else:
|
||||||
|
path.append(candidates[0])
|
||||||
|
return path
|
||||||
|
return []
|
||||||
|
|
||||||
|
# We have open structures - build path from root to deepest open structure
|
||||||
|
# Strategy: Start from deepest open structure and work backwards to root,
|
||||||
|
# ensuring each level contains the next level
|
||||||
|
|
||||||
|
openByDepth = {}
|
||||||
|
for structType, start, depth, _ in structureStack:
|
||||||
|
openByDepth[depth] = {
|
||||||
|
'type': structType,
|
||||||
|
'start_pos': start,
|
||||||
|
'end_pos': breakPosition,
|
||||||
|
'depth': depth,
|
||||||
|
'key': findKeyBefore(jsonContent, start)
|
||||||
|
}
|
||||||
|
|
||||||
|
maxOpenDepth = max(openByDepth.keys())
|
||||||
|
|
||||||
|
# Build path backwards from deepest to root
|
||||||
|
path = []
|
||||||
|
currentDepth = maxOpenDepth
|
||||||
|
currentStart = openByDepth[maxOpenDepth]['start_pos']
|
||||||
|
|
||||||
|
while currentDepth >= 0:
|
||||||
|
# Look for structure at currentDepth that contains currentStart
|
||||||
|
# First check open structures
|
||||||
|
if currentDepth in openByDepth:
|
||||||
|
struct = openByDepth[currentDepth]
|
||||||
|
if struct['start_pos'] <= currentStart:
|
||||||
|
path.insert(0, struct)
|
||||||
|
currentStart = struct['start_pos']
|
||||||
|
currentDepth -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check closed structures
|
||||||
|
candidates = [s for s in closedStructures if s['depth'] == currentDepth and s['start_pos'] <= currentStart < s['end_pos']]
|
||||||
|
if candidates:
|
||||||
|
# Use the one that ends latest (most recent)
|
||||||
|
struct = max(candidates, key=lambda x: x['end_pos'])
|
||||||
|
path.insert(0, struct)
|
||||||
|
currentStart = struct['start_pos']
|
||||||
|
currentDepth -= 1
|
||||||
|
else:
|
||||||
|
# No structure found at this depth - break
|
||||||
break
|
break
|
||||||
|
|
||||||
if cutLevelIndex >= 0:
|
return path
|
||||||
# Return hierarchy from root to cut level
|
|
||||||
return hierarchy[:cutLevelIndex + 1]
|
# Return the hierarchy (path from root to cut level)
|
||||||
|
if hierarchy:
|
||||||
|
return hierarchy
|
||||||
|
|
||||||
|
# Fallback: if JSON starts with { or [, create a root level
|
||||||
|
if jsonContent and jsonContent.strip():
|
||||||
|
firstChar = jsonContent.strip()[0]
|
||||||
|
if firstChar == '{':
|
||||||
|
return [{
|
||||||
|
'type': 'object',
|
||||||
|
'start_pos': 0,
|
||||||
|
'end_pos': breakPosition,
|
||||||
|
'depth': 0,
|
||||||
|
'key': None
|
||||||
|
}]
|
||||||
|
elif firstChar == '[':
|
||||||
|
return [{
|
||||||
|
'type': 'array',
|
||||||
|
'start_pos': 0,
|
||||||
|
'end_pos': breakPosition,
|
||||||
|
'depth': 0,
|
||||||
|
'key': None
|
||||||
|
}]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
|
def extractOverlapContext(jsonContent: str, breakPosition: int) -> str:
|
||||||
|
"""
|
||||||
|
Extract overlap context: the object containing the cut element.
|
||||||
|
|
||||||
|
Returns ONLY the object containing the cut element (the incomplete element itself).
|
||||||
|
This is what the continuation should start with for proper merging.
|
||||||
|
|
||||||
|
CRITICAL: Preserves trailing whitespace for proper merging.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
jsonContent: The incomplete JSON string
|
||||||
|
breakPosition: Position where JSON was cut
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
String with the object containing the cut element
|
||||||
|
"""
|
||||||
|
if not jsonContent or breakPosition <= 0:
|
||||||
|
return jsonContent[-200:].strip() if jsonContent else ""
|
||||||
|
|
||||||
|
# Extract cut piece (incomplete element) - this is the object containing the cut element
|
||||||
|
cutPiece = extractCutPiece(jsonContent, breakPosition)
|
||||||
|
|
||||||
|
# Return only the cut piece - the object containing the cut element
|
||||||
|
if cutPiece:
|
||||||
|
return cutPiece
|
||||||
|
|
||||||
|
# Fallback: show content before break
|
||||||
|
return jsonContent[max(0, breakPosition - 200):breakPosition].lstrip()
|
||||||
|
|
||||||
|
|
||||||
|
def findKeyBefore(jsonContent: str, pos: int) -> Optional[str]:
|
||||||
"""Find the key name before a structure start position."""
|
"""Find the key name before a structure start position."""
|
||||||
# Look backwards for "key": pattern
|
# Look backwards for "key": pattern
|
||||||
before = jsonContent[max(0, pos - 100):pos]
|
before = jsonContent[max(0, pos - 100):pos]
|
||||||
|
|
@ -1832,10 +2228,13 @@ def _extractLastCompleteArrayElementsWithContext(
|
||||||
break
|
break
|
||||||
|
|
||||||
if formattedElements:
|
if formattedElements:
|
||||||
# Format as JSON array rows
|
# Format as JSON array rows (without hardcoded indentation - caller will add it)
|
||||||
result = []
|
result = []
|
||||||
for elem in formattedElements:
|
for elem in formattedElements:
|
||||||
result.append(f" {elem},")
|
# Remove leading comma if present (from mid-element extraction)
|
||||||
|
cleanElem = elem.lstrip(',').strip()
|
||||||
|
if cleanElem:
|
||||||
|
result.append(f"{cleanElem},")
|
||||||
return "\n".join(result)
|
return "\n".join(result)
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
|
||||||
216
tests/test_overlap_context.py
Normal file
216
tests/test_overlap_context.py
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Test function to verify structure hierarchy and overlap context generation.
|
||||||
|
Tests the functions used to generate continuation prompts for incomplete JSON.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def testOverlapContext():
|
||||||
|
"""
|
||||||
|
Test function that loads two JSON parts and returns:
|
||||||
|
1. Structure hierarchy result
|
||||||
|
2. Overlap requirement context result
|
||||||
|
"""
|
||||||
|
# Load the JSON file (incomplete/cut JSON)
|
||||||
|
basePath = Path(__file__).parent.parent.parent / "local" / "debug" / "prompts"
|
||||||
|
|
||||||
|
file1Path = basePath / "20260104-220716-032-chapter_2_section_section_2_response.txt"
|
||||||
|
|
||||||
|
# Read JSON (incomplete)
|
||||||
|
with open(file1Path, 'r', encoding='utf-8') as f:
|
||||||
|
json1Content = f.read().strip()
|
||||||
|
|
||||||
|
# Find the break position in json1 (where it was cut)
|
||||||
|
# The last line in json1 is incomplete: [37963, 37967, 37987, 37991, 37993, 37997, 38011, 38039
|
||||||
|
# We need to find where this incomplete array element ends (right after the last number)
|
||||||
|
# Find the last number in the file - that's where the content actually ends
|
||||||
|
import re
|
||||||
|
# Find all numbers at the end and get the position of the last one
|
||||||
|
# Look for the pattern: number followed by whitespace/newline or end of string
|
||||||
|
matches = list(re.finditer(r'\d+', json1Content))
|
||||||
|
if matches:
|
||||||
|
lastMatch = matches[-1]
|
||||||
|
# Break position is right after the last number (where the closing ] should be)
|
||||||
|
breakPosition = lastMatch.end()
|
||||||
|
else:
|
||||||
|
# Fallback: use end of file
|
||||||
|
breakPosition = len(json1Content.rstrip())
|
||||||
|
|
||||||
|
print(f"Break position determined: {breakPosition}")
|
||||||
|
print(f"Content at break position: '{json1Content[max(0, breakPosition-50):breakPosition+10]}'")
|
||||||
|
|
||||||
|
# Import the functions we need to test
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from modules.shared.jsonUtils import findStructureHierarchy, extractCutPiece, buildIncompleteContext
|
||||||
|
from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath
|
||||||
|
|
||||||
|
# Test 1: Find structure hierarchy
|
||||||
|
print("=" * 80)
|
||||||
|
print("TEST 1: Structure Hierarchy")
|
||||||
|
print("=" * 80)
|
||||||
|
print(f"Break position: {breakPosition}")
|
||||||
|
print(f"JSON length: {len(json1Content)}")
|
||||||
|
print(f"Content around break: '{json1Content[max(0, breakPosition-100):breakPosition+20]}'")
|
||||||
|
hierarchy = findStructureHierarchy(json1Content, breakPosition)
|
||||||
|
print(f"\nHierarchy levels found: {len(hierarchy) if hierarchy else 0}")
|
||||||
|
if not hierarchy:
|
||||||
|
print("WARNING: No hierarchy found! This suggests the function isn't working correctly.")
|
||||||
|
else:
|
||||||
|
print("\nHierarchy details (from root to cut level):")
|
||||||
|
for i, level in enumerate(hierarchy):
|
||||||
|
levelType = level['type']
|
||||||
|
levelKey = level.get('key', 'N/A')
|
||||||
|
levelDepth = level['depth']
|
||||||
|
levelStart = level['start_pos']
|
||||||
|
levelEnd = level['end_pos']
|
||||||
|
print(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}")
|
||||||
|
# Show a snippet of content at this level
|
||||||
|
if levelStart < len(json1Content):
|
||||||
|
snippet = json1Content[levelStart:min(levelStart + 50, levelEnd, len(json1Content))]
|
||||||
|
print(f" Content: {repr(snippet)}")
|
||||||
|
|
||||||
|
# Test 2: Extract cut piece
|
||||||
|
print("\n" + "=" * 80)
|
||||||
|
print("TEST 2: Extract Cut Piece")
|
||||||
|
print("=" * 80)
|
||||||
|
cutPiece = extractCutPiece(json1Content, breakPosition)
|
||||||
|
print(f"\nCut piece extracted (length: {len(cutPiece)}):")
|
||||||
|
if cutPiece:
|
||||||
|
print(cutPiece[:500] if len(cutPiece) > 500 else cutPiece)
|
||||||
|
else:
|
||||||
|
print("WARNING: Cut piece is empty! This suggests the function isn't working correctly.")
|
||||||
|
# Try to manually find the cut piece
|
||||||
|
# Look backwards from break position for the start of the incomplete array
|
||||||
|
i = breakPosition - 1
|
||||||
|
while i >= 0 and json1Content[i] not in ['[', ',', '\n']:
|
||||||
|
i -= 1
|
||||||
|
if i >= 0 and json1Content[i] == '[':
|
||||||
|
manualCutPiece = json1Content[i:breakPosition]
|
||||||
|
print(f"\nManually found cut piece: {manualCutPiece[:200]}")
|
||||||
|
|
||||||
|
# Test 3: Build incomplete context (structure hierarchy with cut point)
|
||||||
|
print("\n" + "=" * 80)
|
||||||
|
print("TEST 3: Build Incomplete Context (Structure Hierarchy with Cut Point)")
|
||||||
|
print("=" * 80)
|
||||||
|
print("Expected: Should show complete hierarchy from root to cut point")
|
||||||
|
print(" with complete elements before cut and cut piece marked")
|
||||||
|
incompleteContext = buildIncompleteContext(json1Content, breakPosition)
|
||||||
|
print(f"\nIncomplete context (length: {len(incompleteContext)} chars):")
|
||||||
|
print("-" * 80)
|
||||||
|
print(incompleteContext)
|
||||||
|
print("-" * 80)
|
||||||
|
|
||||||
|
# Validate the output
|
||||||
|
if incompleteContext:
|
||||||
|
# Check if it shows hierarchy (should have multiple levels of indentation)
|
||||||
|
lines = incompleteContext.split('\n')
|
||||||
|
indentLevels = set()
|
||||||
|
for line in lines:
|
||||||
|
if line.strip():
|
||||||
|
indent = len(line) - len(line.lstrip())
|
||||||
|
indentLevels.add(indent)
|
||||||
|
print(f"\nValidation: Found {len(indentLevels)} different indent levels (should be > 1 for hierarchy)")
|
||||||
|
|
||||||
|
# Check if cut point is marked
|
||||||
|
if "<-- CUT POINT" in incompleteContext:
|
||||||
|
print("Validation: Cut point marker found ✓")
|
||||||
|
else:
|
||||||
|
print("Validation: WARNING - Cut point marker NOT found!")
|
||||||
|
|
||||||
|
# Check if root structure is shown
|
||||||
|
if incompleteContext.strip().startswith('{') or incompleteContext.strip().startswith('['):
|
||||||
|
print("Validation: Root structure opening found ✓")
|
||||||
|
else:
|
||||||
|
print("Validation: WARNING - Root structure opening NOT found!")
|
||||||
|
else:
|
||||||
|
print("WARNING: Incomplete context is empty!")
|
||||||
|
|
||||||
|
# Test 4: Extract overlap context (cut part and full part before same level)
|
||||||
|
print("\n" + "=" * 80)
|
||||||
|
print("TEST 4: Extract Overlap Context (Cut Part + Full Part Before Same Level)")
|
||||||
|
print("=" * 80)
|
||||||
|
overlapContext = CodeGenerationPath._extractOverlapContext(json1Content, breakPosition)
|
||||||
|
print(f"\nOverlap context:")
|
||||||
|
print(overlapContext)
|
||||||
|
|
||||||
|
# Return results as dictionary
|
||||||
|
results = {
|
||||||
|
"hierarchy": hierarchy,
|
||||||
|
"cutPiece": cutPiece,
|
||||||
|
"incompleteContext": incompleteContext,
|
||||||
|
"overlapContext": overlapContext,
|
||||||
|
"breakPosition": breakPosition,
|
||||||
|
"json1Length": len(json1Content),
|
||||||
|
"json1Content": json1Content
|
||||||
|
}
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("Testing Overlap Context Generation")
|
||||||
|
print("=" * 80)
|
||||||
|
results = testOverlapContext()
|
||||||
|
|
||||||
|
print("\n" + "=" * 80)
|
||||||
|
print("SUMMARY")
|
||||||
|
print("=" * 80)
|
||||||
|
print(f"\nBreak position: {results['breakPosition']}")
|
||||||
|
print(f"JSON1 length: {results['json1Length']}")
|
||||||
|
print(f"Hierarchy levels: {len(results['hierarchy']) if results['hierarchy'] else 0}")
|
||||||
|
print(f"Cut piece length: {len(results['cutPiece'])}")
|
||||||
|
print(f"Incomplete context length: {len(results['incompleteContext'])}")
|
||||||
|
print(f"Overlap context length: {len(results['overlapContext'])}")
|
||||||
|
|
||||||
|
# Save results to file for inspection
|
||||||
|
outputPath = Path(__file__).parent.parent.parent / "local" / "debug" / "test_overlap_results.txt"
|
||||||
|
outputPath.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
with open(outputPath, 'w', encoding='utf-8') as f:
|
||||||
|
f.write("=" * 80 + "\n")
|
||||||
|
f.write("OVERLAP CONTEXT TEST RESULTS\n")
|
||||||
|
f.write("=" * 80 + "\n\n")
|
||||||
|
|
||||||
|
f.write("FIRST JSON (CUT/INCOMPLETE):\n")
|
||||||
|
f.write("-" * 80 + "\n")
|
||||||
|
f.write(f"Break position: {results['breakPosition']}\n")
|
||||||
|
f.write(f"JSON length: {results['json1Length']}\n")
|
||||||
|
json1Content = results['json1Content']
|
||||||
|
f.write(f"Content around break: '{json1Content[max(0, results['breakPosition']-100):results['breakPosition']+20]}'\n\n")
|
||||||
|
f.write("Full JSON1 content:\n")
|
||||||
|
f.write(json1Content)
|
||||||
|
|
||||||
|
f.write("\n\n" + "=" * 80 + "\n")
|
||||||
|
f.write("STRUCTURE HIERARCHY:\n")
|
||||||
|
f.write("-" * 80 + "\n")
|
||||||
|
if results['hierarchy']:
|
||||||
|
f.write(f"Hierarchy levels found: {len(results['hierarchy'])}\n\n")
|
||||||
|
f.write("Hierarchy details (from root to cut level):\n")
|
||||||
|
for i, level in enumerate(results['hierarchy']):
|
||||||
|
levelType = level['type']
|
||||||
|
levelKey = level.get('key', 'N/A')
|
||||||
|
levelDepth = level['depth']
|
||||||
|
levelStart = level['start_pos']
|
||||||
|
levelEnd = level['end_pos']
|
||||||
|
f.write(f" Level {i}: {levelType:6s} depth={levelDepth} key='{levelKey}' start={levelStart} end={levelEnd}\n")
|
||||||
|
else:
|
||||||
|
f.write("No hierarchy found\n")
|
||||||
|
|
||||||
|
f.write("\n\n" + "=" * 80 + "\n")
|
||||||
|
f.write("INCOMPLETE CONTEXT (Structure Hierarchy with Cut Point):\n")
|
||||||
|
f.write("-" * 80 + "\n")
|
||||||
|
f.write(results['incompleteContext'])
|
||||||
|
|
||||||
|
f.write("\n\n" + "=" * 80 + "\n")
|
||||||
|
f.write("OVERLAP CONTEXT (Object containing the cut element):\n")
|
||||||
|
f.write("-" * 80 + "\n")
|
||||||
|
f.write(results['overlapContext'])
|
||||||
|
|
||||||
|
print(f"\n\nFull results saved to: {outputPath}")
|
||||||
Loading…
Reference in a new issue