ai iteration loops running 21 of 22 test cases - fixing object tree extraction

parent 24f152d0b9
commit adbc29f069

7 changed files with 1071 additions and 127 deletions
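The changes below revolve around one loop: call the model, pull whatever sections can be parsed (or repaired) out of the response, and keep iterating with a rebuilt continuation prompt until the model signals completion. A rough, hypothetical sketch of that flow (simplified names, not the project's actual API):

import re

async def repair_based_loop(prompt, call_model, build_continuation_prompt, extract_sections, max_iterations=50):
    """Sketch only: accumulate sections across iterations until completion is detected."""
    sections, last_raw = [], None
    for iteration in range(1, max_iterations + 1):
        raw = await call_model(prompt if last_raw is None else await build_continuation_prompt(last_raw))
        if not raw:
            break
        last_raw = raw
        extracted, json_was_complete = extract_sections(raw)  # tolerates broken JSON
        sections.extend(extracted)
        # Stop when the model marks itself done, or the JSON closed cleanly
        if re.search(r'"complete_response"\s*:\s*true', raw, re.IGNORECASE) or json_was_complete:
            break
    return sections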
@@ -0,0 +1,72 @@
Message 0 (user)
Length: 2015 chars
================================================================================
User request: "Generate the first 1000 prime numbers."

Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.

JSON structure template (reference only - shows the pattern):
{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "Generated Document",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_heading_example",
          "content_type": "heading",
          "elements": [
            {"level": 1, "text": "Heading Text"}
          ],
          "order": 0
        },
        {
          "id": "section_paragraph_example",
          "content_type": "paragraph",
          "elements": [
            {"text": "Paragraph text content"}
          ],
          "order": 0
        },
        {
          "id": "section_list_example",
          "content_type": "list",
          "elements": [
            {
              "items": [
                {"text": "Item 1"},
                {"text": "Item 2"}
              ],
              "list_type": "numbered"
            }
          ],
          "order": 0
        },
        {
          "id": "section_table_example",
          "content_type": "table",
          "elements": [
            {
              "headers": ["Column 1", "Column 2"],
              "rows": [
                ["Row 1 Col 1", "Row 1 Col 2"],
                ["Row 2 Col 1", "Row 2 Col 2"]
              ],
              "caption": "Table caption"
            }
          ],
          "order": 0
        },
        {
          "id": "section_code_example",
          "content_type": "code",
          "elements": [
            {
              "code... [truncated]
================================================================================

@@ -0,0 +1,200 @@
API Response
Length: 6082 chars
================================================================================
{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "List of First 1000 Prime Numbers",
      "filename": "prime_numbers.json",
      "sections": [
        {
          "id": "section_prime_numbers",
          "content_type": "list",
          "elements": [
            {
              "items": [
                {"number": 2}, {"number": 3}, {"number": 5}, {"number": 7}, {"number": 11}, {"number": 13}, {"number": 17}, {"number": 19},
                {"number": 23}, {"number": 29}, {"number": 31}, {"number": 37}, {"number": 41}, {"number": 43}, {"number": 47}, {"number": 53},
                {"number": 59}, {"number": 61}, {"number": 67}, {"number": 71}, {"number": 73}, {"number": 79}, {"number": 83}, {"number": 89},
                {"number": 97}, {"number": 101}, {"number": 103}, {"number": 107}, {"number": 109}, {"number": 113}, {"number": 127}, {"number": 131},
                {"number": 137}, {"number": 139}, {"number": 149}, {"number": 151}, {"number": 157}, {"number": 163}, {"number": 167}, {"number": 173},
                {"number": 179}, {"number": 181}, {"number": 191}, {"number": 193}, {"number": 197}, {"number": 199}, {"number": 211}, {"number": 223},
                {"number": 227}, {"number": 229}, {"number": 233}, {"number": 239}, {"number": 241}, {"number": 251}, {"number": 257}, {"number": 263},
                {"number": 269}, {"number": 271}, {"number": 277}, {"number": 281}, {"number": 283}, {"number": 293}, {"number": 307}, {"number": 311},
                {"number": 313}, {"number": 317}, {"number": 331}, {"number": 337}, {"number": 347}, {"number": 349}, {"number": 353}, {"number": 359},
                {"number": 367}, {"number": 373}, {"number": 379}, {"number": 383}, {"number": 389}, {"number": 397}, {"number": 401}, {"number": 409},
                {"number": 419}, {"number": 421}, {"number": 431}, {"number": 433}, {"number": 439}, {"number": 443}, {"number": 449}, {"number": 457},
                {"number": 461}, {"number": 463}, {"number": 467}, {"number": 479}, {"number": 487}, {"number": 491}, {"number": 499}, {"number": 503},
                {"number": 509}, {"number": 521}, {"number": 523}, {"number": 541}, {"number": 547}, {"number": 557}, {"number": 563}, {"number": 569},
                {"number": 571}, {"number": 577}, {"number": 587}, {"number": 593}, {"number": 599}, {"number": 601}, {"number": 607}, {"number": 613},
                {"number": 617}, {"number": 619}, {"number": 631}, {"number": 641}, {"number": 643}, {"number": 647}, {"number": 653}, {"number": 659},
                {"number": 661}, {"number": 673}, {"number": 677}, {"number": 683}, {"number": 691}, {"number": 701}, {"number": 709}, {"number": 719},
                {"number": 727}, {"number": 733}, {"number": 739}, {"number": 743}, {"number": 751}, {"number": 757}, {"number": 761}, {"number": 769},
                {"number": 773}, {"number": 787}, {"number": 797}, {"number": 809}, {"number": 811}, {"number": 821}, {"number": 823}, {"number": 827},
                {"number": 829}, {"number": 839}, {"number": 853}, {"number": 857}, {"number": 859}, {"number": 863}, {"number": 877}, {"number": 881},
                {"number": 883}, {"number": 887}, {"number": 907}, {"number": 911}, {"number": 919}, {"number": 929}, {"number": 937}, {"number": 941},
                {"number": 947}, {"number": 953}, {"number": 967}, {"number": 971}, {"number": 977}, {"number": 983}, {"number": 991}, {"number": 997}
              ],
              "list_type": "numbered"
            }
          ],
          "order": 0
        }
      ]
    }
  ]
}
================================================================================
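Before the diffs, one detail worth flagging: the changes below check the raw response text for a "complete_response": true marker before any JSON parsing. A minimal, standalone illustration of that check (hypothetical helper name, same regex as in the diff):

import re

def has_complete_flag(raw_response: str) -> bool:
    """True if the raw model output already carries "complete_response": true."""
    return bool(re.search(r'"complete_response"\s*:\s*true', raw_response, re.IGNORECASE))

# Example: a truncated fragment is not complete, a closed document can be
assert not has_complete_flag('{"documents": [{"sections": [')
assert has_complete_flag('{"complete_response": true, "documents": []}')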
@@ -92,18 +92,6 @@ class AiObjects:

         # Input bytes will be calculated inside _callWithModel

-        # Compress optionally (prompt/context) - simple truncation fallback kept here
-        def _maybeTruncate(text: str, limit: int) -> str:
-            data = text.encode("utf-8")
-            if len(data) <= limit:
-                return text
-            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"
-
-        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
-            prompt = _maybeTruncate(prompt, 2000)
-        if options.compressContext and len(context.encode("utf-8")) > 70000:
-            context = _maybeTruncate(context, 70000)
-
         # Generation parameters are handled inside _callWithModel

         # Get failover models for this operation type
@@ -129,7 +117,7 @@ class AiObjects:
             try:
                 logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")

-                # Call the model
+                # Call the model directly - no truncation or compression here
                 response = await self._callWithModel(model, prompt, context, options)

                 logger.info(f"✅ AI call successful with model: {model.name}")
@@ -124,7 +124,9 @@ Respond with ONLY a JSON object in this exact format:
         self,
         prompt: str,
         options: AiCallOptions,
-        debugPrefix: str = "ai_call"
+        debugPrefix: str = "ai_call",
+        promptBuilder: Optional[callable] = None,
+        promptArgs: Optional[Dict[str, Any]] = None
     ) -> str:
         """
         Shared core function for AI calls with repair-based looping system.
@@ -141,6 +143,7 @@ Respond with ONLY a JSON object in this exact format:
         max_iterations = 50 # Prevent infinite loops
         iteration = 0
         allSections = [] # Accumulate all sections across iterations
+        lastRawResponse = None # Store last raw JSON response for continuation

         logger.debug(f"Starting AI call with repair-based looping (debug prefix: {debugPrefix})")

@@ -149,14 +152,18 @@
             logger.debug(f"AI call iteration {iteration}/{max_iterations}")

             # Build iteration prompt
-            if len(allSections) > 0:
-                # This is a continuation - build continuation context
-                continuationContext = buildContinuationContext(allSections)
-                logger.info(f"Continuation context: {continuationContext.get('section_count')} sections, next order: {continuationContext.get('next_order')}")
+            if len(allSections) > 0 and promptBuilder and promptArgs:
+                # This is a continuation - build continuation context with raw JSON and rebuild prompt
+                continuationContext = buildContinuationContext(allSections, lastRawResponse)
+                logger.info(f"Continuation context: {continuationContext.get('section_count')} sections")
+                if lastRawResponse:
+                    logger.debug(f"Iteration {iteration}: Including previous response in continuation context ({len(lastRawResponse)} chars)")
+                else:
+                    logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

-                # If prompt contains a placeholder for continuation, inject the context
-                # For now, we'll handle this at the calling code level
-                iterationPrompt = prompt
+                # Rebuild prompt with continuation context using the provided prompt builder
+                iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
+                logger.debug(f"Rebuilt prompt with continuation context for iteration {iteration}")
             else:
                 # First iteration - use original prompt
                 iterationPrompt = prompt
@@ -179,6 +186,13 @@
             response = await self.aiObjects.call(request)
             result = response.content

+            # Debug: Check response immediately from API
+            if iteration == 1 and result:
+                first_chars = result[:200].replace('\n', '\\n').replace('\r', '\\r')
+                logger.debug(f"Iteration 1: Raw API response starts with (first 200 chars): '{first_chars}'")
+                if result.strip().startswith('},') or result.strip().startswith('],'):
+                    logger.error(f"Iteration 1: API returned fragment! Full start: '{result[:200]}'")
+
             # Write raw AI response to debug file
             if iteration == 1:
                 self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
@@ -196,10 +210,23 @@
                 logger.warning(f"Iteration {iteration}: Empty response, stopping")
                 break

+            # Store raw response for continuation (even if broken)
+            lastRawResponse = result
+
+            # Check for complete_response flag in raw response (before parsing)
+            import re
+            if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
+                logger.info(f"Iteration {iteration}: Detected complete_response flag in raw response")
+
             # Extract sections from response (handles both valid and broken JSON)
             extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

             if not extractedSections:
+                # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
+                if iteration > 1 and not wasJsonComplete:
+                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
+                    continue
+                # Otherwise, stop if no sections
                 logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                 break
@@ -208,7 +235,7 @@
             logger.info(f"Iteration {iteration}: Extracted {len(extractedSections)} sections (total: {len(allSections)})")

             # Check if we should continue (completion detection)
-            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete):
+            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                 logger.debug(f"Iteration {iteration}: Continuing generation")
                 continue
             else:
@@ -241,6 +268,7 @@
         """
         Extract sections from AI response, handling both valid and broken JSON.
         Uses repair mechanism for broken JSON.
+        Checks for "complete_response": true flag to determine completion.
         Returns (sections, wasJsonComplete)
         """
         # First, try to parse as valid JSON
@@ -248,14 +276,35 @@
             extracted = extractJsonString(result)
             parsed_result = json.loads(extracted)

+            # Check if AI marked response as complete
+            isComplete = parsed_result.get("complete_response", False) == True
+            if isComplete:
+                logger.info(f"Iteration {iteration}: AI marked response as complete (complete_response: true)")
+
             # Extract sections from parsed JSON
             sections = extractSectionsFromDocument(parsed_result)
             logger.debug(f"Iteration {iteration}: Valid JSON - extracted {len(sections)} sections")
-            return sections, True # JSON was complete
+
+            # If AI marked as complete, always return as complete
+            if isComplete:
+                return sections, True
+
+            # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
+            # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
+            # Don't use repair mechanism - just mark as incomplete so loop continues
+            if len(sections) == 0 and iteration > 1:
+                logger.info(f"Iteration {iteration}: Continuation fragment with 0 extractable sections - JSON incomplete, continuing")
+                return sections, False # Mark as incomplete so loop continues
+
+            # First iteration with 0 sections means empty response - stop
+            if len(sections) == 0:
+                return sections, True # Complete but empty
+
+            return sections, True # JSON was complete with sections
+
         except json.JSONDecodeError as e:
-            # Broken JSON - try repair mechanism
-            logger.warning(f"Iteration {iteration}: Invalid JSON, attempting repair: {str(e)}")
+            # Broken JSON - try repair mechanism (normal in iterative generation)
+            logger.info(f"Iteration {iteration}: JSON incomplete/broken, attempting repair: {str(e)}")
             self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")

             # Try to repair
@@ -279,16 +328,25 @@
         self,
         allSections: List[Dict[str, Any]],
         iteration: int,
-        wasJsonComplete: bool
+        wasJsonComplete: bool,
+        rawResponse: str = None
     ) -> bool:
         """
-        Determine if generation should continue based on JSON completeness.
+        Determine if generation should continue based on JSON completeness and complete_response flag.
         Returns True if we should continue, False if done.
         """
         if len(allSections) == 0:
             return True # No sections yet, continue

-        # Simple rule: if JSON was complete, we're done
+        # Check for complete_response flag in raw response
+        if rawResponse:
+            import re
+            # Look for complete_response: true pattern (allowing for whitespace variations)
+            if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
+                logger.info("AI marked response as complete (complete_response: true) - stopping generation")
+                return False
+
+        # If JSON was complete (and no complete_response flag), we're done
         # If JSON was broken and repaired, continue to get more content
         if wasJsonComplete:
             logger.info("JSON was complete - stopping generation")
@@ -398,6 +456,15 @@
         else:
             logger.debug(f"Using provided options: operationType={options.operationType}, priority={options.priority}")

+        # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+        # Compressing would truncate the template structure and confuse the AI
+        if outputFormat: # Document generation with structured output
+            if not options:
+                options = AiCallOptions()
+            options.compressPrompt = False # JSON templates must NOT be truncated
+            options.compressContext = False # Context also should not be compressed
+            logger.debug("Document generation detected - disabled prompt/context compression")
+
         # Handle document generation with specific output format using unified approach
         if outputFormat:
             # Use unified generation method for all document generation
@@ -411,7 +478,22 @@
             from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
             # First call without continuation context
             generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
-            generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
+
+            # Prepare prompt builder arguments for continuation
+            promptArgs = {
+                "outputFormat": outputFormat,
+                "userPrompt": prompt,
+                "title": title,
+                "extracted_content": extracted_content
+            }
+
+            generated_json = await self._callAiWithLooping(
+                generation_prompt,
+                options,
+                "document_generation",
+                buildGenerationPrompt,
+                promptArgs
+            )

             # Parse the generated JSON (extract fenced/embedded JSON first)
             try:
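The service above now hands a prompt-builder coroutine plus its keyword arguments into the looping call, so continuation prompts can be rebuilt on every iteration. In spirit (hypothetical builder signature, not the project's real one):

import asyncio
from typing import Optional

async def buildPrompt(userPrompt: str, title: str, continuationContext: Optional[dict] = None) -> str:
    # First call: plain request; continuation: remind the model what already exists
    if continuationContext:
        return f'{userPrompt} (continue, {continuationContext.get("section_count", 0)} sections already generated)'
    return f'{userPrompt} (document title: {title})'

promptArgs = {"userPrompt": "Generate the first 1000 prime numbers.", "title": "Primes"}
first_prompt = asyncio.run(buildPrompt(**promptArgs))
follow_up_prompt = asyncio.run(buildPrompt(**promptArgs, continuationContext={"section_count": 1}))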
@@ -9,6 +9,7 @@ from typing import Dict, Any
 logger = logging.getLogger(__name__)

 # Centralized JSON structure template for document generation
+# Includes examples for all content types so AI knows the structure patterns
 TEMPLATE_JSON_DOCUMENT_GENERATION = """{
   "metadata": {
     "split_strategy": "single_document",
@@ -22,16 +23,60 @@ TEMPLATE_JSON_DOCUMENT_GENERATION = """{
       "filename": "document.json",
       "sections": [
         {
-          "id": "section_1",
-          "content_type": "heading|paragraph|table|list|code",
+          "id": "section_heading_example",
+          "content_type": "heading",
           "elements": [
-            // heading: {"level": 1, "text": "..."}
-            // paragraph: {"text": "..."}
-            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
-            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
-            // code: {"code": "...", "language": "..."}
+            {"level": 1, "text": "Heading Text"}
           ],
-          "order": 1
+          "order": 0
+        },
+        {
+          "id": "section_paragraph_example",
+          "content_type": "paragraph",
+          "elements": [
+            {"text": "Paragraph text content"}
+          ],
+          "order": 0
+        },
+        {
+          "id": "section_list_example",
+          "content_type": "list",
+          "elements": [
+            {
+              "items": [
+                {"text": "Item 1"},
+                {"text": "Item 2"}
+              ],
+              "list_type": "numbered"
+            }
+          ],
+          "order": 0
+        },
+        {
+          "id": "section_table_example",
+          "content_type": "table",
+          "elements": [
+            {
+              "headers": ["Column 1", "Column 2"],
+              "rows": [
+                ["Row 1 Col 1", "Row 1 Col 2"],
+                ["Row 2 Col 1", "Row 2 Col 2"]
+              ],
+              "caption": "Table caption"
+            }
+          ],
+          "order": 0
+        },
+        {
+          "id": "section_code_example",
+          "content_type": "code",
+          "elements": [
+            {
+              "code": "function example() { return true; }",
+              "language": "javascript"
+            }
+          ],
+          "order": 0
         }
       ]
     }
@@ -48,10 +93,10 @@ async def buildGenerationPrompt(
 ) -> str:
     """
     Build the unified generation prompt using a single JSON template.
-    Simplified version without continuation logic in prompt.
+    Generic solution that works for any user request.

     Args:
-        outputFormat: Target output format (html, pdf, docx, etc.)
+        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
         userPrompt: User's original prompt for document generation
         title: Title for the document
         extracted_content: Optional extracted content from documents to prepend to prompt
@@ -64,63 +109,88 @@
     title_value = title if title else "Generated Document"
     json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)

-    # Check if this is a continuation request
-    if continuationContext and continuationContext.get("section_count", 0) > 0:
-        # Continuation prompt - simple and focused
-        section_count = continuationContext.get("section_count", 0)
-        next_order = continuationContext.get("next_order", 1)
-        last_content_sample = continuationContext.get("last_content_sample", "")
+    # Build prompt based on whether this is a continuation or first call
+    # Check if we have valid continuation context with actual JSON fragment
+    has_continuation = (
+        continuationContext
+        and continuationContext.get("section_count", 0) > 0
+        and continuationContext.get("last_raw_json", "")
+        and continuationContext.get("last_raw_json", "").strip() != "{}"
+    )

-        generation_prompt = f"""Continue generating structured JSON content.
+    if has_continuation:
+        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
+        last_raw_json = continuationContext.get("last_raw_json", "")
+        last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
+        last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
+        total_items_count = continuationContext.get("total_items_count", 0)

-ORIGINAL REQUEST: "{userPrompt}"
-TARGET FORMAT: {outputFormat}
-TITLE: "{title_value}"
+        # Show the last few items to indicate where to continue (limit fragment size)
+        # Extract just the ending portion of the JSON to show where it cut off
+        fragment_snippet = ""
+        if last_raw_json:
+            # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
+            fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
+            # Add ellipsis if truncated
+            if len(last_raw_json) > 1500:
+                fragment_snippet = "..." + fragment_snippet

-CONTEXT - Already generated:
-- Total sections generated: {section_count}
-- Next section order: {next_order}
-- Last content: {last_content_sample}
+        # Build clear continuation guidance
+        continuation_guidance = []

-YOUR TASK:
-Continue where previous generation stopped.
-Generate the NEXT section(s) starting with section_{next_order}.
-Generate as much content as possible.
+        if total_items_count > 0:
+            continuation_guidance.append(f"You have already generated {total_items_count} items.")

-RULES:
-- Follow the JSON template structure below exactly
-- Fill sections with ACTUAL data based on the user request
-- Use appropriate content_type for the data
-- Generate REAL content, not summaries or placeholders
-- Generate multiple sections if possible
+        # Show the last complete item object (full object format)
+        if last_item_object:
+            continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")

-Return raw JSON (no ```json blocks, no text before/after)
+        continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."

-JSON Template
+        generation_prompt = f"""User request: "{userPrompt}"
+
+The user already received part of the response. Continue generating the remaining content.
+
+{continuation_text}
+
+Previous response ended here (JSON was cut off at this point):
+```json
+{fragment_snippet if fragment_snippet else "(No fragment available)"}
+```
+
+JSON structure template:
 {json_template}

+Instructions:
+- Return full JSON structure (metadata + documents + sections)
+- Continue from where it stopped - add NEW items only, do not repeat old items
+- Use the element structures shown in the template
+- Generate all remaining content needed to complete the user request
+- Fill with actual content (no comments, no "Add more..." text, no placeholders)
+- When fully complete, add "complete_response": true at root level
+- Return only valid JSON (no comments, no markdown blocks)
+
+Continue generating:
 """
     else:
-        # First call - simple prompt without continuation complexity
-        generation_prompt = f"""Generate structured JSON content for document creation.
+        # FIRST CALL - initial generation
+        generation_prompt = f"""User request: "{userPrompt}"

-USER REQUEST: "{userPrompt}"
-TARGET FORMAT: {outputFormat}
-TITLE: "{title_value}"
+Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.

-INSTRUCTIONS:
-- Follow the JSON template structure below exactly
-- Emit only one JSON object in the response
-- Fill sections with ACTUAL data based on the user request
-- Use appropriate content_type for each section
-- Generate REAL content, not summaries or instructions
-- Structure content in sections with order 1, 2, 3...
-- Each section should be complete before next
-- Generate as much content as possible
-
-Return raw JSON (no ```json blocks, no text before/after)
-
-JSON Template
+JSON structure template (reference only - shows the pattern):
 {json_template}

+Instructions:
+- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
+- Do NOT continue from the template examples above - create your own sections
+- Generate content based on the user request
+- Use the element structures shown in the template (heading, paragraph, list, table, code)
+- Create your own section IDs (do not use the example IDs like "section_heading_example")
+- When fully complete, add "complete_response": true at root level
+- Return only valid JSON (no comments, no markdown blocks, no text before/after)
+
+Generate your complete response starting from {{"metadata": ...}}:
 """

     # If we have extracted content, prepend it to the prompt
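The repair changes in the next file combine regex section extraction with progressive prefix parsing at coarse-to-fine step sizes. A compact sketch of the progressive idea (assumed simplification: bracket counting ignores brackets inside strings, which the real helpers treat more carefully):

import json
from typing import Optional

def close_structures(text: str) -> str:
    """Append the closers for any [ or { still open (string escaping ignored in this sketch)."""
    stack = []
    for ch in text:
        if ch in "[{":
            stack.append("]" if ch == "[" else "}")
        elif ch in "]}" and stack:
            stack.pop()
    return text + "".join(reversed(stack))

def parse_longest_prefix(text: str) -> Optional[dict]:
    """Probe big steps first, then finer ones, returning the first prefix that parses as an object."""
    for step in (100, 50, 10, 1):
        for end in range(len(text), 0, -step):
            try:
                obj = json.loads(close_structures(text[:end]))
                if isinstance(obj, dict):
                    return obj
            except json.JSONDecodeError:
                continue
    return None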
@@ -138,42 +138,73 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
 def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
     """
     Attempt to repair broken JSON using multiple strategies.
+    Generic solution that works for any content type.
     Returns the best repair attempt or None if all fail.
     """
     if not text:
         return None

-    # Strategy 1: Progressive parsing - try to find longest valid prefix
+    # Strategy 1: Try to extract sections from the entire text first
+    # This handles cases where the JSON structure is broken but content is intact
+    extracted_sections = _extractSectionsRegex(text)
+    if extracted_sections:
+        logger.info(f"Extracted {len(extracted_sections)} sections using regex")
+        return {
+            "metadata": {
+                "split_strategy": "single_document",
+                "source_documents": [],
+                "extraction_method": "ai_generation"
+            },
+            "documents": [{"sections": extracted_sections}]
+        }
+
+    # Strategy 2: Progressive parsing - try to find longest valid prefix
     best_result = None
     best_valid_length = 0

-    for i in range(len(text), 0, -1):
-        test_str = text[:i]
-        closed_str = _closeJsonStructures(test_str)
-        obj, err, _ = tryParseJson(closed_str)
-        if err is None and isinstance(obj, dict):
-            best_result = obj
-            best_valid_length = i
-            logger.debug(f"Progressive parsing success at length {i}")
+    # Try different step sizes to find the best valid JSON
+    for step_size in [100, 50, 10, 1]:
+        for i in range(len(text), 0, -step_size):
+            test_str = text[:i]
+            closed_str = _closeJsonStructures(test_str)
+            obj, err, _ = tryParseJson(closed_str)
+            if err is None and isinstance(obj, dict):
+                best_result = obj
+                best_valid_length = i
+                logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
+                break
+        if best_result:
             break

     if best_result:
         logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
-        return best_result
+
+        # Check if we have sections in the result
+        sections = extractSectionsFromDocument(best_result)
+        if sections:
+            logger.info(f"Progressive parsing found {len(sections)} sections")
+            return best_result
+        else:
+            # No sections found in progressive parsing, try to extract from broken part
+            logger.info("Progressive parsing found no sections, trying to extract from broken part")
+            extracted_sections = _extractSectionsRegex(text[best_valid_length:])
+            if extracted_sections:
+                logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
+                # Merge with the valid part
+                if "documents" not in best_result:
+                    best_result["documents"] = []
+                if not best_result["documents"]:
+                    best_result["documents"] = [{"sections": []}]
+                best_result["documents"][0]["sections"].extend(extracted_sections)
+                return best_result

-    # Strategy 2: Structure closing - close incomplete structures
+    # Strategy 3: Structure closing - close incomplete structures
     closed_str = _closeJsonStructures(text)
     obj, err, _ = tryParseJson(closed_str)
     if err is None and isinstance(obj, dict):
         logger.info("Repaired JSON using structure closing")
         return obj

-    # Strategy 3: Regex extraction (fallback for completely broken JSON)
-    extracted = _extractSectionsRegex(text)
-    if extracted:
-        logger.info("Repaired JSON using regex extraction")
-        return {"documents": [{"sections": extracted}]}
-
     logger.warning("All repair strategies failed")
     return None
@@ -204,7 +235,7 @@ def _closeJsonStructures(text: str) -> str:
 def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
     """
     Extract sections from broken JSON using regex patterns.
-    Fallback strategy when JSON is completely corrupted.
+    Generic solution that works for any content type.
     """
     import re

@@ -218,10 +249,10 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
         content_type = match.group(2)
         order = int(match.group(3))

-        # Try to extract elements array
+        # Try to extract elements array - look for the elements array after this section
         elements_match = re.search(
             r'"elements"\s*:\s*\[(.*?)\]',
-            text[match.end():match.end()+500] # Look ahead for elements
+            text[match.end():match.end()+5000] # Look ahead for elements (large range)
         )

         elements = []
@@ -230,7 +261,9 @@
                 elements_str = '[' + elements_match.group(1) + ']'
                 elements = json.loads(elements_str)
             except:
-                pass
+                # If JSON parsing fails, try to extract individual items manually
+                elements_text = elements_match.group(1)
+                elements = _extractElementsFromText(elements_text, content_type)

         sections.append({
             "id": section_id,
@@ -239,6 +272,243 @@
             "order": order
         })

+    # If no sections found with the main pattern, try to find any content patterns
+    if not sections:
+        sections = _extractGenericContent(text)
+
+    return sections
+
+
+def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
+    """
+    Extract elements from text when JSON parsing fails.
+    Generic approach that works for any content type.
+    Handles incomplete strings and corrupted data.
+    Excludes the last incomplete item to prevent corrupted data.
+    """
+    import re
+
+    elements = []
+
+    if content_type == "list":
+        # Look for {"text": "..."} patterns, including incomplete ones
+        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
+        # Also look for incomplete patterns like {"text": "36
+        incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+
+        # Combine both complete and incomplete items
+        all_items = text_items + incomplete_items
+        # Remove duplicates and empty strings
+        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_items:
+            unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+
+        elements = [{"text": item} for item in unique_items]
+
+    elif content_type == "paragraph":
+        # Look for {"text": "..."} patterns, including incomplete ones
+        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
+        incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+
+        all_items = text_items + incomplete_items
+        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_items:
+            unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+
+        elements = [{"text": item} for item in unique_items]
+
+    elif content_type == "heading":
+        # Look for {"level": X, "text": "..."} patterns, including incomplete ones
+        heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
+        incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+
+        all_items = heading_items + incomplete_heading_items
+        unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_items:
+            unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+
+        elements = [{"level": level, "text": text} for level, text in unique_items]
+
+    elif content_type == "table":
+        # Look for table patterns
+        table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
+        for headers_str, rows_str, caption in table_items:
+            # Extract headers
+            headers = re.findall(r'"([^"]+)"', headers_str)
+            # Extract rows (simplified)
+            rows = []
+            row_matches = re.findall(r'\[(.*?)\]', rows_str)
+            for row_match in row_matches:
+                row_items = re.findall(r'"([^"]+)"', row_match)
+                rows.append(row_items)
+
+            elements.append({
+                "headers": headers,
+                "rows": rows,
+                "caption": caption
+            })
+
+    elif content_type == "code":
+        # Look for {"code": "...", "language": "..."} patterns, including incomplete ones
+        code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
+        incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+
+        all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
+        unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_items:
+            unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+
+        elements = [{"code": code, "language": lang} for code, lang in unique_items]
+
+    else:
+        # Generic fallback - look for any text content, including incomplete
+        text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
+        incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+
+        all_items = text_items + incomplete_text_items
+        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_items:
+            unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+
+        elements = [{"text": item} for item in unique_items]
+
+    return elements
+
+
+def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str]:
+    """
+    Remove the last item if it appears to be incomplete/corrupted.
+    This prevents corrupted data from being included in the final result.
+    """
+    import re
+
+    if not items:
+        return items
+
+    # Check if the original text ends with incomplete JSON patterns
+    # Look for patterns that suggest the last item was cut off
+
+    # Pattern 1: Text ends with incomplete string like {"text": "36
+    if re.search(r'\{"[^"]*"\s*:\s*"[^"]*$', original_text):
+        logger.debug("Detected incomplete string at end - removing last item")
+        return items[:-1]
+
+    # Pattern 2: Text ends with incomplete boolean like {"bool_flag": tr
+    if re.search(r'\{"[^"]*"\s*:\s*(true|false|tr|fa)$', original_text):
+        logger.debug("Detected incomplete boolean at end - removing last item")
+        return items[:-1]
+
+    # Pattern 3: Text ends with incomplete number like {"number": 123
+    if re.search(r'\{"[^"]*"\s*:\s*\d+$', original_text):
+        logger.debug("Detected incomplete number at end - removing last item")
+        return items[:-1]
+
+    # Pattern 4: Text ends with incomplete array like {"array": [1,2,3
+    if re.search(r'\{"[^"]*"\s*:\s*\[[^\]]*$', original_text):
+        logger.debug("Detected incomplete array at end - removing last item")
+        return items[:-1]
+
+    # Pattern 5: Text ends with incomplete object like {"obj": {"key": "val
+    if re.search(r'\{"[^"]*"\s*:\s*\{[^}]*$', original_text):
+        logger.debug("Detected incomplete object at end - removing last item")
+        return items[:-1]
+
+    # Pattern 6: Text ends with trailing comma (common sign of incomplete JSON)
+    if original_text.rstrip().endswith(','):
+        logger.debug("Detected trailing comma - removing last item")
+        return items[:-1]
+
+    # If no incomplete patterns detected, return all items
+    return items
+
+
+def _extractGenericContent(text: str) -> List[Dict[str, Any]]:
+    """
+    Extract generic content when no specific section patterns are found.
+    This handles cases where the JSON structure is completely broken.
+    Handles incomplete strings and corrupted data.
+    Excludes the last incomplete item to prevent corrupted data.
+    """
+    import re
+
+    sections = []
+
+    # Look for any structured content patterns
+    # Pattern 1: Look for list items {"text": "..."}, including incomplete ones
+    list_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
+    incomplete_list_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)
+
+    all_list_items = list_items + incomplete_list_items
+    unique_list_items = list(dict.fromkeys([item for item in all_list_items if item.strip()]))
+
+    # Remove the last item if it appears to be incomplete/corrupted
+    if unique_list_items:
+        unique_list_items = _removeLastIncompleteItem(unique_list_items, text)
+
+    if unique_list_items:
+        elements = [{"text": item} for item in unique_list_items]
+        sections.append({
+            "id": "section_1",
+            "content_type": "list",
+            "elements": elements,
+            "order": 1
+        })
+
+    # Pattern 2: Look for paragraph text {"text": "..."}, including incomplete ones
+    elif re.search(r'\{"text"\s*:\s*"[^"]*\}', text):
+        # Extract all text elements, including incomplete ones
+        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
+        incomplete_text_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)
+
+        all_text_items = text_items + incomplete_text_items
+        unique_text_items = list(dict.fromkeys([item for item in all_text_items if item.strip()]))
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if unique_text_items:
+            unique_text_items = _removeLastIncompleteItem(unique_text_items, text)
+
+        if unique_text_items:
+            elements = [{"text": item} for item in unique_text_items]
+            sections.append({
+                "id": "section_1",
+                "content_type": "paragraph",
+                "elements": elements,
+                "order": 1
+            })
+
+    # Pattern 3: Look for any quoted strings that might be content, including incomplete ones
+    elif re.search(r'"([^"]{3,})"', text): # Strings longer than 3 chars (reduced threshold)
+        # Extract longer quoted strings, including incomplete ones
+        text_items = re.findall(r'"([^"]{3,})"', text)
+        incomplete_text_items = re.findall(r'"([^"]{3,}?)(?:\n|$)', text)
+
+        all_text_items = text_items + incomplete_text_items
+        # Filter out likely JSON keys
+        content_items = [item for item in all_text_items if not item.startswith(('section_', 'doc_', 'metadata', 'split_strategy', 'source_documents', 'extraction_method', 'id', 'content_type', 'elements', 'order', 'title', 'filename'))]
+
+        # Remove the last item if it appears to be incomplete/corrupted
+        if content_items:
+            content_items = _removeLastIncompleteItem(content_items, text)
+
+        if content_items:
+            elements = [{"text": item} for item in content_items[:10]] # Limit to first 10 items
+            sections.append({
+                "id": "section_1",
+                "content_type": "paragraph",
+                "elements": elements,
+                "order": 1
+            })
+
     return sections
@ -324,33 +594,295 @@ def extractContentSample(section: Dict[str, Any]) -> str:
|
||||||
return "Content exists"
|
return "Content exists"
|
||||||
|
|
||||||
|
|
||||||
def buildContinuationContext(allSections: List[Dict[str, Any]]) -> Dict[str, Any]:
|
def _buildDetailedContinuationInfo(section: Dict[str, Any], content_type: str) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Build context information from accumulated sections for continuation prompt.
|
Build detailed continuation information for better AI guidance.
|
||||||
Returns dict with metadata about what was already generated.
|
Completely generic - works for any content type (list, paragraph, code, table, etc.)
|
||||||
"""
|
"""
|
||||||
if not allSections:
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
|
if not elements:
|
||||||
return {
|
return {
|
||||||
"section_count": 0,
|
"type": "continue_general",
|
||||||
"next_order": 1,
|
"sample": extractContentSample(section),
|
||||||
"last_content_sample": "No content yet"
|
"last_item": "",
|
||||||
|
"item_count": 0,
|
||||||
|
"guidance": "Continue generating content in the same format and style."
|
||||||
}
|
}
|
||||||
|
|
||||||
# Sort sections by order
|
# Count elements regardless of type
|
||||||
sorted_sections = sorted(allSections, key=lambda s: s.get("order", 0))
|
element_count = len(elements)
|
||||||
|
|
||||||
last_section = sorted_sections[-1]
|
# Extract sample for context - completely generic
|
||||||
last_order = last_section.get("order", 0)
|
sample = extractContentSample(section)
|
||||||
|
|
||||||
# Get content sample from last section
|
|
||||||
last_content_sample = extractContentSample(last_section)
|
|
||||||
|
|
||||||
|
# Generic continuation guidance - applies to ANY content type
|
||||||
|
# Tell AI to generate ALL REMAINING content to complete the user request
|
||||||
return {
|
return {
|
||||||
"section_count": len(allSections),
|
"type": "continue_general",
|
||||||
"last_section_id": last_section.get("id", ""),
|
"sample": sample,
|
||||||
"last_order": last_order,
|
"last_item": "",
|
||||||
"next_order": last_order + 1,
|
"item_count": element_count,
|
||||||
"last_content_type": last_section.get("content_type", ""),
|
"guidance": "Generate ALL remaining content to complete the user's request. Continue from where you left off and finish everything that was requested."
|
||||||
"last_content_sample": last_content_sample
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _extractLastItemsFromFragment(fragment: str, max_items: int = 10) -> str:
    """
    Extract the last few items from a JSON fragment for continuation context.
    Uses JSON structure (sections -> elements -> items) - fully generic.
    Works with broken/incomplete JSON by trying to parse and extract sections.
    """
    if not fragment:
        return ""

    # Strategy 1: Try to parse as JSON and extract from structure
    try:
        # Try to repair and parse the fragment
        parsed = repairBrokenJson(fragment)
        if parsed:
            # Extract sections from parsed JSON using structure
            sections = extractSectionsFromDocument(parsed)
            if sections:
                # Get the last section (likely where continuation should happen)
                sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                last_section = sorted_sections[-1]
                elements = last_section.get("elements", [])

                if elements and isinstance(elements, list):
                    content_type = last_section.get("content_type", "").lower()

                    # For list content_type, extract from items array
                    if content_type == "list" and len(elements) > 0:
                        last_element = elements[-1]
                        if isinstance(last_element, dict):
                            # Check if it has an "items" array (list structure)
                            if "items" in last_element and isinstance(last_element["items"], list):
                                items_list = last_element["items"]
                                if items_list:
                                    # Get last max_items from this items array
                                    last_items = items_list[-max_items:] if len(items_list) > max_items else items_list
                                    # Extract text from each item
                                    texts = []
                                    for item in last_items:
                                        if isinstance(item, dict) and "text" in item:
                                            texts.append(str(item["text"]))
                                    if texts:
                                        return ', '.join(texts)

                            # Or if elements themselves are items (alternative structure)
                            elif "text" in last_element:
                                # Get last max_items elements that have text
                                elements_with_text = [e for e in elements if isinstance(e, dict) and "text" in e]
                                if elements_with_text:
                                    last_elements = elements_with_text[-max_items:] if len(elements_with_text) > max_items else elements_with_text
                                    texts = [str(e.get("text", "")) for e in last_elements]
                                    if texts:
                                        return ', '.join(texts)

                    # For other content types, extract from elements
                    elif len(elements) > 0:
                        # Get last max_items elements that have text/code
                        valid_elements = [e for e in elements if isinstance(e, dict) and ("text" in e or "code" in e)]
                        if valid_elements:
                            last_elements = valid_elements[-max_items:] if len(valid_elements) > max_items else valid_elements
                            texts = []
                            for elem in last_elements:
                                if "text" in elem:
                                    texts.append(str(elem["text"]))
                                elif "code" in elem:
                                    # For code, show snippet
                                    code = str(elem["code"])
                                    texts.append(code[:50] + "..." if len(code) > 50 else code)
                            if texts:
                                return ', '.join(texts)
    except Exception as e:
        logger.debug(f"Could not extract items from fragment using JSON structure: {e}")

    # Strategy 2: If parsing failed, try progressive parsing from the end
    # Look for the last complete JSON structures near the end
    try:
        # Try parsing different lengths from the end
        for length in [3000, 2000, 1000, 500]:
            if len(fragment) > length:
                end_portion = fragment[-length:]
                closed = _closeJsonStructures(end_portion)
                obj, err, _ = tryParseJson(closed)
                if err is None and isinstance(obj, dict):
                    # Successfully parsed - extract sections
                    sections = extractSectionsFromDocument(obj)
                    if sections:
                        # Same extraction logic as above
                        sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                        if sorted_sections:
                            last_section = sorted_sections[-1]
                            elements = last_section.get("elements", [])
                            if elements:
                                # Extract texts using same logic as Strategy 1
                                texts = []
                                for elem in elements[-max_items:]:
                                    if isinstance(elem, dict):
                                        if "items" in elem and isinstance(elem["items"], list):
                                            # Get last item from items array
                                            if elem["items"]:
                                                last_item = elem["items"][-1]
                                                if isinstance(last_item, dict) and "text" in last_item:
                                                    texts.append(str(last_item["text"]))
                                        elif "text" in elem:
                                            texts.append(str(elem["text"]))
                                if texts:
                                    return ', '.join(texts[-max_items:])
    except Exception as e:
        logger.debug(f"Progressive parsing from end failed: {e}")

    # Strategy 3: If all parsing fails, try simple extraction from raw fragment
    # Look for last complete {"text": "..."} pattern near the end
    try:
        # Look at last 2000 chars for the pattern
        end_portion = fragment[-2000:] if len(fragment) > 2000 else fragment
        # Find all {"text": "value"} patterns
        import re
        # Pattern to match {"text": "..."} with escaped quotes
        pattern = r'\{"text"\s*:\s*"([^"]+)"\}'
        matches = re.findall(pattern, end_portion)
        if matches:
            # Get last max_items
            last_matches = matches[-max_items:] if len(matches) > max_items else matches
            return ', '.join(last_matches)
    except Exception as e:
        logger.debug(f"Simple pattern extraction failed: {e}")

    # Strategy 4: If all fails, return empty (will use last_item_from_sections)
    return ""
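
# _closeJsonStructures and tryParseJson are not shown in this diff; as a labeled assumption,
# a minimal stand-in for the brace-closing step used by Strategy 2 might look like this
# (the name and behavior below are guesses, not the module's actual implementation):
import json

def closeJsonStructuresSketch(fragment: str) -> str:
    # Track unclosed braces/brackets outside of strings, then append the missing closers.
    stack = []
    in_string = False
    escaped = False
    for ch in fragment:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch in "{[":
            stack.append("}" if ch == "{" else "]")
        elif ch in "}]" and stack:
            stack.pop()
    return fragment + ('"' if in_string else "") + "".join(reversed(stack))

tail = '{"items": [{"text": "7919"}, {"text": "7927"}'
print(closeJsonStructuresSketch(tail))  # {"items": [{"text": "7919"}, {"text": "7927"}]}
print(json.loads(closeJsonStructuresSketch(tail))["items"][-1]["text"])  # 7927
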
def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse: Optional[str] = None) -> Dict[str, Any]:
    """
    Build context information from accumulated sections for continuation prompt.
    Extracts last items and provides clear continuation point.

    Args:
        allSections: List of sections already generated
        lastRawResponse: Raw JSON response from last iteration (can be broken/incomplete)

    Returns:
        Dict with section_count, last_raw_json, last_items, and continuation point
    """
    context = {
        "section_count": len(allSections),
    }

    # Extract last COMPLETE object directly from raw response (generic - works for any structure)
    # This is extracted BEFORE any merging/accumulation happens
    # Returns the full last complete object like {"text": "..."} or {"code": "...", "language": "..."} etc.
    # Logic: find the last complete {...} where there are no nested { inside (flat object)
    last_complete_object = ""  # Full object as JSON string
    total_items_count = 0

    if lastRawResponse:
        raw_json = stripCodeFences(lastRawResponse.strip())
        if raw_json and raw_json.strip() != "{}":
            # Find last complete flat object (no nested objects inside)
            # Scan from the end backwards to find the last complete {...} object
            # A flat object is complete if: starts with {, ends with }, and has no nested { inside

            # Work backwards from the end, find last }
            for i in range(len(raw_json) - 1, -1, -1):
                if raw_json[i] == '}':
                    # Found a closing brace, work backwards to find its opening brace
                    depth = 1
                    opening_pos = -1

                    for j in range(i - 1, -1, -1):
                        if raw_json[j] == '}':
                            depth += 1
                        elif raw_json[j] == '{':
                            depth -= 1
                            if depth == 0:
                                # Found matching opening brace
                                opening_pos = j
                                # Check if this is a flat object (no nested { inside)
                                obj_content = raw_json[j + 1:i]
                                if '{' not in obj_content:
                                    # This is a flat object (no nested objects inside)
                                    last_complete_object = raw_json[j:i + 1]
                                break

                    if last_complete_object:
                        break

            # Also try structure-based parsing for item count
            try:
                parsed = repairBrokenJson(raw_json)
                if parsed:
                    sections = extractSectionsFromDocument(parsed)
                    if sections:
                        sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                        last_section = sorted_sections[-1]
                        elements = last_section.get("elements", [])

                        if elements and isinstance(elements, list) and len(elements) > 0:
                            if last_section.get("content_type") == "list":
                                last_element = elements[-1]
                                if isinstance(last_element, dict):
                                    if "items" in last_element and isinstance(last_element["items"], list):
                                        items_list = last_element["items"]
                                        # Only count complete items (those successfully extracted)
                                        total_items_count = len(items_list)
            except Exception as e:
                logger.debug(f"Could not extract item count from raw response structure: {e}")

            # Also extract last items for display (fragment extraction)
            last_items_from_fragment = _extractLastItemsFromFragment(raw_json, max_items=10)

            context["last_raw_json"] = raw_json
            context["last_item_object"] = last_complete_object  # Full last complete object (generic - any structure)
            context["last_items_from_fragment"] = last_items_from_fragment
            context["total_items_count"] = total_items_count  # Count from raw response

            logger.debug(f"Included previous JSON response in continuation context ({len(raw_json)} chars, {total_items_count} items in response, last complete object: {last_complete_object})")
        else:
            logger.warning("lastRawResponse was empty or just '{}' - continuation may not work correctly")
    else:
        # No raw response - fallback to extracting from accumulated sections
        # Extract the last complete object from the last element
        last_item_object_from_sections = ""
        if allSections:
            sorted_sections = sorted(allSections, key=lambda s: s.get("order", 0))
            last_section = sorted_sections[-1]
            elements = last_section.get("elements", [])

            if elements and isinstance(elements, list) and len(elements) > 0:
                # Get the last element (could be any structure - generic)
                last_element = elements[-1]
                if isinstance(last_element, dict):
                    # Try to get items if it's a list structure
                    if "items" in last_element and isinstance(last_element["items"], list):
                        items_list = last_element["items"]
                        total_items_count = len(items_list)
                        if items_list:
                            # Get last item (any structure)
                            last_item = items_list[-1]
                            if isinstance(last_item, dict):
                                # Convert to JSON string (generic - works for any object structure)
                                import json
                                try:
                                    last_item_object_from_sections = json.dumps(last_item)
                                except:
                                    pass
                    else:
                        # Element itself is the object (no items array)
                        total_items_count = len(elements)
                        # Convert to JSON string (generic)
                        import json
                        try:
                            last_item_object_from_sections = json.dumps(last_element)
                        except:
                            pass

        context["last_item_object"] = last_item_object_from_sections
        context["total_items_count"] = total_items_count
        logger.debug(f"No previous raw response available for continuation context (but have {total_items_count} items accumulated, last item object: {last_item_object_from_sections})")

    return context
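
# The backward flat-object scan above, restated as a self-contained snippet with a made-up
# truncated response (same logic: the last complete {...} that contains no nested '{'):
def lastFlatObjectSketch(raw_json: str) -> str:
    for i in range(len(raw_json) - 1, -1, -1):
        if raw_json[i] != '}':
            continue
        depth = 1
        for j in range(i - 1, -1, -1):
            if raw_json[j] == '}':
                depth += 1
            elif raw_json[j] == '{':
                depth -= 1
                if depth == 0:
                    # Matching opening brace found; accept only if the object is flat.
                    if '{' not in raw_json[j + 1:i]:
                        return raw_json[j:i + 1]
                    break
    return ""

truncated = '{"sections": [{"elements": [{"items": [{"text": "7919"}, {"text": "7927"}, {"te'
print(lastFlatObjectSketch(truncated))  # {"text": "7927"}
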
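
# Sketch of the resulting context for a truncated raw response (illustrative values;
# stripCodeFences and the repair helpers are defined elsewhere in this module):
raw = '{"documents": [{"sections": [{"content_type": "list", "order": 0, "elements": [{"items": [{"text": "7919"}, {"text": "79'
ctx = buildContinuationContext(allSections=[], lastRawResponse=raw)
# ctx["section_count"]            -> 0
# ctx["last_raw_json"]            -> raw, with any code fences stripped
# ctx["last_item_object"]         -> '{"text": "7919"}'  (last complete flat object)
# ctx["last_items_from_fragment"] -> e.g. "7919", depending on how the fragment is repaired
# ctx["total_items_count"]        -> number of complete items, if the structure could be parsed
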
@@ -50,7 +50,7 @@ class MethodAiOperationsTester:
             "resultType": "json"
         },
         OperationTypeEnum.DATA_GENERATE: {
-            "aiPrompt": "Generate the first 9000 prime numbers.",
+            "aiPrompt": "Generate the first 4000 prime numbers.",
             "resultType": "txt"
         },
         OperationTypeEnum.DATA_EXTRACT: {