From adbc29f069cbf0698709103980902c0124be72d7 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 29 Oct 2025 22:55:56 +0100
Subject: [PATCH] AI iteration loops running 21 of 22 test cases - fixing
object tree extraction
---
.../20251029-223021-api_sent_message_0.txt | 72 ++
.../20251029-223033-api_received_response.txt | 200 ++++++
modules/interfaces/interfaceAiObjects.py | 14 +-
modules/services/serviceAi/subCoreAi.py | 114 +++-
.../subPromptBuilderGeneration.py | 182 ++++--
modules/shared/jsonUtils.py | 614 ++++++++++++++++--
test4_method_ai_operations.py | 2 +-
7 files changed, 1071 insertions(+), 127 deletions(-)
create mode 100644 local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
create mode 100644 local/logs/debug/prompts/20251029-223033-api_received_response.txt
diff --git a/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt b/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
new file mode 100644
index 00000000..7a71526d
--- /dev/null
+++ b/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
@@ -0,0 +1,72 @@
+Message 0 (user)
+Length: 2015 chars
+================================================================================
+User request: "Generate the first 1000 prime numbers."
+
+Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
+
+JSON structure template (reference only - shows the pattern):
+{
+ "metadata": {
+ "split_strategy": "single_document",
+ "source_documents": [],
+ "extraction_method": "ai_generation"
+ },
+ "documents": [
+ {
+ "id": "doc_1",
+ "title": "Generated Document",
+ "filename": "document.json",
+ "sections": [
+ {
+ "id": "section_heading_example",
+ "content_type": "heading",
+ "elements": [
+ {"level": 1, "text": "Heading Text"}
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_paragraph_example",
+ "content_type": "paragraph",
+ "elements": [
+ {"text": "Paragraph text content"}
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_list_example",
+ "content_type": "list",
+ "elements": [
+ {
+ "items": [
+ {"text": "Item 1"},
+ {"text": "Item 2"}
+ ],
+ "list_type": "numbered"
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_table_example",
+ "content_type": "table",
+ "elements": [
+ {
+ "headers": ["Column 1", "Column 2"],
+ "rows": [
+ ["Row 1 Col 1", "Row 1 Col 2"],
+ ["Row 2 Col 1", "Row 2 Col 2"]
+ ],
+ "caption": "Table caption"
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_code_example",
+ "content_type": "code",
+ "elements": [
+ {
+ "code... [truncated]
+================================================================================
diff --git a/local/logs/debug/prompts/20251029-223033-api_received_response.txt b/local/logs/debug/prompts/20251029-223033-api_received_response.txt
new file mode 100644
index 00000000..31f8964c
--- /dev/null
+++ b/local/logs/debug/prompts/20251029-223033-api_received_response.txt
@@ -0,0 +1,200 @@
+API Response
+Length: 6082 chars
+================================================================================
+{
+ "metadata": {
+ "split_strategy": "single_document",
+ "source_documents": [],
+ "extraction_method": "ai_generation"
+ },
+ "documents": [
+ {
+ "id": "doc_1",
+ "title": "List of First 1000 Prime Numbers",
+ "filename": "prime_numbers.json",
+ "sections": [
+ {
+ "id": "section_prime_numbers",
+ "content_type": "list",
+ "elements": [
+ {
+ "items": [
+ {"number": 2},
+ {"number": 3},
+ {"number": 5},
+ {"number": 7},
+ {"number": 11},
+ {"number": 13},
+ {"number": 17},
+ {"number": 19},
+ {"number": 23},
+ {"number": 29},
+ {"number": 31},
+ {"number": 37},
+ {"number": 41},
+ {"number": 43},
+ {"number": 47},
+ {"number": 53},
+ {"number": 59},
+ {"number": 61},
+ {"number": 67},
+ {"number": 71},
+ {"number": 73},
+ {"number": 79},
+ {"number": 83},
+ {"number": 89},
+ {"number": 97},
+ {"number": 101},
+ {"number": 103},
+ {"number": 107},
+ {"number": 109},
+ {"number": 113},
+ {"number": 127},
+ {"number": 131},
+ {"number": 137},
+ {"number": 139},
+ {"number": 149},
+ {"number": 151},
+ {"number": 157},
+ {"number": 163},
+ {"number": 167},
+ {"number": 173},
+ {"number": 179},
+ {"number": 181},
+ {"number": 191},
+ {"number": 193},
+ {"number": 197},
+ {"number": 199},
+ {"number": 211},
+ {"number": 223},
+ {"number": 227},
+ {"number": 229},
+ {"number": 233},
+ {"number": 239},
+ {"number": 241},
+ {"number": 251},
+ {"number": 257},
+ {"number": 263},
+ {"number": 269},
+ {"number": 271},
+ {"number": 277},
+ {"number": 281},
+ {"number": 283},
+ {"number": 293},
+ {"number": 307},
+ {"number": 311},
+ {"number": 313},
+ {"number": 317},
+ {"number": 331},
+ {"number": 337},
+ {"number": 347},
+ {"number": 349},
+ {"number": 353},
+ {"number": 359},
+ {"number": 367},
+ {"number": 373},
+ {"number": 379},
+ {"number": 383},
+ {"number": 389},
+ {"number": 397},
+ {"number": 401},
+ {"number": 409},
+ {"number": 419},
+ {"number": 421},
+ {"number": 431},
+ {"number": 433},
+ {"number": 439},
+ {"number": 443},
+ {"number": 449},
+ {"number": 457},
+ {"number": 461},
+ {"number": 463},
+ {"number": 467},
+ {"number": 479},
+ {"number": 487},
+ {"number": 491},
+ {"number": 499},
+ {"number": 503},
+ {"number": 509},
+ {"number": 521},
+ {"number": 523},
+ {"number": 541},
+ {"number": 547},
+ {"number": 557},
+ {"number": 563},
+ {"number": 569},
+ {"number": 571},
+ {"number": 577},
+ {"number": 587},
+ {"number": 593},
+ {"number": 599},
+ {"number": 601},
+ {"number": 607},
+ {"number": 613},
+ {"number": 617},
+ {"number": 619},
+ {"number": 631},
+ {"number": 641},
+ {"number": 643},
+ {"number": 647},
+ {"number": 653},
+ {"number": 659},
+ {"number": 661},
+ {"number": 673},
+ {"number": 677},
+ {"number": 683},
+ {"number": 691},
+ {"number": 701},
+ {"number": 709},
+ {"number": 719},
+ {"number": 727},
+ {"number": 733},
+ {"number": 739},
+ {"number": 743},
+ {"number": 751},
+ {"number": 757},
+ {"number": 761},
+ {"number": 769},
+ {"number": 773},
+ {"number": 787},
+ {"number": 797},
+ {"number": 809},
+ {"number": 811},
+ {"number": 821},
+ {"number": 823},
+ {"number": 827},
+ {"number": 829},
+ {"number": 839},
+ {"number": 853},
+ {"number": 857},
+ {"number": 859},
+ {"number": 863},
+ {"number": 877},
+ {"number": 881},
+ {"number": 883},
+ {"number": 887},
+ {"number": 907},
+ {"number": 911},
+ {"number": 919},
+ {"number": 929},
+ {"number": 937},
+ {"number": 941},
+ {"number": 947},
+ {"number": 953},
+ {"number": 967},
+ {"number": 971},
+ {"number": 977},
+ {"number": 983},
+ {"number": 991},
+ {"number": 997}
+ ],
+ "list_type": "numbered"
+ }
+ ],
+ "order": 0
+ }
+ ]
+ }
+ ]
+}
+================================================================================
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 6c12d267..e58fa1ef 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -92,18 +92,6 @@ class AiObjects:
# Input bytes will be calculated inside _callWithModel
- # Compress optionally (prompt/context) - simple truncation fallback kept here
- def _maybeTruncate(text: str, limit: int) -> str:
- data = text.encode("utf-8")
- if len(data) <= limit:
- return text
- return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"
-
- if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
- prompt = _maybeTruncate(prompt, 2000)
- if options.compressContext and len(context.encode("utf-8")) > 70000:
- context = _maybeTruncate(context, 70000)
-
# Generation parameters are handled inside _callWithModel
# Get failover models for this operation type
@@ -129,7 +117,7 @@ class AiObjects:
try:
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
- # Call the model
+ # Call the model directly - no truncation or compression here
response = await self._callWithModel(model, prompt, context, options)
logger.info(f"✅ AI call successful with model: {model.name}")
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
index cd177cc9..4e9d1bf6 100644
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@@ -124,7 +124,9 @@ Respond with ONLY a JSON object in this exact format:
self,
prompt: str,
options: AiCallOptions,
- debugPrefix: str = "ai_call"
+ debugPrefix: str = "ai_call",
+ promptBuilder: Optional[callable] = None,
+ promptArgs: Optional[Dict[str, Any]] = None
) -> str:
"""
Shared core function for AI calls with repair-based looping system.
@@ -141,6 +143,7 @@ Respond with ONLY a JSON object in this exact format:
max_iterations = 50 # Prevent infinite loops
iteration = 0
allSections = [] # Accumulate all sections across iterations
+ lastRawResponse = None # Store last raw JSON response for continuation
logger.debug(f"Starting AI call with repair-based looping (debug prefix: {debugPrefix})")
@@ -149,14 +152,18 @@ Respond with ONLY a JSON object in this exact format:
logger.debug(f"AI call iteration {iteration}/{max_iterations}")
# Build iteration prompt
- if len(allSections) > 0:
- # This is a continuation - build continuation context
- continuationContext = buildContinuationContext(allSections)
- logger.info(f"Continuation context: {continuationContext.get('section_count')} sections, next order: {continuationContext.get('next_order')}")
+ if len(allSections) > 0 and promptBuilder and promptArgs:
+ # This is a continuation - build continuation context with raw JSON and rebuild prompt
+ continuationContext = buildContinuationContext(allSections, lastRawResponse)
+ logger.info(f"Continuation context: {continuationContext.get('section_count')} sections")
+ if lastRawResponse:
+ logger.debug(f"Iteration {iteration}: Including previous response in continuation context ({len(lastRawResponse)} chars)")
+ else:
+ logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
- # If prompt contains a placeholder for continuation, inject the context
- # For now, we'll handle this at the calling code level
- iterationPrompt = prompt
+ # Rebuild prompt with continuation context using the provided prompt builder
+ iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
+ logger.debug(f"Rebuilt prompt with continuation context for iteration {iteration}")
else:
# First iteration - use original prompt
iterationPrompt = prompt
@@ -179,6 +186,13 @@ Respond with ONLY a JSON object in this exact format:
response = await self.aiObjects.call(request)
result = response.content
+ # Debug: Check response immediately from API
+ if iteration == 1 and result:
+ first_chars = result[:200].replace('\n', '\\n').replace('\r', '\\r')
+ logger.debug(f"Iteration 1: Raw API response starts with (first 200 chars): '{first_chars}'")
+ if result.strip().startswith('},') or result.strip().startswith('],'):
+ logger.error(f"Iteration 1: API returned fragment! Full start: '{result[:200]}'")
+
# Write raw AI response to debug file
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
@@ -196,10 +210,23 @@ Respond with ONLY a JSON object in this exact format:
logger.warning(f"Iteration {iteration}: Empty response, stopping")
break
+ # Store raw response for continuation (even if broken)
+ lastRawResponse = result
+
+ # Check for complete_response flag in raw response (before parsing)
+ import re
+ if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
+ logger.info(f"Iteration {iteration}: Detected complete_response flag in raw response")
+
# Extract sections from response (handles both valid and broken JSON)
extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
if not extractedSections:
+ # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
+ if iteration > 1 and not wasJsonComplete:
+ logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
+ continue
+ # Otherwise, stop if no sections
logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
break
@@ -208,7 +235,7 @@ Respond with ONLY a JSON object in this exact format:
logger.info(f"Iteration {iteration}: Extracted {len(extractedSections)} sections (total: {len(allSections)})")
# Check if we should continue (completion detection)
- if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete):
+ if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
logger.debug(f"Iteration {iteration}: Continuing generation")
continue
else:
@@ -241,6 +268,7 @@ Respond with ONLY a JSON object in this exact format:
"""
Extract sections from AI response, handling both valid and broken JSON.
Uses repair mechanism for broken JSON.
+ Checks for "complete_response": true flag to determine completion.
Returns (sections, wasJsonComplete)
"""
# First, try to parse as valid JSON
@@ -248,14 +276,35 @@ Respond with ONLY a JSON object in this exact format:
extracted = extractJsonString(result)
parsed_result = json.loads(extracted)
+ # Check if AI marked response as complete
+ isComplete = parsed_result.get("complete_response", False) == True
+ if isComplete:
+ logger.info(f"Iteration {iteration}: AI marked response as complete (complete_response: true)")
+
# Extract sections from parsed JSON
sections = extractSectionsFromDocument(parsed_result)
logger.debug(f"Iteration {iteration}: Valid JSON - extracted {len(sections)} sections")
- return sections, True # JSON was complete
+
+ # If AI marked as complete, always return as complete
+ if isComplete:
+ return sections, True
+
+ # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
+ # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
+ # Don't use repair mechanism - just mark as incomplete so loop continues
+ if len(sections) == 0 and iteration > 1:
+ logger.info(f"Iteration {iteration}: Continuation fragment with 0 extractable sections - JSON incomplete, continuing")
+ return sections, False # Mark as incomplete so loop continues
+
+ # First iteration with 0 sections means empty response - stop
+ if len(sections) == 0:
+ return sections, True # Complete but empty
+
+ return sections, True # JSON was complete with sections
except json.JSONDecodeError as e:
- # Broken JSON - try repair mechanism
- logger.warning(f"Iteration {iteration}: Invalid JSON, attempting repair: {str(e)}")
+ # Broken JSON - try repair mechanism (normal in iterative generation)
+ logger.info(f"Iteration {iteration}: JSON incomplete/broken, attempting repair: {str(e)}")
self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
# Try to repair
@@ -279,16 +328,25 @@ Respond with ONLY a JSON object in this exact format:
self,
allSections: List[Dict[str, Any]],
iteration: int,
- wasJsonComplete: bool
+ wasJsonComplete: bool,
+ rawResponse: str = None
) -> bool:
"""
- Determine if generation should continue based on JSON completeness.
+ Determine if generation should continue based on JSON completeness and complete_response flag.
Returns True if we should continue, False if done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
- # Simple rule: if JSON was complete, we're done
+ # Check for complete_response flag in raw response
+ if rawResponse:
+ import re
+ # Look for complete_response: true pattern (allowing for whitespace variations)
+ if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
+ logger.info("AI marked response as complete (complete_response: true) - stopping generation")
+ return False
+
+ # If JSON was complete (and no complete_response flag), we're done
# If JSON was broken and repaired, continue to get more content
if wasJsonComplete:
logger.info("JSON was complete - stopping generation")
@@ -398,6 +456,15 @@ Respond with ONLY a JSON object in this exact format:
else:
logger.debug(f"Using provided options: operationType={options.operationType}, priority={options.priority}")
+ # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+ # Compressing would truncate the template structure and confuse the AI
+ if outputFormat: # Document generation with structured output
+ if not options:
+ options = AiCallOptions()
+ options.compressPrompt = False # JSON templates must NOT be truncated
+ options.compressContext = False # Context also should not be compressed
+ logger.debug("Document generation detected - disabled prompt/context compression")
+
# Handle document generation with specific output format using unified approach
if outputFormat:
# Use unified generation method for all document generation
@@ -411,7 +478,22 @@ Respond with ONLY a JSON object in this exact format:
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# First call without continuation context
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
- generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
+
+ # Prepare prompt builder arguments for continuation
+ promptArgs = {
+ "outputFormat": outputFormat,
+ "userPrompt": prompt,
+ "title": title,
+ "extracted_content": extracted_content
+ }
+
+ generated_json = await self._callAiWithLooping(
+ generation_prompt,
+ options,
+ "document_generation",
+ buildGenerationPrompt,
+ promptArgs
+ )
# Parse the generated JSON (extract fenced/embedded JSON first)
try:
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
index 0cf32bf7..895d9af8 100644
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@@ -9,6 +9,7 @@ from typing import Dict, Any
logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation
+# Includes examples for all content types so AI knows the structure patterns
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"metadata": {
"split_strategy": "single_document",
@@ -22,16 +23,60 @@ TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"filename": "document.json",
"sections": [
{
- "id": "section_1",
- "content_type": "heading|paragraph|table|list|code",
+ "id": "section_heading_example",
+ "content_type": "heading",
"elements": [
- // heading: {"level": 1, "text": "..."}
- // paragraph: {"text": "..."}
- // table: {"headers": [...], "rows": [[...]], "caption": "..."}
- // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
- // code: {"code": "...", "language": "..."}
+ {"level": 1, "text": "Heading Text"}
],
- "order": 1
+ "order": 0
+ },
+ {
+ "id": "section_paragraph_example",
+ "content_type": "paragraph",
+ "elements": [
+ {"text": "Paragraph text content"}
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_list_example",
+ "content_type": "list",
+ "elements": [
+ {
+ "items": [
+ {"text": "Item 1"},
+ {"text": "Item 2"}
+ ],
+ "list_type": "numbered"
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_table_example",
+ "content_type": "table",
+ "elements": [
+ {
+ "headers": ["Column 1", "Column 2"],
+ "rows": [
+ ["Row 1 Col 1", "Row 1 Col 2"],
+ ["Row 2 Col 1", "Row 2 Col 2"]
+ ],
+ "caption": "Table caption"
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_code_example",
+ "content_type": "code",
+ "elements": [
+ {
+ "code": "function example() { return true; }",
+ "language": "javascript"
+ }
+ ],
+ "order": 0
}
]
}
@@ -48,10 +93,10 @@ async def buildGenerationPrompt(
) -> str:
"""
Build the unified generation prompt using a single JSON template.
- Simplified version without continuation logic in prompt.
+ Generic solution that works for any user request.
Args:
- outputFormat: Target output format (html, pdf, docx, etc.)
+ outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
userPrompt: User's original prompt for document generation
title: Title for the document
extracted_content: Optional extracted content from documents to prepend to prompt
@@ -64,63 +109,88 @@ async def buildGenerationPrompt(
title_value = title if title else "Generated Document"
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
- # Check if this is a continuation request
- if continuationContext and continuationContext.get("section_count", 0) > 0:
- # Continuation prompt - simple and focused
- section_count = continuationContext.get("section_count", 0)
- next_order = continuationContext.get("next_order", 1)
- last_content_sample = continuationContext.get("last_content_sample", "")
+ # Build prompt based on whether this is a continuation or first call
+ # Check if we have valid continuation context with actual JSON fragment
+ has_continuation = (
+ continuationContext
+ and continuationContext.get("section_count", 0) > 0
+ and continuationContext.get("last_raw_json", "")
+ and continuationContext.get("last_raw_json", "").strip() != "{}"
+ )
+
+ if has_continuation:
+ # CONTINUATION PROMPT - user already received first part, continue from where it stopped
+ last_raw_json = continuationContext.get("last_raw_json", "")
+ last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
+ last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
+ total_items_count = continuationContext.get("total_items_count", 0)
- generation_prompt = f"""Continue generating structured JSON content.
+ # Show the last few items to indicate where to continue (limit fragment size)
+ # Extract just the ending portion of the JSON to show where it cut off
+ fragment_snippet = ""
+ if last_raw_json:
+ # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
+ fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
+ # Add ellipsis if truncated
+ if len(last_raw_json) > 1500:
+ fragment_snippet = "..." + fragment_snippet
+
+ # Build clear continuation guidance
+ continuation_guidance = []
+
+ if total_items_count > 0:
+ continuation_guidance.append(f"You have already generated {total_items_count} items.")
+
+ # Show the last complete item object (full object format)
+ if last_item_object:
+ continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")
+
+ continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."
+
+ generation_prompt = f"""User request: "{userPrompt}"
-ORIGINAL REQUEST: "{userPrompt}"
-TARGET FORMAT: {outputFormat}
-TITLE: "{title_value}"
+The user already received part of the response. Continue generating the remaining content.
-CONTEXT - Already generated:
-- Total sections generated: {section_count}
-- Next section order: {next_order}
-- Last content: {last_content_sample}
+{continuation_text}
-YOUR TASK:
-Continue where previous generation stopped.
-Generate the NEXT section(s) starting with section_{next_order}.
-Generate as much content as possible.
+Previous response ended here (JSON was cut off at this point):
+```json
+{fragment_snippet if fragment_snippet else "(No fragment available)"}
+```
-RULES:
-- Follow the JSON template structure below exactly
-- Fill sections with ACTUAL data based on the user request
-- Use appropriate content_type for the data
-- Generate REAL content, not summaries or placeholders
-- Generate multiple sections if possible
-
-Return raw JSON (no ```json blocks, no text before/after)
-
-JSON Template
+JSON structure template:
{json_template}
+
+Instructions:
+- Return full JSON structure (metadata + documents + sections)
+- Continue from where it stopped - add NEW items only, do not repeat old items
+- Use the element structures shown in the template
+- Generate all remaining content needed to complete the user request
+- Fill with actual content (no comments, no "Add more..." text, no placeholders)
+- When fully complete, add "complete_response": true at root level
+- Return only valid JSON (no comments, no markdown blocks)
+
+Continue generating:
"""
else:
- # First call - simple prompt without continuation complexity
- generation_prompt = f"""Generate structured JSON content for document creation.
+ # FIRST CALL - initial generation
+ generation_prompt = f"""User request: "{userPrompt}"
-USER REQUEST: "{userPrompt}"
-TARGET FORMAT: {outputFormat}
-TITLE: "{title_value}"
+Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
-INSTRUCTIONS:
-- Follow the JSON template structure below exactly
-- Emit only one JSON object in the response
-- Fill sections with ACTUAL data based on the user request
-- Use appropriate content_type for each section
-- Generate REAL content, not summaries or instructions
-- Structure content in sections with order 1, 2, 3...
-- Each section should be complete before next
-- Generate as much content as possible
-
-Return raw JSON (no ```json blocks, no text before/after)
-
-JSON Template
+JSON structure template (reference only - shows the pattern):
{json_template}
+
+Instructions:
+- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
+- Do NOT continue from the template examples above - create your own sections
+- Generate content based on the user request
+- Use the element structures shown in the template (heading, paragraph, list, table, code)
+- Create your own section IDs (do not use the example IDs like "section_heading_example")
+- When fully complete, add "complete_response": true at root level
+- Return only valid JSON (no comments, no markdown blocks, no text before/after)
+
+Generate your complete response starting from {{"metadata": ...}}:
"""
# If we have extracted content, prepend it to the prompt
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index 92c6dd84..12b044f1 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -138,42 +138,73 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"""
Attempt to repair broken JSON using multiple strategies.
+ Generic solution that works for any content type.
Returns the best repair attempt or None if all fail.
"""
if not text:
return None
- # Strategy 1: Progressive parsing - try to find longest valid prefix
+ # Strategy 1: Try to extract sections from the entire text first
+ # This handles cases where the JSON structure is broken but content is intact
+ extracted_sections = _extractSectionsRegex(text)
+ if extracted_sections:
+ logger.info(f"Extracted {len(extracted_sections)} sections using regex")
+ return {
+ "metadata": {
+ "split_strategy": "single_document",
+ "source_documents": [],
+ "extraction_method": "ai_generation"
+ },
+ "documents": [{"sections": extracted_sections}]
+ }
+
+ # Strategy 2: Progressive parsing - try to find longest valid prefix
best_result = None
best_valid_length = 0
- for i in range(len(text), 0, -1):
- test_str = text[:i]
- closed_str = _closeJsonStructures(test_str)
- obj, err, _ = tryParseJson(closed_str)
- if err is None and isinstance(obj, dict):
- best_result = obj
- best_valid_length = i
- logger.debug(f"Progressive parsing success at length {i}")
+ # Try different step sizes to find the best valid JSON
+ for step_size in [100, 50, 10, 1]:
+ for i in range(len(text), 0, -step_size):
+ test_str = text[:i]
+ closed_str = _closeJsonStructures(test_str)
+ obj, err, _ = tryParseJson(closed_str)
+ if err is None and isinstance(obj, dict):
+ best_result = obj
+ best_valid_length = i
+ logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
+ break
+ if best_result:
break
if best_result:
logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
- return best_result
+
+ # Check if we have sections in the result
+ sections = extractSectionsFromDocument(best_result)
+ if sections:
+ logger.info(f"Progressive parsing found {len(sections)} sections")
+ return best_result
+ else:
+ # No sections found in progressive parsing, try to extract from broken part
+ logger.info("Progressive parsing found no sections, trying to extract from broken part")
+ extracted_sections = _extractSectionsRegex(text[best_valid_length:])
+ if extracted_sections:
+ logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
+ # Merge with the valid part
+ if "documents" not in best_result:
+ best_result["documents"] = []
+ if not best_result["documents"]:
+ best_result["documents"] = [{"sections": []}]
+ best_result["documents"][0]["sections"].extend(extracted_sections)
+ return best_result
- # Strategy 2: Structure closing - close incomplete structures
+ # Strategy 3: Structure closing - close incomplete structures
closed_str = _closeJsonStructures(text)
obj, err, _ = tryParseJson(closed_str)
if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing")
return obj
- # Strategy 3: Regex extraction (fallback for completely broken JSON)
- extracted = _extractSectionsRegex(text)
- if extracted:
- logger.info("Repaired JSON using regex extraction")
- return {"documents": [{"sections": extracted}]}
-
logger.warning("All repair strategies failed")
return None
@@ -204,7 +235,7 @@ def _closeJsonStructures(text: str) -> str:
def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
"""
Extract sections from broken JSON using regex patterns.
- Fallback strategy when JSON is completely corrupted.
+ Generic solution that works for any content type.
"""
import re
@@ -218,10 +249,10 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
content_type = match.group(2)
order = int(match.group(3))
- # Try to extract elements array
+ # Try to extract elements array - look for the elements array after this section
elements_match = re.search(
r'"elements"\s*:\s*\[(.*?)\]',
- text[match.end():match.end()+500] # Look ahead for elements
+ text[match.end():match.end()+5000] # Look ahead for elements (large range)
)
elements = []
@@ -230,7 +261,9 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
elements_str = '[' + elements_match.group(1) + ']'
elements = json.loads(elements_str)
except:
- pass
+ # If JSON parsing fails, try to extract individual items manually
+ elements_text = elements_match.group(1)
+ elements = _extractElementsFromText(elements_text, content_type)
sections.append({
"id": section_id,
@@ -239,6 +272,243 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
"order": order
})
+ # If no sections found with the main pattern, try to find any content patterns
+ if not sections:
+ sections = _extractGenericContent(text)
+
+ return sections
+
+
def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
    """
    Extract elements from text when JSON parsing fails.

    Generic regex-based recovery that works for any content type
    ("list", "paragraph", "heading", "table", "code", or anything else).
    Handles incomplete strings and corrupted data, and drops the final
    item when the raw text looks truncated (see _removeLastIncompleteItem).

    Args:
        elements_text: Raw text of an "elements" array body, possibly
            truncated mid-item.
        content_type: Section content type selecting which patterns apply.

    Returns:
        List of element dicts shaped like their valid-JSON equivalents.
    """
    import re

    elements: List[Dict[str, Any]] = []

    if content_type in ("list", "paragraph"):
        # Both types share the {"text": "..."} element shape, so one branch
        # handles them identically (they were previously duplicated code).
        # Complete items first, then items cut off at a newline / end of text
        # like {"text": "36
        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
        incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)

        # Combine complete and incomplete items; dict.fromkeys deduplicates
        # while preserving first-seen order; empty strings are dropped.
        all_items = text_items + incomplete_items
        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
            unique_items = _removeLastIncompleteItem(unique_items, elements_text)

        elements = [{"text": item} for item in unique_items]

    elif content_type == "heading":
        # Look for {"level": X, "text": "..."} patterns, including incomplete ones
        heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
        incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)

        all_items = heading_items + incomplete_heading_items
        # NOTE: these are (level, text) tuples, although _removeLastIncompleteItem
        # is annotated List[str]; the helper only slices the list, so this works
        # at runtime, but the annotation there is loose.
        unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
            unique_items = _removeLastIncompleteItem(unique_items, elements_text)

        elements = [{"level": level, "text": text} for level, text in unique_items]

    elif content_type == "table":
        # Look for complete table objects:
        # {"headers": [...], "rows": [...], "caption": "..."}
        table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
        for headers_str, rows_str, caption in table_items:
            # Extract headers
            headers = re.findall(r'"([^"]+)"', headers_str)
            # Extract rows (simplified: each bracketed chunk becomes one row)
            rows = []
            row_matches = re.findall(r'\[(.*?)\]', rows_str)
            for row_match in row_matches:
                row_items = re.findall(r'"([^"]+)"', row_match)
                rows.append(row_items)

            elements.append({
                "headers": headers,
                "rows": rows,
                "caption": caption
            })

    elif content_type == "code":
        # Look for {"code": "...", "language": "..."} patterns, including incomplete ones
        code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
        incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)

        # Incomplete snippets lost their language field - mark it "unknown".
        all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
        unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
            unique_items = _removeLastIncompleteItem(unique_items, elements_text)

        elements = [{"code": code, "language": lang} for code, lang in unique_items]

    else:
        # Generic fallback - look for any text content, including incomplete
        # (note: no surrounding braces required here, unlike the branches above)
        text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
        incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)

        all_items = text_items + incomplete_text_items
        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
            unique_items = _removeLastIncompleteItem(unique_items, elements_text)

        elements = [{"text": item} for item in unique_items]

    return elements
+
+
def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str]:
    """
    Drop the final entry of *items* when *original_text* looks truncated.

    Scans the raw text for tell-tale signs of JSON that was cut off
    mid-value (a dangling string, boolean, number, array or object at the
    end, or a trailing comma). When any sign is present, the last
    extracted item is assumed corrupted and removed; otherwise the list
    is returned unchanged. This prevents corrupted data from being
    included in the final result.
    """
    import re

    if not items:
        return items

    # (end-of-text pattern, debug message) pairs; the first match wins.
    truncation_signs = (
        # e.g. {"text": "36        - string value never closed
        (r'\{"[^"]*"\s*:\s*"[^"]*$',
         "Detected incomplete string at end - removing last item"),
        # e.g. {"bool_flag": tr    - boolean cut off mid-token
        (r'\{"[^"]*"\s*:\s*(true|false|tr|fa)$',
         "Detected incomplete boolean at end - removing last item"),
        # e.g. {"number": 123      - number with no closing brace
        (r'\{"[^"]*"\s*:\s*\d+$',
         "Detected incomplete number at end - removing last item"),
        # e.g. {"array": [1,2,3    - array never closed
        (r'\{"[^"]*"\s*:\s*\[[^\]]*$',
         "Detected incomplete array at end - removing last item"),
        # e.g. {"obj": {"key": "val - nested object never closed
        (r'\{"[^"]*"\s*:\s*\{[^}]*$',
         "Detected incomplete object at end - removing last item"),
    )
    for pattern, message in truncation_signs:
        if re.search(pattern, original_text):
            logger.debug(message)
            return items[:-1]

    # A trailing comma is a common sign the producer stopped mid-list.
    if original_text.rstrip().endswith(','):
        logger.debug("Detected trailing comma - removing last item")
        return items[:-1]

    # No truncation detected - keep all items.
    return items
+
+
def _extractGenericContent(text: str) -> List[Dict[str, Any]]:
    """
    Extract generic content when no specific section patterns are found.
    This handles cases where the JSON structure is completely broken.
    Handles incomplete strings and corrupted data.
    Excludes the last incomplete item to prevent corrupted data.

    Args:
        text: Raw (possibly truncated) response text to mine for content.

    Returns:
        A list with at most one synthetic section dict (id "section_1"),
        or an empty list when no usable content was found.
    """
    import re

    # At most one synthetic section is ever appended below.
    sections = []

    # Look for any structured content patterns
    # Pattern 1: Look for list items {"text": "..."}, including incomplete ones
    list_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
    # Incomplete = string value cut off at a newline or at end-of-text.
    incomplete_list_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)

    # dict.fromkeys deduplicates while preserving first-seen order.
    all_list_items = list_items + incomplete_list_items
    unique_list_items = list(dict.fromkeys([item for item in all_list_items if item.strip()]))

    # Remove the last item if it appears to be incomplete/corrupted
    if unique_list_items:
        unique_list_items = _removeLastIncompleteItem(unique_list_items, text)

    if unique_list_items:
        elements = [{"text": item} for item in unique_list_items]
        sections.append({
            "id": "section_1",
            "content_type": "list",
            "elements": elements,
            "order": 1
        })

    # Pattern 2: Look for paragraph text {"text": "..."}, including incomplete ones
    # NOTE(review): this guard regex has no closing quote before the brace, so
    # it only matches malformed items like {"text": "abc}. Moreover the findall
    # calls below are identical to Pattern 1's (which found nothing when this
    # branch is reached), so this branch appears unable to ever append a
    # section - confirm whether that is intended before changing it.
    elif re.search(r'\{"text"\s*:\s*"[^"]*\}', text):
        # Extract all text elements, including incomplete ones
        text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', text)
        incomplete_text_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', text)

        all_text_items = text_items + incomplete_text_items
        unique_text_items = list(dict.fromkeys([item for item in all_text_items if item.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_text_items:
            unique_text_items = _removeLastIncompleteItem(unique_text_items, text)

        if unique_text_items:
            elements = [{"text": item} for item in unique_text_items]
            sections.append({
                "id": "section_1",
                "content_type": "paragraph",
                "elements": elements,
                "order": 1
            })

    # Pattern 3: Look for any quoted strings that might be content, including incomplete ones
    elif re.search(r'"([^"]{3,})"', text):  # Strings longer than 3 chars (reduced threshold)
        # Extract longer quoted strings, including incomplete ones
        text_items = re.findall(r'"([^"]{3,})"', text)
        incomplete_text_items = re.findall(r'"([^"]{3,}?)(?:\n|$)', text)

        all_text_items = text_items + incomplete_text_items
        # Filter out likely JSON keys
        # NOTE(review): startswith-based filtering also drops real content that
        # merely begins with one of these prefixes (e.g. "order of magnitude").
        content_items = [item for item in all_text_items if not item.startswith(('section_', 'doc_', 'metadata', 'split_strategy', 'source_documents', 'extraction_method', 'id', 'content_type', 'elements', 'order', 'title', 'filename'))]

        # Remove the last item if it appears to be incomplete/corrupted
        if content_items:
            content_items = _removeLastIncompleteItem(content_items, text)

        if content_items:
            elements = [{"text": item} for item in content_items[:10]]  # Limit to first 10 items
            sections.append({
                "id": "section_1",
                "content_type": "paragraph",
                "elements": elements,
                "order": 1
            })

    return sections
@@ -324,33 +594,295 @@ def extractContentSample(section: Dict[str, Any]) -> str:
return "Content exists"
-def buildContinuationContext(allSections: List[Dict[str, Any]]) -> Dict[str, Any]:
def _buildDetailedContinuationInfo(section: Dict[str, Any], content_type: str) -> Dict[str, Any]:
    """
    Build detailed continuation information for better AI guidance.

    Completely generic - works for any content type (list, paragraph,
    code, table, etc.). Returns a dict with a content sample, the number
    of elements already present, and a guidance string telling the AI how
    to continue.
    """
    elements = section.get("elements", [])

    # The sample is taken the same way whether or not elements exist.
    sample = extractContentSample(section)

    info: Dict[str, Any] = {
        "type": "continue_general",
        "sample": sample,
        "last_item": "",
    }

    if elements:
        # Count elements regardless of their internal structure, and tell
        # the AI to produce ALL remaining content for the user's request.
        info["item_count"] = len(elements)
        info["guidance"] = "Generate ALL remaining content to complete the user's request. Continue from where you left off and finish everything that was requested."
    else:
        # Nothing generated yet - ask for more content in the same style.
        info["item_count"] = 0
        info["guidance"] = "Continue generating content in the same format and style."

    return info
+
def _extractLastItemsFromFragment(fragment: str, max_items: int = 10) -> str:
    """
    Extract the last few items from a JSON fragment for continuation context.
    Uses JSON structure (sections -> elements -> items) - fully generic.
    Works with broken/incomplete JSON by trying to parse and extract sections.

    Tries four strategies in order, returning on the first success:
      1. Repair + parse the whole fragment and walk its last section.
      2. Close and parse progressively shorter tails of the fragment.
      3. Regex-scan the tail for complete {"text": "..."} items.
      4. Give up and return "" (caller falls back to accumulated sections).

    Args:
        fragment: Raw JSON text, possibly broken or truncated.
        max_items: Maximum number of item texts to include.

    Returns:
        Comma-separated item texts, or "" when nothing could be extracted.
    """
    if not fragment:
        return ""

    # Strategy 1: Try to parse as JSON and extract from structure
    try:
        # Try to repair and parse the fragment
        parsed = repairBrokenJson(fragment)
        if parsed:
            # Extract sections from parsed JSON using structure
            sections = extractSectionsFromDocument(parsed)
            if sections:
                # Get the last section (likely where continuation should happen)
                sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                last_section = sorted_sections[-1]
                elements = last_section.get("elements", [])

                if elements and isinstance(elements, list):
                    content_type = last_section.get("content_type", "").lower()

                    # For list content_type, extract from items array
                    if content_type == "list" and len(elements) > 0:
                        last_element = elements[-1]
                        if isinstance(last_element, dict):
                            # Check if it has an "items" array (list structure)
                            if "items" in last_element and isinstance(last_element["items"], list):
                                items_list = last_element["items"]
                                if items_list:
                                    # Get last max_items from this items array
                                    last_items = items_list[-max_items:] if len(items_list) > max_items else items_list
                                    # Extract text from each item
                                    texts = []
                                    for item in last_items:
                                        if isinstance(item, dict) and "text" in item:
                                            texts.append(str(item["text"]))
                                    if texts:
                                        return ', '.join(texts)

                            # Or if elements themselves are items (alternative structure)
                            elif "text" in last_element:
                                # Get last max_items elements that have text
                                elements_with_text = [e for e in elements if isinstance(e, dict) and "text" in e]
                                if elements_with_text:
                                    last_elements = elements_with_text[-max_items:] if len(elements_with_text) > max_items else elements_with_text
                                    texts = [str(e.get("text", "")) for e in last_elements]
                                    if texts:
                                        return ', '.join(texts)
                            # NOTE(review): a "list" section whose last element has
                            # neither "items" nor "text" silently falls through to
                            # Strategy 2 - confirm that is intended.

                    # For other content types, extract from elements
                    elif len(elements) > 0:
                        # Get last max_items elements that have text/code
                        valid_elements = [e for e in elements if isinstance(e, dict) and ("text" in e or "code" in e)]
                        if valid_elements:
                            last_elements = valid_elements[-max_items:] if len(valid_elements) > max_items else valid_elements
                            texts = []
                            for elem in last_elements:
                                if "text" in elem:
                                    texts.append(str(elem["text"]))
                                elif "code" in elem:
                                    # For code, show a 50-char snippet only
                                    code = str(elem["code"])
                                    texts.append(code[:50] + "..." if len(code) > 50 else code)
                            if texts:
                                return ', '.join(texts)
    except Exception as e:
        logger.debug(f"Could not extract items from fragment using JSON structure: {e}")

    # Strategy 2: If parsing failed, try progressive parsing from the end
    # Look for the last complete JSON structures near the end
    try:
        # Try parsing different lengths from the end (largest tail first)
        for length in [3000, 2000, 1000, 500]:
            if len(fragment) > length:
                end_portion = fragment[-length:]
                closed = _closeJsonStructures(end_portion)
                obj, err, _ = tryParseJson(closed)
                if err is None and isinstance(obj, dict):
                    # Successfully parsed - extract sections
                    sections = extractSectionsFromDocument(obj)
                    if sections:
                        # Same extraction logic as above
                        sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                        if sorted_sections:
                            last_section = sorted_sections[-1]
                            elements = last_section.get("elements", [])
                            if elements:
                                # Extract texts using same logic as Strategy 1
                                texts = []
                                for elem in elements[-max_items:]:
                                    if isinstance(elem, dict):
                                        if "items" in elem and isinstance(elem["items"], list):
                                            # Get last item from items array
                                            if elem["items"]:
                                                last_item = elem["items"][-1]
                                                if isinstance(last_item, dict) and "text" in last_item:
                                                    texts.append(str(last_item["text"]))
                                        elif "text" in elem:
                                            texts.append(str(elem["text"]))
                                if texts:
                                    return ', '.join(texts[-max_items:])
    except Exception as e:
        logger.debug(f"Progressive parsing from end failed: {e}")

    # Strategy 3: If all parsing fails, try simple extraction from raw fragment
    # Look for last complete {"text": "..."} pattern near the end
    try:
        # Look at last 2000 chars for the pattern
        end_portion = fragment[-2000:] if len(fragment) > 2000 else fragment
        # Find all {"text": "value"} patterns
        import re
        # NOTE(review): [^"]+ stops at the first quote, so escaped quotes
        # inside the value are NOT handled despite the original intent.
        pattern = r'\{"text"\s*:\s*"([^"]+)"\}'
        matches = re.findall(pattern, end_portion)
        if matches:
            # Get last max_items
            last_matches = matches[-max_items:] if len(matches) > max_items else matches
            return ', '.join(last_matches)
    except Exception as e:
        logger.debug(f"Simple pattern extraction failed: {e}")

    # Strategy 4: If all fails, return empty (will use last_item_from_sections)
    return ""
+
+
def buildContinuationContext(allSections: List[Dict[str, Any]], lastRawResponse: Optional[str] = None) -> Dict[str, Any]:
    """
    Build context information from accumulated sections for continuation prompt.
    Extracts last items and provides clear continuation point.

    Args:
        allSections: List of sections already generated
        lastRawResponse: Raw JSON response from last iteration (can be broken/incomplete)

    Returns:
        Dict always containing "section_count", "last_item_object" and
        "total_items_count". When a usable lastRawResponse is given it also
        contains "last_raw_json" and "last_items_from_fragment".
    """
    context = {
        "section_count": len(allSections),
    }

    # Extract last COMPLETE object directly from raw response (generic - works for any structure)
    # This is extracted BEFORE any merging/accumulation happens
    # Returns the full last complete object like {"text": "..."} or {"code": "...", "language": "..."} etc.
    # Logic: find the last complete {...} where there are no nested { inside (flat object)
    last_complete_object = ""  # Full object as JSON string
    total_items_count = 0

    if lastRawResponse:
        raw_json = stripCodeFences(lastRawResponse.strip())
        if raw_json and raw_json.strip() != "{}":
            # Find last complete flat object (no nested objects inside)
            # Scan from the end backwards to find the last complete {...} object
            # A flat object is complete if: starts with {, ends with }, and has no nested { inside
            # NOTE(review): this nested backward scan is O(n^2) in the worst case
            # (many non-flat closing braces) - confirm input sizes stay small.

            # Work backwards from the end, find last }
            for i in range(len(raw_json) - 1, -1, -1):
                if raw_json[i] == '}':
                    # Found a closing brace, work backwards to find its opening brace
                    depth = 1
                    opening_pos = -1  # NOTE(review): assigned below but never read

                    for j in range(i - 1, -1, -1):
                        if raw_json[j] == '}':
                            depth += 1
                        elif raw_json[j] == '{':
                            depth -= 1
                            if depth == 0:
                                # Found matching opening brace
                                opening_pos = j
                                # Check if this is a flat object (no nested { inside)
                                obj_content = raw_json[j + 1:i]
                                if '{' not in obj_content:
                                    # This is a flat object (no nested objects inside)
                                    last_complete_object = raw_json[j:i + 1]
                                break

                    if last_complete_object:
                        break

            # Also try structure-based parsing for item count
            try:
                parsed = repairBrokenJson(raw_json)
                if parsed:
                    sections = extractSectionsFromDocument(parsed)
                    if sections:
                        sorted_sections = sorted(sections, key=lambda s: s.get("order", 0))
                        last_section = sorted_sections[-1]
                        elements = last_section.get("elements", [])

                        if elements and isinstance(elements, list) and len(elements) > 0:
                            if last_section.get("content_type") == "list":
                                last_element = elements[-1]
                                if isinstance(last_element, dict):
                                    if "items" in last_element and isinstance(last_element["items"], list):
                                        items_list = last_element["items"]
                                        # Only count complete items (those successfully extracted)
                                        total_items_count = len(items_list)
            except Exception as e:
                logger.debug(f"Could not extract item count from raw response structure: {e}")

            # Also extract last items for display (fragment extraction)
            last_items_from_fragment = _extractLastItemsFromFragment(raw_json, max_items=10)

            context["last_raw_json"] = raw_json
            context["last_item_object"] = last_complete_object  # Full last complete object (generic - any structure)
            context["last_items_from_fragment"] = last_items_from_fragment
            context["total_items_count"] = total_items_count  # Count from raw response

            logger.debug(f"Included previous JSON response in continuation context ({len(raw_json)} chars, {total_items_count} items in response, last complete object: {last_complete_object})")
        else:
            logger.warning("lastRawResponse was empty or just '{}' - continuation may not work correctly")
    else:
        # No raw response - fallback to extracting from accumulated sections
        # Extract the last complete object from the last element
        last_item_object_from_sections = ""
        if allSections:
            sorted_sections = sorted(allSections, key=lambda s: s.get("order", 0))
            last_section = sorted_sections[-1]
            elements = last_section.get("elements", [])

            if elements and isinstance(elements, list) and len(elements) > 0:
                # Get the last element (could be any structure - generic)
                last_element = elements[-1]
                if isinstance(last_element, dict):
                    # Try to get items if it's a list structure
                    if "items" in last_element and isinstance(last_element["items"], list):
                        items_list = last_element["items"]
                        total_items_count = len(items_list)
                        if items_list:
                            # Get last item (any structure)
                            last_item = items_list[-1]
                            if isinstance(last_item, dict):
                                # Convert to JSON string (generic - works for any object structure)
                                import json
                                try:
                                    last_item_object_from_sections = json.dumps(last_item)
                                except:
                                    # NOTE(review): bare except silently drops
                                    # json.dumps failures (TypeError on
                                    # non-serializable values) - intentional?
                                    pass
                    else:
                        # Element itself is the object (no items array)
                        total_items_count = len(elements)
                        # Convert to JSON string (generic)
                        import json
                        try:
                            last_item_object_from_sections = json.dumps(last_element)
                        except:
                            # NOTE(review): same silent best-effort as above.
                            pass

        context["last_item_object"] = last_item_object_from_sections
        context["total_items_count"] = total_items_count
        logger.debug(f"No previous raw response available for continuation context (but have {total_items_count} items accumulated, last item object: {last_item_object_from_sections})")

    return context
+
diff --git a/test4_method_ai_operations.py b/test4_method_ai_operations.py
index e0bd5861..dc09ea9a 100644
--- a/test4_method_ai_operations.py
+++ b/test4_method_ai_operations.py
@@ -50,7 +50,7 @@ class MethodAiOperationsTester:
"resultType": "json"
},
OperationTypeEnum.DATA_GENERATE: {
- "aiPrompt": "Generate the first 9000 prime numbers.",
+ "aiPrompt": "Generate the first 4000 prime numbers.",
"resultType": "txt"
},
OperationTypeEnum.DATA_EXTRACT: {