ai loop: generic continuation logic
This commit is contained in:
parent
be2934d54a
commit
24f152d0b9
9 changed files with 433 additions and 209 deletions
|
|
@ -461,18 +461,16 @@ class AiObjects:
|
|||
# Calculate input bytes from prompt and context
|
||||
inputBytes = len((prompt + context).encode('utf-8'))
|
||||
|
||||
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
||||
# Use maxTokens for output limit, not contextLength
|
||||
if model.maxTokens > 0:
|
||||
tokenLimit = str(model.maxTokens)
|
||||
# Replace <TOKEN_LIMIT> placeholder with model's maxTokens value
|
||||
if "<TOKEN_LIMIT>" in prompt:
|
||||
if model.maxTokens > 0:
|
||||
tokenLimit = str(model.maxTokens)
|
||||
modelPrompt = prompt.replace("<TOKEN_LIMIT>", tokenLimit)
|
||||
logger.debug(f"Replaced <TOKEN_LIMIT> with {tokenLimit} for model {model.name}")
|
||||
else:
|
||||
raise ValueError(f"Model {model.name} has invalid maxTokens ({model.maxTokens}). Cannot set token limit.")
|
||||
else:
|
||||
tokenLimit = "16000" # Default for text generation
|
||||
|
||||
# Create a copy of the prompt for this model call
|
||||
modelPrompt = prompt
|
||||
if "<TOKEN_LIMIT>" in modelPrompt:
|
||||
modelPrompt = modelPrompt.replace("<TOKEN_LIMIT>", tokenLimit)
|
||||
logger.debug(f"Replaced <TOKEN_LIMIT> with {tokenLimit} for model {model.name}")
|
||||
modelPrompt = prompt
|
||||
|
||||
# Update messages array with replaced content
|
||||
messages = []
|
||||
|
|
@ -483,11 +481,6 @@ class AiObjects:
|
|||
# Start timing
|
||||
startTime = time.time()
|
||||
|
||||
# Get the connector for this model
|
||||
connector = modelRegistry.getConnectorForModel(model.name)
|
||||
if not connector:
|
||||
raise ValueError(f"No connector found for model {model.name}")
|
||||
|
||||
# Call the model's function directly - completely generic
|
||||
if model.functionCall:
|
||||
# Create standardized call object
|
||||
|
|
|
|||
|
|
@ -139,8 +139,7 @@ class AiService:
|
|||
) -> Union[str, Dict[str, Any]]:
|
||||
"""Document generation AI call for all non-planning calls."""
|
||||
await self._ensureAiObjectsInitialized()
|
||||
# Use "json" for document generation calls since they return JSON
|
||||
return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title, "json")
|
||||
return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)
|
||||
|
||||
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
|
||||
"""Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
|
||||
|
|
|
|||
|
|
@ -9,24 +9,17 @@ from modules.services.serviceAi.subSharedAiUtils import (
|
|||
reduceText,
|
||||
determineCallType
|
||||
)
|
||||
from modules.shared.jsonUtils import (
|
||||
extractJsonString,
|
||||
repairBrokenJson,
|
||||
extractSectionsFromDocument,
|
||||
buildContinuationContext
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Generic continuation instruction for all prompts with JSON responses
|
||||
# Used by _callAiWithLooping() to replace LOOP_INSTRUCTION placeholder
|
||||
LOOP_INSTRUCTION_TEXT = """
|
||||
MANDATORY RULE:
|
||||
Return ONLY raw JSON (no ```json blocks, no text before/after)
|
||||
|
||||
CONTINUATION REQUIREMENT:
|
||||
Your response must be a valid JSON object with a "continuation" field.
|
||||
|
||||
- If you can complete the FULL request: Set {"continuation": null}
|
||||
- If you MUST stop early (due to token limits): Set {"continuation": {"last_data_items": "brief summary of what was delivered for context", "next_instruction": "what to deliver next to complete the request"}}
|
||||
|
||||
The "continuation" field controls whether this AI call continues in a loop or stops.
|
||||
Refer to the json template below to see where to set the "continuation" information.
|
||||
"""
|
||||
# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
|
||||
# Sections are accumulated and repair mechanism handles broken JSON automatically
|
||||
|
||||
# Rebuild the model to resolve forward references
|
||||
AiCallRequest.model_rebuild()
|
||||
|
|
@ -126,7 +119,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
|
||||
|
||||
# Shared Core Function for AI Calls with Looping
|
||||
# Shared Core Function for AI Calls with Looping and Repair
|
||||
async def _callAiWithLooping(
|
||||
self,
|
||||
prompt: str,
|
||||
|
|
@ -134,9 +127,8 @@ Respond with ONLY a JSON object in this exact format:
|
|||
debugPrefix: str = "ai_call"
|
||||
) -> str:
|
||||
"""
|
||||
Shared core function for AI calls with looping system.
|
||||
Handles continuation logic when response needs multiple rounds.
|
||||
Delivers prompt and response to debug file log.
|
||||
Shared core function for AI calls with repair-based looping system.
|
||||
Automatically repairs broken JSON and continues generation seamlessly.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send to AI
|
||||
|
|
@ -146,42 +138,28 @@ Respond with ONLY a JSON object in this exact format:
|
|||
Returns:
|
||||
Complete AI response after all iterations
|
||||
"""
|
||||
max_iterations = 100 # Prevent infinite loops
|
||||
max_iterations = 50 # Prevent infinite loops
|
||||
iteration = 0
|
||||
accumulatedContent = []
|
||||
lastContinuationData = None
|
||||
|
||||
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix})")
|
||||
|
||||
# Use generic LOOP_INSTRUCTION_TEXT
|
||||
loopInstruction = LOOP_INSTRUCTION_TEXT if ("LOOP_INSTRUCTION" in prompt) else ""
|
||||
allSections = [] # Accumulate all sections across iterations
|
||||
|
||||
logger.debug(f"Starting AI call with repair-based looping (debug prefix: {debugPrefix})")
|
||||
|
||||
while iteration < max_iterations:
|
||||
iteration += 1
|
||||
logger.debug(f"AI call iteration {iteration}/{max_iterations}")
|
||||
|
||||
# Build iteration prompt
|
||||
if iteration == 1:
|
||||
# First iteration - replace LOOP_INSTRUCTION with standardized instruction
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
if len(allSections) > 0:
|
||||
# This is a continuation - build continuation context
|
||||
continuationContext = buildContinuationContext(allSections)
|
||||
logger.info(f"Continuation context: {continuationContext.get('section_count')} sections, next order: {continuationContext.get('next_order')}")
|
||||
|
||||
# If prompt contains a placeholder for continuation, inject the context
|
||||
# For now, we'll handle this at the calling code level
|
||||
iterationPrompt = prompt
|
||||
else:
|
||||
# Subsequent iterations - include continuation data if available
|
||||
if lastContinuationData and isinstance(lastContinuationData, dict):
|
||||
continuationPrompt = self._buildContinuationPrompt(lastContinuationData, iteration)
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationPrompt}\n\n{loopInstruction}")
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
else:
|
||||
# No continuation data - re-send original prompt
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
# First iteration - use original prompt
|
||||
iterationPrompt = prompt
|
||||
|
||||
# Make AI call
|
||||
try:
|
||||
|
|
@ -192,12 +170,10 @@ Respond with ONLY a JSON object in this exact format:
|
|||
options=options
|
||||
)
|
||||
|
||||
# Write the ACTUAL prompt sent to AI (including continuation context)
|
||||
# Write the ACTUAL prompt sent to AI
|
||||
if iteration == 1:
|
||||
# First iteration - use the historic naming pattern
|
||||
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
|
||||
else:
|
||||
# Subsequent iterations - include iteration number
|
||||
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
|
||||
|
||||
response = await self.aiObjects.call(request)
|
||||
|
|
@ -205,10 +181,8 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
# Write raw AI response to debug file
|
||||
if iteration == 1:
|
||||
# First iteration - use the historic naming pattern
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
|
||||
else:
|
||||
# Subsequent iterations - include iteration number
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
||||
|
||||
# Emit stats for this iteration
|
||||
|
|
@ -222,35 +196,24 @@ Respond with ONLY a JSON object in this exact format:
|
|||
logger.warning(f"Iteration {iteration}: Empty response, stopping")
|
||||
break
|
||||
|
||||
accumulatedContent.append(result)
|
||||
# Extract sections from response (handles both valid and broken JSON)
|
||||
extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
|
||||
|
||||
# Check if this is a continuation response (only when LOOP_INSTRUCTION was used)
|
||||
if loopInstruction:
|
||||
try:
|
||||
# Extract JSON substring if wrapped (e.g., ```json ... ```)
|
||||
extracted = self.services.utils.jsonExtractString(result)
|
||||
parsed_result = json.loads(extracted)
|
||||
|
||||
if isinstance(parsed_result, dict):
|
||||
continuation = parsed_result.get("continuation")
|
||||
|
||||
if continuation is None:
|
||||
# Final response - break loop
|
||||
logger.debug(f"Iteration {iteration}: Final response received (continuation: null)")
|
||||
break
|
||||
else:
|
||||
# Continuation detected - extract data for next iteration
|
||||
lastContinuationData = continuation if isinstance(continuation, dict) else None
|
||||
logger.debug(f"Iteration {iteration}: Continuation detected, continuing...")
|
||||
continue
|
||||
except json.JSONDecodeError:
|
||||
# Not JSON, treat as final response
|
||||
logger.warning(f"Iteration {iteration}: Non-JSON response - treating as final")
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_error_non_json_response_iteration_{iteration}")
|
||||
break
|
||||
if not extractedSections:
|
||||
logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
|
||||
break
|
||||
|
||||
# Add new sections to accumulator
|
||||
allSections.extend(extractedSections)
|
||||
logger.info(f"Iteration {iteration}: Extracted {len(extractedSections)} sections (total: {len(allSections)})")
|
||||
|
||||
# Check if we should continue (completion detection)
|
||||
if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete):
|
||||
logger.debug(f"Iteration {iteration}: Continuing generation")
|
||||
continue
|
||||
else:
|
||||
# No loop instruction format - treat as final response
|
||||
logger.debug(f"Iteration {iteration}: Final response received (no loop format)")
|
||||
# Done - build final result
|
||||
logger.info(f"Iteration {iteration}: Generation complete")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -260,95 +223,112 @@ Respond with ONLY a JSON object in this exact format:
|
|||
if iteration >= max_iterations:
|
||||
logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")
|
||||
|
||||
# Intelligently merge JSON content from all iterations
|
||||
final_result = self._mergeJsonContent(accumulatedContent) if accumulatedContent else ""
|
||||
# Build final result from accumulated sections
|
||||
final_result = self._buildFinalResultFromSections(allSections)
|
||||
|
||||
# Write final result to debug file
|
||||
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
|
||||
|
||||
logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
|
||||
logger.info(f"AI call completed: {len(allSections)} total sections from {iteration} iterations")
|
||||
return final_result
|
||||
|
||||
def _buildContinuationPrompt(
|
||||
|
||||
def _extractSectionsFromResponse(
|
||||
self,
|
||||
continuationData: dict,
|
||||
iteration: int
|
||||
) -> str:
|
||||
result: str,
|
||||
iteration: int,
|
||||
debugPrefix: str
|
||||
) -> Tuple[List[Dict[str, Any]], bool]:
|
||||
"""
|
||||
Build standardized continuation prompt from continuation data dict.
|
||||
This replaces the complex _buildContinuationContent method with a simpler approach.
|
||||
|
||||
Args:
|
||||
continuationData: Dictionary containing last_data_items and next_instruction
|
||||
iteration: Current iteration number
|
||||
Extract sections from AI response, handling both valid and broken JSON.
|
||||
Uses repair mechanism for broken JSON.
|
||||
Returns (sections, wasJsonComplete)
|
||||
"""
|
||||
# First, try to parse as valid JSON
|
||||
try:
|
||||
extracted = extractJsonString(result)
|
||||
parsed_result = json.loads(extracted)
|
||||
|
||||
Returns:
|
||||
Formatted continuation prompt string
|
||||
"""
|
||||
last_data_items = continuationData.get("last_data_items", "")
|
||||
next_instruction = continuationData.get("next_instruction", "")
|
||||
# Extract sections from parsed JSON
|
||||
sections = extractSectionsFromDocument(parsed_result)
|
||||
logger.debug(f"Iteration {iteration}: Valid JSON - extracted {len(sections)} sections")
|
||||
return sections, True # JSON was complete
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
# Broken JSON - try repair mechanism
|
||||
logger.warning(f"Iteration {iteration}: Invalid JSON, attempting repair: {str(e)}")
|
||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
|
||||
|
||||
# Try to repair
|
||||
repaired_json = repairBrokenJson(result)
|
||||
|
||||
if repaired_json:
|
||||
# Extract sections from repaired JSON
|
||||
sections = extractSectionsFromDocument(repaired_json)
|
||||
logger.info(f"Iteration {iteration}: Repaired JSON - extracted {len(sections)} sections")
|
||||
return sections, False # JSON was broken but repaired
|
||||
else:
|
||||
# Repair failed - log error
|
||||
logger.error(f"Iteration {iteration}: All repair strategies failed")
|
||||
return [], False
|
||||
|
||||
continuation_prompt = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
||||
You are continuing a previous response. DO NOT repeat any previous content.
|
||||
|
||||
{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
|
||||
|
||||
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
|
||||
|
||||
CRITICAL REQUIREMENTS:
|
||||
- Start from the exact point specified above
|
||||
- DO NOT repeat any previous content"""
|
||||
except Exception as e:
|
||||
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
|
||||
return [], False
|
||||
|
||||
def _shouldContinueGeneration(
|
||||
self,
|
||||
allSections: List[Dict[str, Any]],
|
||||
iteration: int,
|
||||
wasJsonComplete: bool
|
||||
) -> bool:
|
||||
"""
|
||||
Determine if generation should continue based on JSON completeness.
|
||||
Returns True if we should continue, False if done.
|
||||
"""
|
||||
if len(allSections) == 0:
|
||||
return True # No sections yet, continue
|
||||
|
||||
return continuation_prompt
|
||||
|
||||
def _mergeJsonContent(self, accumulatedContent: List[str]) -> str:
|
||||
# Simple rule: if JSON was complete, we're done
|
||||
# If JSON was broken and repaired, continue to get more content
|
||||
if wasJsonComplete:
|
||||
logger.info("JSON was complete - stopping generation")
|
||||
return False
|
||||
else:
|
||||
logger.info("JSON was broken/repaired - continuing generation")
|
||||
return True
|
||||
|
||||
def _buildFinalResultFromSections(
|
||||
self,
|
||||
allSections: List[Dict[str, Any]]
|
||||
) -> str:
|
||||
"""
|
||||
Generic JSON merger that combines all lists from multiple iterations.
|
||||
Structure: root attributes + 1..n lists that get merged together.
|
||||
Build final JSON result from accumulated sections.
|
||||
"""
|
||||
if not accumulatedContent:
|
||||
if not allSections:
|
||||
return ""
|
||||
|
||||
if len(accumulatedContent) == 1:
|
||||
return accumulatedContent[0]
|
||||
# Build documents structure
|
||||
# Assuming single document for now
|
||||
documents = [{
|
||||
"id": "doc_1",
|
||||
"title": "Generated Document", # This should come from prompt
|
||||
"filename": "document.json",
|
||||
"sections": allSections
|
||||
}]
|
||||
|
||||
try:
|
||||
|
||||
# Parse all JSON responses
|
||||
parsed_responses = []
|
||||
for content in accumulatedContent:
|
||||
try:
|
||||
extracted = self.services.utils.jsonExtractString(content)
|
||||
parsed = json.loads(extracted)
|
||||
parsed_responses.append(parsed)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Failed to parse JSON content: {str(e)}")
|
||||
continue
|
||||
|
||||
if not parsed_responses:
|
||||
return accumulatedContent[0] # Return first response if all parsing failed
|
||||
|
||||
# Start with first response as base
|
||||
merged = parsed_responses[0].copy()
|
||||
|
||||
# Merge all lists from all responses
|
||||
for response in parsed_responses[1:]:
|
||||
for key, value in response.items():
|
||||
if isinstance(value, list) and key in merged and isinstance(merged[key], list):
|
||||
# Merge lists by extending
|
||||
merged[key].extend(value)
|
||||
elif key not in merged:
|
||||
# Add new fields
|
||||
merged[key] = value
|
||||
|
||||
# Mark as complete
|
||||
merged["continuation"] = None
|
||||
|
||||
return json.dumps(merged, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error merging JSON content: {str(e)}")
|
||||
return accumulatedContent[0] # Return first response on error
|
||||
result = {
|
||||
"metadata": {
|
||||
"split_strategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": documents
|
||||
}
|
||||
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
# Old _buildContinuationPrompt and _mergeJsonContent methods removed
|
||||
# Now handled by repair mechanism in jsonUtils.py and section accumulation
|
||||
|
||||
|
||||
# Planning AI Call
|
||||
|
|
@ -429,7 +409,8 @@ CRITICAL REQUIREMENTS:
|
|||
extracted_content = None
|
||||
logger.debug(f"[DEBUG] title value: {title}, type: {type(title)}")
|
||||
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
|
||||
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content)
|
||||
# First call without continuation context
|
||||
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
|
||||
generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
|
||||
|
||||
# Parse the generated JSON (extract fenced/embedded JSON first)
|
||||
|
|
|
|||
|
|
@ -291,7 +291,6 @@ class SubDocumentProcessing:
|
|||
Build a prompt that includes partial results continuation instructions.
|
||||
|
||||
NOTE: This uses a different continuation pattern than SubCoreAi:
|
||||
- SubCoreAi uses "continuation": null/dict for generic JSON responses
|
||||
- This uses "continue": true/false + "continuation_context" for document sections
|
||||
- Kept separate because it's tightly coupled to document processing needs
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ This module builds prompts for generating documents from extracted content.
|
|||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -34,8 +35,7 @@ TEMPLATE_JSON_DOCUMENT_GENERATION = """{
|
|||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"continuation": null
|
||||
]
|
||||
}"""
|
||||
|
||||
|
||||
|
|
@ -43,38 +43,83 @@ async def buildGenerationPrompt(
|
|||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
extracted_content: str = None
|
||||
extracted_content: str = None,
|
||||
continuationContext: Dict[str, Any] = None
|
||||
) -> str:
|
||||
"""
|
||||
Build the unified generation prompt using a single JSON template.
|
||||
Simplified version without continuation logic in prompt.
|
||||
|
||||
Args:
|
||||
outputFormat: Target output format (html, pdf, docx, etc.)
|
||||
userPrompt: User's original prompt for document generation
|
||||
title: Title for the document
|
||||
extracted_content: Optional extracted content from documents to prepend to prompt
|
||||
continuationContext: Optional context from previous generation for continuation
|
||||
|
||||
Returns:
|
||||
Complete generation prompt string
|
||||
"""
|
||||
# Create a template - let AI generate title if not provided
|
||||
prompt_instruction = f"Use the following title: \"{title}\""
|
||||
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title)
|
||||
title_value = title if title else "Generated Document"
|
||||
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
|
||||
|
||||
# Always use the proper generation prompt template with LOOP_INSTRUCTION
|
||||
generation_prompt = f"""Generate structured JSON content for document creation.
|
||||
# Check if this is a continuation request
|
||||
if continuationContext and continuationContext.get("section_count", 0) > 0:
|
||||
# Continuation prompt - simple and focused
|
||||
section_count = continuationContext.get("section_count", 0)
|
||||
next_order = continuationContext.get("next_order", 1)
|
||||
last_content_sample = continuationContext.get("last_content_sample", "")
|
||||
|
||||
generation_prompt = f"""Continue generating structured JSON content.
|
||||
|
||||
USER CONTEXT: "{userPrompt}"
|
||||
ORIGINAL REQUEST: "{userPrompt}"
|
||||
TARGET FORMAT: {outputFormat}
|
||||
TITLE INSTRUCTION: {prompt_instruction}
|
||||
TITLE: "{title_value}"
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
CONTEXT - Already generated:
|
||||
- Total sections generated: {section_count}
|
||||
- Next section order: {next_order}
|
||||
- Last content: {last_content_sample}
|
||||
|
||||
YOUR TASK:
|
||||
Continue where previous generation stopped.
|
||||
Generate the NEXT section(s) starting with section_{next_order}.
|
||||
Generate as much content as possible.
|
||||
|
||||
RULES:
|
||||
- Follow the template structure below exactly; emit only one JSON object in the response
|
||||
- Fill sections with content based on the user request
|
||||
- Use appropriate content_type
|
||||
- Follow the JSON template structure below exactly
|
||||
- Fill sections with ACTUAL data based on the user request
|
||||
- Use appropriate content_type for the data
|
||||
- Generate REAL content, not summaries or placeholders
|
||||
- Generate multiple sections if possible
|
||||
|
||||
Return raw JSON (no ```json blocks, no text before/after)
|
||||
|
||||
JSON Template
|
||||
{json_template}
|
||||
"""
|
||||
else:
|
||||
# First call - simple prompt without continuation complexity
|
||||
generation_prompt = f"""Generate structured JSON content for document creation.
|
||||
|
||||
USER REQUEST: "{userPrompt}"
|
||||
TARGET FORMAT: {outputFormat}
|
||||
TITLE: "{title_value}"
|
||||
|
||||
INSTRUCTIONS:
|
||||
- Follow the JSON template structure below exactly
|
||||
- Emit only one JSON object in the response
|
||||
- Fill sections with ACTUAL data based on the user request
|
||||
- Use appropriate content_type for each section
|
||||
- Generate REAL content, not summaries or instructions
|
||||
- Structure content in sections with order 1, 2, 3...
|
||||
- Each section should be complete before next
|
||||
- Generate as much content as possible
|
||||
|
||||
Return raw JSON (no ```json blocks, no text before/after)
|
||||
|
||||
JSON Template
|
||||
{json_template}
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -135,3 +135,222 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
|
|||
return base
|
||||
|
||||
|
||||
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
    """
    Try to recover a JSON object from a truncated or malformed response.

    Three strategies are attempted in order:
      1. Progressive parsing: drop characters from the end one at a time,
         close open structures, and keep the longest prefix that parses
         to a dict.
      2. Structure closing on the full text.
      3. Regex extraction of section headers, wrapped in a minimal
         {"documents": [{"sections": [...]}]} envelope.

    Args:
        text: Raw response text.

    Returns:
        The repaired dict, or None when the text is empty or every
        strategy fails.
    """
    if not text:
        return None

    # Strategy 1: longest prefix that closes into a parseable dict.
    # The search stops at the first hit; an *empty* dict is not accepted
    # as a repair and lets the later strategies run.
    for prefix_length in range(len(text), 0, -1):
        candidate, parse_error, _ = tryParseJson(
            _closeJsonStructures(text[:prefix_length])
        )
        if parse_error is not None or not isinstance(candidate, dict):
            continue
        logger.debug(f"Progressive parsing success at length {prefix_length}")
        if candidate:
            logger.info(f"Repaired JSON using progressive parsing (valid length: {prefix_length})")
            return candidate
        break

    # Strategy 2: close whatever is open on the complete text.
    whole_object, whole_error, _ = tryParseJson(_closeJsonStructures(text))
    if whole_error is None and isinstance(whole_object, dict):
        logger.info("Repaired JSON using structure closing")
        return whole_object

    # Strategy 3: last resort, scrape section headers with a regex.
    scraped_sections = _extractSectionsRegex(text)
    if not scraped_sections:
        logger.warning("All repair strategies failed")
        return None

    logger.info("Repaired JSON using regex extraction")
    return {"documents": [{"sections": scraped_sections}]}
|
||||
|
||||
|
||||
def _closeJsonStructures(text: str) -> str:
    """
    Append the closing brackets/braces that a truncated JSON text is missing.

    The text is scanned once while tracking string literals (including
    backslash escapes), so brackets that appear inside string values are
    not counted. Open structures are closed innermost-first, which keeps
    the nesting valid (e.g. '{"a": [1' becomes '{"a": [1]}').

    Stray or mismatched closing characters are ignored, and an unterminated
    string literal is NOT closed; in those cases the returned text is
    simply not valid JSON and the caller's parse attempt fails.

    Args:
        text: Possibly truncated JSON text.

    Returns:
        The text with the missing closers appended; empty/None input is
        returned unchanged.
    """
    if not text:
        return text

    closer_for = {'{': '}', '[': ']'}
    pending_closers = []  # closers still owed, innermost structure last
    in_string = False
    escaped = False

    for char in text:
        if in_string:
            if escaped:
                # Character following a backslash never ends the string.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            in_string = True
        elif char in closer_for:
            pending_closers.append(closer_for[char])
        elif pending_closers and char == pending_closers[-1]:
            pending_closers.pop()

    return text + ''.join(reversed(pending_closers))
|
||||
|
||||
|
||||
def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    """
    Scrape section objects out of JSON text that cannot be parsed at all.

    A section is recognised by the key sequence "id" (value "section_<n>"),
    "content_type", "order". For each hit, an "elements" array is looked
    for in the text that follows, limited to 500 characters and to the
    start of the next recognised section so that one section never picks
    up its successor's elements. If no array is found, or it does not
    parse as JSON, the section is returned with an empty elements list.

    Args:
        text: Raw (broken) JSON text.

    Returns:
        List of dicts with keys "id", "content_type", "elements", "order";
        empty when nothing matches or the text is empty.
    """
    import re

    if not text:
        return []

    # Pattern to find section objects
    section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
    # DOTALL so that arrays spread over several lines are matched as well.
    elements_pattern = re.compile(r'"elements"\s*:\s*\[(.*?)\]', re.DOTALL)

    matches = list(re.finditer(section_pattern, text, re.IGNORECASE))
    sections = []

    for index, match in enumerate(matches):
        # Look ahead for this section's elements, but stop at the next section.
        window_end = match.end() + 500
        if index + 1 < len(matches):
            window_end = min(window_end, matches[index + 1].start())

        elements = []
        elements_match = elements_pattern.search(text[match.end():window_end])
        if elements_match:
            try:
                elements = json.loads('[' + elements_match.group(1) + ']')
            except (ValueError, RecursionError):
                # Truncated or nested array content: keep the section,
                # drop the unparseable elements.
                elements = []

        sections.append({
            "id": match.group(1),
            "content_type": match.group(2),
            "elements": elements,
            "order": int(match.group(3)),
        })

    return sections
|
||||
|
||||
|
||||
def extractSectionsFromDocument(documentData: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Collect the sections contained in a parsed document structure.

    Two layouts are supported:
      - nested: {"documents": [{"sections": [...]}, ...]}; the sections of
        all documents are concatenated in document order;
      - flat:   {"sections": [...]}.

    When a "documents" key is present it takes precedence, even if it
    yields no sections. Malformed parts (a non-list "documents" value,
    non-dict documents, non-list "sections") are skipped instead of
    raising, because the input usually comes from model output or from a
    JSON repair attempt.

    Args:
        documentData: Parsed JSON value.

    Returns:
        List of section entries; empty when the input is not a dict or
        contains no sections.
    """
    if not isinstance(documentData, dict):
        return []

    # Nested layout: sections live inside each entry of "documents".
    if "documents" in documentData:
        documents = documentData["documents"]
        if not isinstance(documents, (list, tuple)):
            # e.g. "documents": null
            return []

        all_sections = []
        for doc in documents:
            if not isinstance(doc, dict):
                continue
            sections = doc.get("sections")
            if isinstance(sections, list):
                all_sections.extend(sections)
        return all_sections

    # Flat layout: sections directly at the root.
    sections = documentData.get("sections")
    if isinstance(sections, list):
        return sections

    return []
|
||||
|
||||
|
||||
def extractContentSample(section: Dict[str, Any]) -> str:
    """
    Describe the most recent content of a section for a continuation prompt.

    Only the last five elements of the section are inspected, newest
    first, so the sample reflects where generation actually stopped:
      - "list":      the last (up to three) item texts;
      - "paragraph": the newest text, cut to its final 150 characters;
      - "code":      the last three lines when the newest snippet has more
                     than five lines, otherwise just its line count;
      - "table":     the row count of the newest table element.

    Args:
        section: Section dict with optional "content_type" and "elements".

    Returns:
        "" when section is not a dict, "Content exists" when nothing more
        specific can be said (no elements, unknown or missing content type,
        no usable element), otherwise one of the descriptions above.
    """
    if not isinstance(section, dict):
        return ""

    elements = section.get("elements", [])
    if not elements or not isinstance(elements, list):
        return "Content exists"

    # content_type may be missing or null in model output.
    content_type = str(section.get("content_type") or "").lower()

    # Newest elements first.
    recent_elements = [elem for elem in reversed(elements[-5:]) if isinstance(elem, dict)]

    def _asText(value: Any) -> str:
        # Model output may carry null or non-string values.
        return "" if value is None else str(value)

    if content_type == "list":
        items_text = [_asText(elem["text"]) for elem in recent_elements if "text" in elem]
        if items_text:
            shown = items_text[:3][::-1]  # last three items, original order
            return f"Last {len(shown)} items: {', '.join(shown)}"

    elif content_type == "paragraph":
        for elem in recent_elements:
            if "text" in elem:
                text = _asText(elem["text"])
                if len(text) > 150:
                    text = "..." + text[-150:]
                return f"Last content: {text}"

    elif content_type == "code":
        for elem in recent_elements:
            if "code" in elem:
                lines = _asText(elem["code"]).split('\n')
                if len(lines) > 5:
                    return f"Last lines ({len(lines)} total): {', '.join(lines[-3:])}"
                return f"Code ({len(lines)} lines)"

    elif content_type == "table":
        for elem in recent_elements:
            if "rows" in elem:
                rows = elem["rows"]
                row_count = len(rows) if isinstance(rows, (list, tuple)) else 0
                return f"Table with {row_count} rows"

    return "Content exists"
|
||||
|
||||
|
||||
def buildContinuationContext(allSections: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Summarise the sections generated so far for a continuation prompt.

    The section with the highest "order" is treated as the last one.
    Because sections come from model output, "order" may be missing, null,
    a numeric string or otherwise unusable; such values are coerced to an
    int where possible and count as 0 otherwise, so the summary never
    fails on them.

    Args:
        allSections: Sections accumulated across iterations.

    Returns:
        For empty input: section_count 0, next_order 1 and a placeholder
        last_content_sample. Otherwise a dict with section_count,
        last_section_id, last_order, next_order (last_order + 1),
        last_content_type and last_content_sample.
    """
    if not allSections:
        return {
            "section_count": 0,
            "next_order": 1,
            "last_content_sample": "No content yet"
        }

    def _orderOf(section: Any) -> int:
        # Non-dict entries and unusable order values sort first (order 0).
        try:
            return int(section.get("order", 0))
        except (AttributeError, TypeError, ValueError, OverflowError):
            return 0

    # Stable sort: among equal orders, the later section stays last.
    sorted_sections = sorted(allSections, key=_orderOf)

    last_section = sorted_sections[-1]
    last_order = _orderOf(last_section)
    last_fields = last_section if isinstance(last_section, dict) else {}

    # Get content sample from last section
    last_content_sample = extractContentSample(last_section)

    return {
        "section_count": len(allSections),
        "last_section_id": last_fields.get("id", ""),
        "last_order": last_order,
        "next_order": last_order + 1,
        "last_content_type": last_fields.get("content_type", ""),
        "last_content_sample": last_content_sample
    }
|
||||
|
||||
|
|
|
|||
|
|
@ -1178,11 +1178,9 @@ Return JSON:
|
|||
{{
|
||||
"subject": "subject line",
|
||||
"body": "email body (HTML allowed)",
|
||||
"attachments": ["doc_ref1", "doc_ref2"],
|
||||
"continuation": null
|
||||
"attachments": ["doc_ref1", "doc_ref2"]
|
||||
}}
|
||||
|
||||
LOOP_INSTRUCTION"""
|
||||
"""
|
||||
|
||||
# Call AI service to generate email content
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -78,8 +78,7 @@ Generate the next action to advance toward completing the task objective.
|
|||
"description": "What this action accomplishes",
|
||||
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}
|
||||
],
|
||||
"continuation": null
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -95,8 +94,7 @@ Generate the next action to advance toward completing the task objective.
|
|||
"description": "Extract data from documents",
|
||||
"userMessage": "Extracting data from documents"
|
||||
}
|
||||
],
|
||||
"continuation": null
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -125,7 +123,6 @@ Generate the next action to advance toward completing the task objective.
|
|||
|
||||
## 🚀 Response Format
|
||||
Return ONLY the JSON object with complete action objects. If you cannot complete the full response, set "continuation" to a brief description of what still needs to be generated. If you can complete the response, keep "continuation" as null.
|
||||
LOOP_INSTRUCTION
|
||||
"""
|
||||
|
||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
||||
|
|
@ -137,7 +134,7 @@ def generateResultReviewPrompt(context: Any) -> PromptBundle:
|
|||
PromptPlaceholder(label="REVIEW_CONTENT", content=extractReviewContent(context), summaryAllowed=True),
|
||||
]
|
||||
|
||||
template = """# Result Review & Validation
|
||||
template = f"""# Result Review & Validation
|
||||
|
||||
Review task execution outcomes and determine success, retry needs, or failure.
|
||||
|
||||
|
|
@ -166,7 +163,7 @@ def generateResultReviewPrompt(context: Any) -> PromptBundle:
|
|||
## 📊 Required JSON Structure
|
||||
|
||||
```json
|
||||
{
|
||||
{{
|
||||
"status": "success|retry|failed",
|
||||
"reason": "Detailed explanation of the validation decision",
|
||||
"improvements": ["specific improvement 1", "specific improvement 2"],
|
||||
|
|
@ -174,9 +171,8 @@ def generateResultReviewPrompt(context: Any) -> PromptBundle:
|
|||
"met_criteria": ["criteria1", "criteria2"],
|
||||
"unmet_criteria": ["criteria3", "criteria4"],
|
||||
"confidence": 0.85,
|
||||
"userMessage": "User-friendly message explaining the validation result in language '{{KEY:USER_LANGUAGE}}'",
|
||||
"continuation": null
|
||||
}
|
||||
"userMessage": "User-friendly message explaining the validation result in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}}
|
||||
```
|
||||
|
||||
## 🎯 Validation Principles
|
||||
|
|
@ -232,9 +228,6 @@ def generateResultReviewPrompt(context: Any) -> PromptBundle:
|
|||
- "Include user language parameter for better localization"
|
||||
- "Break down complex objective into smaller, focused actions"
|
||||
- "Verify document references before processing"
|
||||
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
"""
|
||||
|
||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
||||
|
|
|
|||
|
|
@ -72,21 +72,20 @@ Break down user requests into logical, executable task steps.
|
|||
## 📊 Required JSON Structure
|
||||
|
||||
```json
|
||||
{
|
||||
{{
|
||||
"overview": "Brief description of the overall plan",
|
||||
"userMessage": "User-friendly message explaining the task plan in language '{{KEY:USER_LANGUAGE}}'",
|
||||
"tasks": [
|
||||
{
|
||||
{{
|
||||
"id": "task_1",
|
||||
"objective": "Clear business objective focusing on what to deliver",
|
||||
"dependencies": ["task_0"],
|
||||
"success_criteria": ["measurable criteria 1", "measurable criteria 2"],
|
||||
"estimated_complexity": "low|medium|high",
|
||||
"userMessage": "What this task will accomplish in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}
|
||||
}}
|
||||
],
|
||||
"continuation": null
|
||||
}
|
||||
}}
|
||||
```
|
||||
|
||||
## 🎯 Task Structure Guidelines
|
||||
|
|
@ -127,7 +126,5 @@ Break down user requests into logical, executable task steps.
|
|||
- **Low**: Simple, single-action tasks (1-2 actions)
|
||||
- **Medium**: Multi-action tasks for one topic (3-5 actions)
|
||||
- **High**: Complex strategic tasks (6+ actions)
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
"""
|
||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
||||
|
|
|
|||
Loading…
Reference in a new issue