# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""
import logging
from typing import Any, Dict, Optional

from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
# Horizontal rule used to fence the sections of every generated prompt.
_PROMPT_SEPARATOR = "=" * 80


def _hasValidContinuation(continuationContext: Optional[Dict[str, Any]]) -> bool:
    """Return True when the context carries a usable raw JSON fragment.

    Only ``last_raw_json`` is inspected, so a continuation is still possible
    when the previous response was too broken to extract any sections.
    A missing, empty, whitespace-only or bare ``{}`` fragment does not count.
    """
    if not continuationContext:
        return False
    lastRawJson = continuationContext.get("last_raw_json", "")
    if not lastRawJson:
        return False
    # Strip BEFORE testing for emptiness: a whitespace-only fragment is truthy
    # but carries nothing to continue from.
    return lastRawJson.strip() not in ("", "{}")


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """Assemble the continuation section: delivered summary plus cut-off references."""
    deliveredSummary = continuationContext.get("delivered_summary", "")
    elementBeforeCutoff = continuationContext.get("element_before_cutoff")
    cutOffElement = continuationContext.get("cut_off_element")

    parts = [
        f"{deliveredSummary}\n\n",
        "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
    ]

    # The reference elements are shown only so the model knows where the
    # previous generation stopped; each is included whenever it is available.
    if elementBeforeCutoff:
        parts.append("# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n")
        parts.append(f"{elementBeforeCutoff}\n\n")
    if cutOffElement:
        parts.append("# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n")
        parts.append(f"{cutOffElement}\n\n")

    parts.append("⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n")
    parts.append("Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n")
    parts.append("Start directly with the next element/section that should follow.\n\n")
    return "".join(parts)


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Build the unified generation prompt around the shared JSON template.

    Three prompt variants exist:
      * continuation - ``continuationContext`` holds a non-trivial
        ``last_raw_json``; the model is asked for the remaining content only.
        ``extracted_content`` is not included in this variant.
      * first call with extracted content - the extracted content is placed
        before the template and declared the data source.
      * first call without extracted content - generation from the user
        request alone.

    The coroutine awaits nothing; it is ``async`` only as part of its
    existing interface.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents
        continuationContext: Optional context from a previous, cut-off
            generation. Keys read: "last_raw_json", "delivered_summary",
            "element_before_cutoff", "cut_off_element".

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace
    """
    # Fall back to a fixed placeholder title when none was supplied.
    # NOTE(review): the title is inserted verbatim; if the placeholder sits
    # inside a JSON string in the template, quotes in the title would break
    # the example JSON - confirm against jsonTemplateDocument.
    titleValue = title if title else "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    # Header shared by all prompt variants: the fenced user request.
    userRequestBlock = f"""{_PROMPT_SEPARATOR}
USER REQUEST / USER PROMPT:
{_PROMPT_SEPARATOR}
{userPrompt}
{_PROMPT_SEPARATOR}
END OF USER REQUEST / USER PROMPT
{_PROMPT_SEPARATOR}"""

    if _hasValidContinuation(continuationContext):
        # CONTINUATION PROMPT - uses the summary format of the continuation context
        continuationText = _buildContinuationText(continuationContext)
        generationPrompt = f"""{userRequestBlock}
⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
{continuationText}
JSON structure template:
{jsonTemplate}
Rules:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only).
- Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them.
- Generate ONLY the remaining content that comes AFTER the reference elements.
- DO NOT regenerate the entire JSON structure - start directly with what comes next.
- Output JSON only; no markdown fences or extra text.
Continue generating the remaining content now.
"""
    elif extracted_content:
        # FIRST CALL with source data: the extracted content comes first and is
        # flagged as the data source so the template's example values are not reused.
        generationPrompt = f"""{userRequestBlock}
{_PROMPT_SEPARATOR}
⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
{_PROMPT_SEPARATOR}
The content below contains the ACTUAL DATA extracted from the source documents.
You MUST use this data - DO NOT generate fake or example data.
{_PROMPT_SEPARATOR}
EXTRACTED CONTENT FROM DOCUMENTS:
{_PROMPT_SEPARATOR}
{extracted_content}
{_PROMPT_SEPARATOR}
END OF EXTRACTED CONTENT
{_PROMPT_SEPARATOR}
Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
JSON structure template (structure only - use data from EXTRACTED CONTENT above):
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
Generate your complete response using the extracted content data.
"""
    else:
        # FIRST CALL without extracted content - generate from scratch
        generationPrompt = f"""{userRequestBlock}
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template:
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
Generate your complete response.
"""

    return generationPrompt.strip()