# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""

import logging
from typing import Any, Dict, Optional

from modules.datamodels.datamodelJson import jsonTemplateDocument

logger = logging.getLogger(__name__)


# Horizontal rule used to fence off the sections of every prompt variant.
_PROMPT_SEPARATOR = "=" * 80


def _buildUserRequestBlock(userPrompt: str) -> str:
    """Return the fenced USER REQUEST header shared by all prompt variants."""
    return f"""{_PROMPT_SEPARATOR}
USER REQUEST / USER PROMPT:
{_PROMPT_SEPARATOR}
{userPrompt}
{_PROMPT_SEPARATOR}
END OF USER REQUEST / USER PROMPT
{_PROMPT_SEPARATOR}"""


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """Return the delivered summary plus the cut-off reference elements as prompt text."""
    deliveredSummary = continuationContext.get("delivered_summary", "")
    if deliveredSummary is None:
        # A key that is present but None would otherwise be rendered as the text "None".
        deliveredSummary = ""
    elementBeforeCutoff = continuationContext.get("element_before_cutoff")
    cutOffElement = continuationContext.get("cut_off_element")

    parts = [
        f"{deliveredSummary}\n\n",
        "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
    ]

    # The cut-off elements are included only as reference points so the model
    # knows where generation stopped; each one is added only when available.
    if elementBeforeCutoff:
        parts.append("# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n")
        parts.append(f"{elementBeforeCutoff}\n\n")

    if cutOffElement:
        parts.append("# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n")
        parts.append(f"{cutOffElement}\n\n")

    parts.append("⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n")
    parts.append("Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n")
    parts.append("Start directly with the next element/section that should follow.\n\n")
    return "".join(parts)


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    One of three prompt variants is produced:
    - continuation prompt, when continuationContext carries a usable "last_raw_json"
    - first-call prompt with extracted content, when extracted_content is non-empty
    - first-call prompt from scratch otherwise

    The function is declared async although it awaits nothing; the signature is
    kept so that existing callers that await it keep working.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; falls back to "Generated Document" when empty
        extracted_content: Optional extracted content from documents, embedded as the data source
        continuationContext: Optional context from a previous generation. Keys read here:
            "last_raw_json", "delivered_summary", "element_before_cutoff", "cut_off_element"

    Returns:
        Complete generation prompt string with surrounding whitespace stripped
    """
    # Fill the title placeholder of the template; use a fixed fallback title when none is given.
    # NOTE(review): the title is inserted verbatim. If the placeholder sits inside a JSON
    # string in the template, a title containing quotes would break the example - confirm.
    titleValue = title or "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    # Continuation is decided by the raw JSON fragment alone, not by any parsed section
    # count, so output too broken to parse can still be continued. The fragment is
    # stripped BEFORE the emptiness check so that whitespace-only values and an empty
    # object "{}" are both treated as "nothing delivered yet".
    rawJson = continuationContext.get("last_raw_json", "") if continuationContext else ""
    strippedJson = rawJson.strip() if rawJson else ""
    hasContinuation = bool(strippedJson) and strippedJson != "{}"

    userRequestBlock = _buildUserRequestBlock(userPrompt)

    if hasContinuation:
        # CONTINUATION PROMPT - summary of delivered content plus cut-off references
        continuationText = _buildContinuationText(continuationContext)

        generationPrompt = f"""{userRequestBlock}

⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.

{continuationText}

JSON structure template:
{jsonTemplate}

Rules:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only).
- Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them.
- Generate ONLY the remaining content that comes AFTER the reference elements.
- DO NOT regenerate the entire JSON structure - start directly with what comes next.
- Output JSON only; no markdown fences or extra text.

Continue generating the remaining content now.
"""
    elif extracted_content:
        # FIRST CALL WITH EXTRACTED CONTENT
        # Structure: user request, then the extracted content marked as the data source,
        # then the JSON template, then the instructions.
        generationPrompt = f"""{userRequestBlock}

{_PROMPT_SEPARATOR}
⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
{_PROMPT_SEPARATOR}
The content below contains the ACTUAL DATA extracted from the source documents.
You MUST use this data - DO NOT generate fake or example data.
{_PROMPT_SEPARATOR}
EXTRACTED CONTENT FROM DOCUMENTS:
{_PROMPT_SEPARATOR}
{extracted_content}
{_PROMPT_SEPARATOR}
END OF EXTRACTED CONTENT
{_PROMPT_SEPARATOR}

Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.

JSON structure template (structure only - use data from EXTRACTED CONTENT above):
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.

Generate your complete response using the extracted content data.
"""
    else:
        # FIRST CALL WITHOUT EXTRACTED CONTENT - generate from scratch
        generationPrompt = f"""{userRequestBlock}

Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

JSON structure template:
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.

Generate your complete response.
"""

    return generationPrompt.strip()