# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""

import logging
from typing import Dict, Any
from modules.datamodels.datamodelJson import jsonTemplateDocument

logger = logging.getLogger(__name__)


# Number of trailing characters of the previous raw JSON shown in a continuation
# prompt - just enough to show the model where the output was cut off.
_FRAGMENT_TAIL_CHARS = 1500


def _hasUsableContinuation(continuationContext: Dict[str, Any]) -> bool:
    """Return True when the context has sections and a non-trivial JSON fragment.

    A missing/None context, a missing/None/zero ``section_count`` and a
    ``last_raw_json`` that is empty, whitespace-only or just ``{}`` all count
    as "no continuation".
    """
    if not continuationContext:
        return False
    # ``or 0`` / ``or ""`` tolerate keys that are present but set to None.
    sectionCount = continuationContext.get("section_count") or 0
    rawJson = (continuationContext.get("last_raw_json") or "").strip()
    return sectionCount > 0 and rawJson not in ("", "{}")


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist: a first-call prompt, and a continuation prompt
    that is used when ``continuationContext`` reports at least one section and
    carries a non-empty raw JSON fragment from the previous response.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents to prepend to prompt
        continuationContext: Optional context from previous generation for continuation.
            Keys read: "section_count", "last_raw_json", "last_item_object",
            "total_items_count".

    Returns:
        Complete generation prompt string (leading/trailing whitespace stripped)
    """
    # Fall back to a generic title when none is provided
    titleValue = title if title else "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    if _hasUsableContinuation(continuationContext):
        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
        lastRawJson = continuationContext.get("last_raw_json", "")
        lastItemObject = continuationContext.get("last_item_object", "")  # Full object like {"text": "value"}
        totalItemsCount = continuationContext.get("total_items_count") or 0

        # Show only the tail of the previous JSON; mark it with an ellipsis when truncated.
        # The fragment is guaranteed non-empty here, so no placeholder is needed.
        fragmentSnippet = lastRawJson[-_FRAGMENT_TAIL_CHARS:]
        if len(lastRawJson) > _FRAGMENT_TAIL_CHARS:
            fragmentSnippet = "..." + fragmentSnippet

        # Build clear continuation guidance
        continuationGuidance = []
        if totalItemsCount > 0:
            continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
        if lastItemObject:
            continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")

        continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."

        # PROMPT FOR CONTINUATION

        generationPrompt = f"""User request: "{userPrompt}"

The user already received part of the response. Continue generating the remaining content.

{continuationText}

Previous response ended here (JSON was cut off at this point):
```json
{fragmentSnippet}
```

JSON structure template:
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use ONLY the element structures shown in the template.
- Continue from where it stopped — add NEW items only; do not repeat existing items.
- Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- Fill with actual content (no placeholders or instructional text such as "Add more...").
- IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content.
- When the request is fully satisfied, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text.

IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request.

Continue generating:
"""
    else:

        # PROMPT FOR FIRST CALL

        generationPrompt = f"""User request: "{userPrompt}"

Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

JSON structure template:
{jsonTemplate}

Instructions:
- Start with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- When the request is fully satisfied, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text.

Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generationPrompt}"""

    return generationPrompt.strip()