# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""

import json
import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

# Centralized JSON structure template for document generation.
# Includes one example section per content type (heading, paragraph, list,
# table, code) so the AI knows the structure pattern of each element kind.
#
# Notes for maintainers:
# - "{{DOCUMENT_TITLE}}" is a placeholder; buildGenerationPrompt() substitutes
#   it via str.replace() before embedding the template in the prompt.
# - The section ids ("section_*_example") and the "order": 0 values are
#   illustrative only; the first-call prompt tells the AI to create its own
#   section IDs rather than reuse the example ones.
# - This string is sent to the model verbatim, so any edit here (including
#   whitespace) changes the prompt.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
        "id": "doc_1",
        "title": "{{DOCUMENT_TITLE}}",
        "filename": "document.json",
        "sections": [
        {
            "id": "section_heading_example",
            "content_type": "heading",
            "elements": [
                {"level": 1, "text": "Heading Text"}
            ],
            "order": 0
        },
        {
            "id": "section_paragraph_example",
            "content_type": "paragraph",
            "elements": [
                {"text": "Paragraph text content"}
            ],
            "order": 0
        },
        {
            "id": "section_list_example",
            "content_type": "list",
            "elements": [
                {
                    "items": [
                        {"text": "Item 1"},
                        {"text": "Item 2"}
                    ],
                    "list_type": "numbered"
                }
            ],
            "order": 0
        },
        {
            "id": "section_table_example",
            "content_type": "table",
            "elements": [
                {
                    "headers": ["Column 1", "Column 2"],
                    "rows": [
                        ["Row 1 Col 1", "Row 1 Col 2"],
                        ["Row 2 Col 1", "Row 2 Col 2"]
                    ],
                    "caption": "Table caption"
                }
            ],
            "order": 0
        },
        {
            "id": "section_code_example",
            "content_type": "code",
            "elements": [
                {
                    "code": "function example() { return true; }",
                    "language": "javascript"
                }
            ],
            "order": 0
        }
      ]
    }
  ]
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist. A continuation prompt is produced when
    ``continuationContext`` reports at least one section and carries a
    non-empty ``last_raw_json`` fragment other than ``"{}"``; otherwise the
    initial ("first call") prompt is produced.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.). Accepted
            for interface compatibility; it is not used in the prompt.
        userPrompt: User's original prompt for document generation. Embedded
            verbatim in the prompt.
        title: Title for the document. Falls back to "Generated Document"
            when empty. The title is JSON-escaped before being inserted so
            the embedded template stays valid JSON.
        extracted_content: Optional extracted content from documents; when
            non-empty it is prepended to the prompt.
        continuationContext: Optional context from a previous generation.
            Keys read here: ``section_count``, ``last_raw_json``,
            ``last_item_object`` and ``total_items_count``. Missing or
            ``None`` values are treated as empty / zero.

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace.
    """
    # Only the tail of the previous response is shown: just enough for the
    # model to see where the JSON was cut off.
    max_fragment_chars = 1500

    title_value = title if title else "Generated Document"
    # Escape quotes, backslashes and control characters; json.dumps() adds
    # surrounding quotes which the template already provides, so strip them.
    escaped_title = json.dumps(str(title_value), ensure_ascii=False)[1:-1]
    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", escaped_title)

    context = continuationContext or {}
    last_raw_json = context.get("last_raw_json") or ""
    # "or 0" tolerates keys that are present but explicitly None.
    has_continuation = bool(
        (context.get("section_count") or 0) > 0
        and last_raw_json
        and last_raw_json.strip() != "{}"
    )

    if has_continuation:
        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
        last_item_object = context.get("last_item_object", "")  # Full object like {"text": "value"}
        total_items_count = context.get("total_items_count") or 0

        # last_raw_json is guaranteed non-empty in this branch, so the
        # snippet is never empty; prefix an ellipsis only when truncated.
        fragment_snippet = last_raw_json[-max_fragment_chars:]
        if len(last_raw_json) > max_fragment_chars:
            fragment_snippet = "..." + fragment_snippet

        # Build clear continuation guidance
        continuation_guidance = []
        if total_items_count > 0:
            continuation_guidance.append(f"You have already generated {total_items_count} items.")
        if last_item_object:
            continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")

        continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."

        generation_prompt = f"""User request: "{userPrompt}"

The user already received part of the response. Continue generating the remaining content.

{continuation_text}

Previous response ended here (JSON was cut off at this point):
```json
{fragment_snippet}
```

JSON structure template:
{json_template}

Instructions:
- Return full JSON structure (metadata + documents + sections)
- Continue from where it stopped - add NEW items only, do not repeat old items
- Use the element structures shown in the template
- Generate all remaining content needed to complete the user request
- Fill with actual content (no comments, no "Add more..." text, no placeholders)
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks)

Continue generating:
"""
    else:
        # FIRST CALL - initial generation
        generation_prompt = f"""User request: "{userPrompt}"

Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.

JSON structure template (reference only - shows the pattern):
{json_template}

Instructions:
- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
- Do NOT continue from the template examples above - create your own sections
- Generate content based on the user request
- Use the element structures shown in the template (heading, paragraph, list, table, code)
- Create your own section IDs (do not use the example IDs like "section_heading_example")
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks, no text before/after)

Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generation_prompt}"""

    return generation_prompt.strip()