# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""

import json
import logging
from typing import Any, Dict, Optional

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Centralized JSON structure template for document generation.
# - "{{DOCUMENT_TITLE}}" is a placeholder that buildGenerationPrompt() replaces
#   with the (JSON-escaped) document title.
# - The "//" lines inside "elements" are hints describing the element shape for
#   each content_type. They make the template itself non-parseable as strict
#   JSON; it is meant to be embedded in a prompt, not loaded with json.loads().
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
        "id": "doc_1",
        "title": "{{DOCUMENT_TITLE}}",
        "filename": "document.json",
        "sections": [
        {
            "id": "section_1",
            "content_type": "heading|paragraph|table|list|code",
            "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
            ],
            "order": 1
        }
      ]
    }
  ]
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the generation prompt around the shared JSON document template.

    Two prompt variants exist:
    - a first-call prompt, used when no continuation context is given (or it
      reports no generated sections), and
    - a continuation prompt, used when ``continuationContext["section_count"]``
      is greater than zero.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document; a falsy value falls back to
            "Generated Document"
        extracted_content: Optional extracted content from documents; when
            truthy it is prepended to the prompt under a fixed header
        continuationContext: Optional context from a previous generation.
            Keys read: "section_count", "next_order" (defaults to
            section_count + 1 when absent or None) and "last_content_sample"
            (defaults to "").

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace
    """
    # Fall back to a fixed default title when none is provided.
    title_value = title if title else "Generated Document"

    # Escape the title as a JSON string body so quotes, backslashes or newlines
    # in it cannot break the structure of the template shown to the model.
    # json.dumps() wraps the result in double quotes, which the template
    # already supplies, hence the [1:-1] slice.
    escaped_title = json.dumps(str(title_value), ensure_ascii=False)[1:-1]
    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", escaped_title)

    # A missing or None section count means nothing was generated yet.
    section_count = (continuationContext or {}).get("section_count") or 0

    if section_count > 0:
        # Continuation prompt - simple and focused
        next_order = continuationContext.get("next_order")
        if next_order is None:
            # Without an explicit value, continue right after the sections
            # that already exist instead of restarting at section_1.
            next_order = section_count + 1
        last_content_sample = continuationContext.get("last_content_sample", "")

        generation_prompt = f"""Continue generating structured JSON content.

ORIGINAL REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"

CONTEXT - Already generated:
- Total sections generated: {section_count}
- Next section order: {next_order}
- Last content: {last_content_sample}

YOUR TASK:
Continue where previous generation stopped.
Generate the NEXT section(s) starting with section_{next_order}.
Generate as much content as possible.

RULES:
- Follow the JSON template structure below exactly
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for the data
- Generate REAL content, not summaries or placeholders
- Generate multiple sections if possible

Return raw JSON (no ```json blocks, no text before/after)

JSON Template
{json_template}
"""
    else:
        # First call - simple prompt without continuation complexity
        generation_prompt = f"""Generate structured JSON content for document creation.

USER REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"

INSTRUCTIONS:
- Follow the JSON template structure below exactly
- Emit only one JSON object in the response
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for each section
- Generate REAL content, not summaries or instructions
- Structure content in sections with order 1, 2, 3...
- Each section should be complete before next
- Generate as much content as possible

Return raw JSON (no ```json blocks, no text before/after)

JSON Template
{json_template}
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generation_prompt}"""

    return generation_prompt.strip()