# gateway/modules/services/serviceGeneration/prompt_builder.py

"""
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""

from typing import Protocol


class _RendererLike(Protocol):
    """Structural type for any renderer usable by the prompt builder.

    An object satisfies this protocol by exposing a ``getExtractionPrompt``
    method with the signature below; no inheritance is required.
    """

    def getExtractionPrompt(
        self,
        user_prompt: str,
        title: str,
    ) -> str:
        """Return only the format-specific guidelines for the prompt."""
        ...


def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """
    Build the final extraction prompt for a document-generation request.

    The prompt is assembled from, in order:
    - The user prompt (placed first; surrounding whitespace of the generic
      block is trimmed, so leading whitespace of the prompt is dropped)
    - Generic cross-format instructions (filename header + real-data policy)
    - Format-specific guidelines snippet provided by the renderer
    - A closing instruction to generate the document

    The AI is told to place a single filename header at the very top:
    FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content according to
    the target format.

    Args:
        output_format: Target format name; it is upper-cased for display in
            the prompt.
        renderer: Object providing ``getExtractionPrompt(user_prompt, title)``.
        user_prompt: The user's request, inserted at the top of the prompt.
        title: Document title, quoted inside the generic instructions.

    Returns:
        The assembled prompt text. If the renderer supplies no guidelines
        (``None``, empty, or whitespace-only), the "FORMAT-SPECIFIC
        GUIDELINES" section is omitted instead of being emitted empty.
    """

    # Normalise the renderer output: a renderer with nothing to add must not
    # crash prompt construction or leave a dangling section header behind.
    raw_guidelines = renderer.getExtractionPrompt(user_prompt, title)
    format_guidelines = (raw_guidelines or "").strip()

    # Generic block appears once for every format
    generic_intro = f"""
{user_prompt}

You are generating a document in {output_format.upper()} format for the title: "{title}".

Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Work with whatever data is available from the source documents - partial data is better than no data.
- If some information is missing, create the best possible document with what you have available.
- Do not refuse to generate the document due to incomplete data - always proceed with available information.
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
  FILENAME: <safe-file-name-with-extension>
  - The base name should be short, descriptive, and kebab-case or snake-case without spaces.
  - Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
  - Avoid special characters beyond [a-zA-Z0-9-_].
  - After this header, insert a single blank line and then provide ONLY the document content.

Common policy:
- Use the actual data from the source documents to create the content.
- If data is incomplete, work with what you have and create a meaningful document.
- Always generate the document - never refuse due to missing information.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Final assembly: sections are separated by exactly one blank line.
    sections = [generic_intro]
    if format_guidelines:
        sections.append("FORMAT-SPECIFIC GUIDELINES:\n" + format_guidelines)
    sections.append(
        "Generate the complete document content now based on the source documents below:"
    )

    return "\n\n".join(sections)