# gateway/modules/services/serviceGeneration/prompt_builder.py

"""
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""

import logging
from typing import Protocol


class _RendererLike(Protocol):
    """Structural type for the renderer object handed to the prompt builder.

    Any object exposing a compatible ``getExtractionPrompt`` method satisfies
    this protocol; no inheritance is required.
    """

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only the format-specific guidelines for the renderer's format."""


async def buildExtractionPrompt(
    outputFormat: str,
    renderer: _RendererLike,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """
    Assemble the complete extraction prompt for one output format.

    The prompt is made of three parts, in this order:
    1. The extraction intent derived from ``userPrompt`` by
       ``_parseExtractionIntent`` (which uses ``aiService`` when provided).
    2. A format-independent block: the filename-header rule and the
       "always use the real source data" policy.
    3. The format-specific guidelines returned by
       ``renderer.getExtractionPrompt(userPrompt, title)``.

    The generated instructions tell the AI to start its answer with one line
    ``FILENAME: <safe-file-name-with-extension>``, then a blank line, then
    nothing but the document content in the target format.

    Args:
        outputFormat: Target format name; upper-cased when shown in the prompt.
        renderer: Object supplying the format-specific guideline snippet.
        userPrompt: The raw user request.
        title: Document title, quoted verbatim inside the prompt.
        aiService: Optional AI service forwarded to the intent parser.

    Returns:
        The fully assembled prompt text.
    """
    # The intent is resolved first (it may involve an AI round trip), then the
    # renderer is asked for its guidelines - same call order as before.
    intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService)
    rendererGuidelines = renderer.getExtractionPrompt(userPrompt, title)

    formatLabel = outputFormat.upper()

    # Shared instructions, identical for every output format.
    sharedInstructions = f"""
{intent}

You are generating a document in {formatLabel} format for the title: "{title}".

Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Work with whatever data is available from the source documents - partial data is better than no data.
- If some information is missing, create the best possible document with what you have available.
- Do not refuse to generate the document due to incomplete data - always proceed with available information.
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
  FILENAME: <safe-file-name-with-extension>
  - The base name should be short, descriptive, and kebab-case or snake-case without spaces.
  - Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
  - Avoid special characters beyond [a-zA-Z0-9-_].
  - After this header, insert a single blank line and then provide ONLY the document content.

Common policy:
- Use the actual data from the source documents to create the content.
- If data is incomplete, work with what you have and create a meaningful document.
- Always generate the document - never refuse due to missing information.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Stitch the sections together; the source documents are expected to be
    # appended after the closing sentence by the caller.
    sections = [
        sharedInstructions,
        "\n\nFORMAT-SPECIFIC GUIDELINES:\n",
        rendererGuidelines.strip(),
        "\n\nGenerate the complete document content now based on the source documents below:",
    ]
    return "".join(sections)


# Single-pass translation table: escape both quote characters and flatten
# line breaks so a value cannot break out of its quoted, one-line prompt field.
_PROMPT_ESCAPES = str.maketrans({
    '"': '\\"',
    "'": "\\'",
    "\n": " ",
    "\r": " ",
})


def _escapeForPrompt(text: str) -> str:
    """Return *text* with quotes backslash-escaped and CR/LF replaced by spaces."""
    return text.translate(_PROMPT_ESCAPES)


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """
    Ask the AI service to write a generation prompt tailored to the user request.

    The request sent to the AI embeds the (escaped) user prompt and title plus
    the output format, and asks for a prompt that starts with
    "Generate a <format> document that...".

    A generic fallback prompt is returned instead when:
    - no ``aiService`` is supplied,
    - building the request or calling ``aiService.callAi`` raises, or
    - the AI returns an empty or whitespace-only answer.

    Args:
        outputFormat: Target format name, inserted verbatim into the prompts.
        userPrompt: The raw user request; escaped before being embedded.
        title: Document title; escaped inside the AI request, used verbatim
            in the fallback prompt.
        aiService: Optional object exposing an awaitable
            ``callAi(prompt=..., documents=..., options=...)``.

    Returns:
        The AI-written generation prompt, or the generic fallback prompt.
    """
    fallbackPrompt = (
        f"Generate a comprehensive {outputFormat} document titled '{title}' "
        "based on the extracted content."
    )

    if not aiService:
        return fallbackPrompt

    try:
        # Both user-controlled values sit inside double quotes in the request,
        # so both get the same escaping (the title was previously left raw).
        safeUserPrompt = _escapeForPrompt(userPrompt)
        safeTitle = _escapeForPrompt(str(title))

        generationPromptRequest = f"""
Based on this user request, create a detailed generation prompt for creating a {outputFormat} document.

User request: "{safeUserPrompt}"
Document title: "{safeTitle}"
Output format: {outputFormat}

Create a generation prompt that:
1. Identifies what content is most important for the user
2. Specifies how to structure and organize the content. Support with your inputs for structure to match best the user's intention.
3. Includes any specific formatting or presentation requirements
4. Ensures the document meets the user's needs

Return only the generation prompt, starting with "Generate a {outputFormat} document that..."
"""

        result = await aiService.callAi(
            prompt=generationPromptRequest,
            documents=None,
            options=None
        )
    except Exception:
        # Best effort by design: never fail document generation because the
        # helper call broke - but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Building the generation prompt via AI failed; using the fallback prompt",
            exc_info=True,
        )
        return fallbackPrompt

    # A whitespace-only answer is as useless as an empty one.
    if isinstance(result, str) and not result.strip():
        return fallbackPrompt
    return result or fallbackPrompt


async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None) -> str:
    """
    Distil the user's request down to WHAT content should be extracted.

    The AI service is asked to restate the request as a short
    "Extract: [content description]" line without formatting instructions,
    file types, or output methods.

    A generic fallback intent is returned instead when:
    - no ``aiService`` is supplied,
    - building the request or calling ``aiService.callAi`` raises, or
    - the AI returns an empty or whitespace-only answer.

    Args:
        userPrompt: The raw user request; quotes are escaped and line breaks
            flattened before it is embedded in the AI request.
        outputFormat: Currently unused by this function; kept so the
            signature stays stable for existing callers.
        aiService: Optional object exposing an awaitable
            ``callAi(prompt=..., documents=..., options=...)``.

    Returns:
        The AI-extracted content intention, or the generic fallback intent.
    """
    fallbackIntent = "Extract all relevant content from the document according to the user's requirements"

    if not aiService:
        return fallbackIntent

    try:
        # Keep the user text inside its quoted, single-line field of the request.
        safeUserPrompt = (
            userPrompt
            .replace('"', '\\"')
            .replace("'", "\\'")
            .replace('\n', ' ')
            .replace('\r', ' ')
        )

        extractionPrompt = f"""
Extract the core content intention from this user request. Focus on WHAT content they want.

User request: "{safeUserPrompt}"

Return only the content intention in a simple format like "Extract: [content description]"
Do not include formatting instructions, file types, or output methods.
"""

        result = await aiService.callAi(
            prompt=extractionPrompt,
            documents=None,
            options=None
        )
    except Exception:
        # Best effort by design: fall back rather than fail the whole prompt
        # build - but record the failure instead of swallowing it silently.
        logging.getLogger(__name__).warning(
            "Parsing the extraction intent via AI failed; using the fallback intent",
            exc_info=True,
        )
        return fallbackIntent

    # A whitespace-only answer carries no intent; treat it like an empty one.
    if isinstance(result, str) and not result.strip():
        return fallbackIntent
    return result or fallbackIntent