# gateway/modules/services/serviceGeneration/prompt_builder.py
# 2025-10-06 15:39:25 +02:00
#
# 72 lines
# 2.7 KiB
# Python

"""
Centralized prompt builder for document generation across formats.
Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""
from typing import Protocol
class _RendererLike(Protocol):
    """Structural type for the renderer handed to the prompt builder.

    Any object exposing a compatible ``getExtractionPrompt`` method
    satisfies this protocol; no inheritance is required.
    """

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only the format-specific guidelines for the prompt."""
def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """
    Build the final extraction prompt for document generation.

    The prompt is assembled from three parts, in this order:
    - The raw user prompt (verbatim), followed by generic cross-format
      instructions (filename header + real-data policy)
    - A "FORMAT-SPECIFIC GUIDELINES:" section holding the snippet returned by
      ``renderer.getExtractionPrompt(user_prompt, title)``
    - A closing instruction to generate the document from the source documents

    The generated instructions require the AI to place a single filename
    header at the very top:
    FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content according to
    the target format.

    Args:
        output_format: Target format name; it is upper-cased when inserted
            into the prompt text.
        renderer: Object providing ``getExtractionPrompt``; its result is
            stripped of surrounding whitespace before insertion.
        user_prompt: The user's request, placed first in the prompt.
        title: Document title, quoted inside the prompt text.

    Returns:
        The complete prompt string. If the renderer yields no guidelines
        (``None`` or an empty string), the guidelines section is left empty
        rather than raising.
    """
    # A renderer without format-specific guidance may hand back None; treat
    # that like an empty snippet so the later .strip() cannot raise.
    format_guidelines = renderer.getExtractionPrompt(user_prompt, title) or ""

    # Generic block appears once for every format. The text is kept flush-left
    # because every character inside the literal is part of the prompt.
    generic_intro = f"""
{user_prompt}
You are generating a document in {output_format.upper()} format for the title: "{title}".
Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Use only factual information extracted from the supplied source documents.
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
FILENAME: <safe-file-name-with-extension>
- The base name should be short, descriptive, and kebab-case or snake-case without spaces.
- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
- Avoid special characters beyond [a-zA-Z0-9-_].
- After this header, insert a single blank line and then provide ONLY the document content.
Common policy:
- Use the actual data from the source documents to create the content.
- Do not generate placeholder text or templates.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Final assembly: generic rules, then renderer guidelines, then the cue
    # that the source documents follow.
    return (
        generic_intro
        + "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
        + format_guidelines.strip()
        + "\n\nGenerate the complete document content now based on the source documents below:"
    )