gateway/modules/services/serviceGeneration/prompt_builder.py
2025-10-10 23:46:24 +02:00

74 lines
2.9 KiB
Python

"""
Centralized prompt builder for document generation across formats.
Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""
from typing import Protocol
class _RendererLike(Protocol):
    """Structural type for renderer objects accepted by the prompt builder.

    Any object exposing a compatible ``getExtractionPrompt`` method
    satisfies this protocol; no inheritance is required.
    """

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only the format-specific guidelines for the prompt."""
def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str,
) -> str:
    """
    Build the final extraction prompt sent to the AI.

    The prompt is assembled from three parts, in this order:
    - The raw user prompt (verbatim), followed by generic cross-format
      instructions (filename header rules + real-data policy)
    - A "FORMAT-SPECIFIC GUIDELINES:" section holding the snippet returned by
      ``renderer.getExtractionPrompt(user_prompt, title)``
    - A closing instruction to generate the document from the source documents

    The AI is told to place a single filename header at the very top:
    FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content.

    Args:
        output_format: Target format name; it is upper-cased in the prompt.
        renderer: Object providing ``getExtractionPrompt``.
        user_prompt: The user's request, inserted as the first prompt line.
        title: Document title, quoted in the prompt.

    Returns:
        The complete prompt text.
    """
    # A renderer with nothing format-specific to add may return None or "";
    # treat both as empty rather than crashing on ``None.strip()``.
    format_guidelines = (renderer.getExtractionPrompt(user_prompt, title) or "").strip()

    # Generic block appears once for every format. It is kept as a tuple of
    # lines so the prompt text does not depend on this function's indentation.
    generic_lines = (
        f"{user_prompt}",
        f'You are generating a document in {output_format.upper()} format for the title: "{title}".',
        "Rules:",
        "- The user's intent fully defines the structure. Do not assume a fixed template or headings.",
        "- Work with whatever data is available from the source documents - partial data is better than no data.",
        "- If some information is missing, create the best possible document with what you have available.",
        "- Do not refuse to generate the document due to incomplete data - always proceed with available information.",
        "- The output must strictly follow the target format and be ready for saving without extra wrapping.",
        "- At the VERY TOP output exactly one line with the filename header:",
        "FILENAME: <safe-file-name-with-extension>",
        "- The base name should be short, descriptive, and kebab-case or snake-case without spaces.",
        "- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).",
        "- Avoid special characters beyond [a-zA-Z0-9-_].",
        "- After this header, insert a single blank line and then provide ONLY the document content.",
        "Common policy:",
        "- Use the actual data from the source documents to create the content.",
        "- If data is incomplete, work with what you have and create a meaningful document.",
        "- Always generate the document - never refuse due to missing information.",
        "- Extract and use the real data provided in the source documents to create meaningful content.",
    )
    # strip() also trims surrounding whitespace contributed by user_prompt.
    generic_intro = "\n".join(generic_lines).strip()

    # Final assembly. The section header is always emitted, even when the
    # guidelines are empty, so the prompt layout stays stable.
    return (
        f"{generic_intro}"
        "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
        f"{format_guidelines}"
        "\n\nGenerate the complete document content now based on the source documents below:"
    )