74 lines
2.9 KiB
Python
74 lines
2.9 KiB
Python
"""
|
|
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section requiring real data from the source documents
- Requires the AI to output a filename header that we can parse and use
|
|
"""
|
|
|
|
from typing import Protocol
|
|
|
|
|
|
class _RendererLike(Protocol):
    """Structural type for the renderer objects accepted by the prompt builder.

    Any object exposing a compatible ``getExtractionPrompt`` method satisfies
    this protocol; no inheritance is required.
    """

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only the format-specific guidelines for the target format.

        Args:
            user_prompt: The raw user request.
            title: The title of the document being generated.

        Returns:
            The guidelines snippet for the renderer's format.
        """
        ...
|
|
|
|
|
|
def buildExtractionPrompt(
    output_format: str,
    renderer: "_RendererLike",
    user_prompt: str,
    title: str
) -> str:
    """Build the final extraction prompt sent to the AI.

    The prompt is assembled from three parts, in this order:

    1. The raw user prompt, followed by generic cross-format instructions
       (filename header rules and the real-data policy).
    2. A ``FORMAT-SPECIFIC GUIDELINES:`` section holding the snippet returned
       by ``renderer.getExtractionPrompt(user_prompt, title)``.
    3. A closing instruction to generate the document from the source
       documents that follow.

    The generated instructions require the AI to place a single header line
    at the very top of its answer::

        FILENAME: <safe-file-name-with-extension>

    followed by a blank line and then ONLY the document content.

    Args:
        output_format: Name of the target format; it is upper-cased when
            embedded in the prompt.
        renderer: Object providing the format-specific guidelines.
        user_prompt: The user's request, embedded at the top of the prompt.
        title: Document title, embedded in double quotes.

    Returns:
        The complete prompt text.
    """
    # A renderer that returns None (or an empty string) contributes no
    # guidelines instead of crashing on ``.strip()``; the section header is
    # still emitted so the prompt layout stays stable.
    format_guidelines = (renderer.getExtractionPrompt(user_prompt, title) or "").strip()

    # Generic block, shared by every format. Each entry is one line of the
    # prompt; empty strings are blank separator lines.
    # NOTE(review): any leading indentation the FILENAME line or its
    # sub-bullets may have had inside the original literal is not recoverable
    # from the captured source; all lines are emitted flush-left - confirm.
    generic_lines = (
        user_prompt,
        "",
        f'You are generating a document in {output_format.upper()} format for the title: "{title}".',
        "",
        "Rules:",
        "- The user's intent fully defines the structure. Do not assume a fixed template or headings.",
        "- Work with whatever data is available from the source documents - partial data is better than no data.",
        "- If some information is missing, create the best possible document with what you have available.",
        "- Do not refuse to generate the document due to incomplete data - always proceed with available information.",
        "- The output must strictly follow the target format and be ready for saving without extra wrapping.",
        "- At the VERY TOP output exactly one line with the filename header:",
        "FILENAME: <safe-file-name-with-extension>",
        "- The base name should be short, descriptive, and kebab-case or snake-case without spaces.",
        "- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).",
        "- Avoid special characters beyond [a-zA-Z0-9-_].",
        "- After this header, insert a single blank line and then provide ONLY the document content.",
        "",
        "Common policy:",
        "- Use the actual data from the source documents to create the content.",
        "- If data is incomplete, work with what you have and create a meaningful document.",
        "- Always generate the document - never refuse due to missing information.",
        "- Extract and use the real data provided in the source documents to create meaningful content.",
    )
    # Stripping the joined block also trims surrounding whitespace of the
    # user prompt at the very start, as the original literal + .strip() did.
    generic_intro = "\n".join(generic_lines).strip()

    # Final assembly
    return (
        generic_intro
        + "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
        + format_guidelines
        + "\n\nGenerate the complete document content now based on the source documents below:"
    )
|
|
|
|
|