"""
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""
|
|
|
|
import logging
from typing import Protocol
|
|
|
|
|
|
class _RendererLike(Protocol):
|
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str: # returns only format-specific guidelines
|
|
...
|
|
|
|
|
|
async def buildExtractionPrompt(
    outputFormat: str,
    renderer: _RendererLike,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """
    Assemble the complete extraction prompt for one target format.

    The prompt consists of, in this order:
    - the extraction intent derived from the user prompt (AI-assisted)
    - the generic cross-format rules (filename header + real-data policy)
    - the format-specific guidelines returned by the renderer
    - a closing instruction to generate the document from the source documents

    The generic rules tell the AI to start its answer with a single line
        FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content.
    """
    # Separate WHAT to extract from HOW to render it (delegated to the AI helper).
    intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService)

    rendererGuidelines = renderer.getExtractionPrompt(userPrompt, title)

    # Format-independent section; it appears exactly once for every format.
    introLines = [
        "",
        f"{intent}",
        "",
        f'You are generating a document in {outputFormat.upper()} format for the title: "{title}".',
        "",
        "Rules:",
        "- The user's intent fully defines the structure. Do not assume a fixed template or headings.",
        "- Work with whatever data is available from the source documents - partial data is better than no data.",
        "- If some information is missing, create the best possible document with what you have available.",
        "- Do not refuse to generate the document due to incomplete data - always proceed with available information.",
        "- The output must strictly follow the target format and be ready for saving without extra wrapping.",
        "- At the VERY TOP output exactly one line with the filename header:",
        "FILENAME: <safe-file-name-with-extension>",
        "- The base name should be short, descriptive, and kebab-case or snake-case without spaces.",
        "- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).",
        "- Avoid special characters beyond [a-zA-Z0-9-_].",
        "- After this header, insert a single blank line and then provide ONLY the document content.",
        "",
        "Common policy:",
        "- Use the actual data from the source documents to create the content.",
        "- If data is incomplete, work with what you have and create a meaningful document.",
        "- Always generate the document - never refuse due to missing information.",
        "- Extract and use the real data provided in the source documents to create meaningful content.",
        "",
    ]
    # Strip the assembled text as a whole so surrounding whitespace is removed.
    sharedIntro = "\n".join(introLines).strip()

    # Final assembly: generic part, renderer part, closing instruction.
    pieces = (
        sharedIntro,
        "\n\nFORMAT-SPECIFIC GUIDELINES:\n",
        rendererGuidelines.strip(),
        "\n\nGenerate the complete document content now based on the source documents below:",
    )
    return "".join(pieces)
|
|
|
|
|
|
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """
    Use AI to build the generation prompt based on user intent and format requirements.

    Args:
        outputFormat: Target document format; interpolated verbatim into the request.
        userPrompt: Raw user request. Quotes are backslash-escaped and line breaks
            are flattened to spaces before it is embedded in the request.
        title: Document title. Escaped the same way as userPrompt inside the
            request; used unescaped in the fallback sentence.
        aiService: Optional object exposing an awaitable
            callAi(prompt=..., documents=..., options=...).

    Returns:
        The prompt produced by the AI service, or a generic fallback sentence when
        no service is supplied, the service returns an empty/blank result, or
        building/sending the request raises.
    """
    fallbackPrompt = (
        f"Generate a comprehensive {outputFormat} document titled '{title}' "
        "based on the extracted content."
    )

    if not aiService:
        # Fallback if no AI service available
        return fallbackPrompt

    # Single-pass equivalent of escaping both quote kinds and flattening newlines.
    escapes = str.maketrans({'"': '\\"', "'": "\\'", "\n": " ", "\r": " "})

    try:
        # Protect every user-controlled value that is embedded inside quotes.
        safeUserPrompt = userPrompt.translate(escapes)
        safeTitle = str(title).translate(escapes)

        generationPromptRequest = "\n".join([
            "",
            f"Based on this user request, create a detailed generation prompt for creating a {outputFormat} document.",
            "",
            f'User request: "{safeUserPrompt}"',
            f'Document title: "{safeTitle}"',
            f"Output format: {outputFormat}",
            "",
            "Create a generation prompt that:",
            "1. Identifies what content is most important for the user",
            "2. Specifies how to structure and organize the content. Support with your inputs for structure to match best the user's intention.",
            "3. Includes any specific formatting or presentation requirements",
            "4. Ensures the document meets the user's needs",
            "",
            f'Return only the generation prompt, starting with "Generate a {outputFormat} document that..."',
            "",
        ])

        result = await aiService.callAi(
            prompt=generationPromptRequest,
            documents=None,
            options=None
        )
    except Exception:
        # Best-effort by design: keep the fallback, but leave a trace of the failure.
        logging.getLogger(__name__).warning(
            "buildGenerationPrompt: AI call failed, using fallback prompt",
            exc_info=True,
        )
        return fallbackPrompt

    # An empty or whitespace-only answer is not a usable prompt.
    if not result or (isinstance(result, str) and not result.strip()):
        return fallbackPrompt
    return result
|
|
|
|
|
|
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None) -> str:
|
|
"""
|
|
Use AI to extract the core content intention from the user prompt.
|
|
Focus on WHAT the user wants to extract, not HOW to format it.
|
|
"""
|
|
if not aiService:
|
|
# Fallback if no AI service available
|
|
return "Extract all relevant content from the document according to the user's requirements"
|
|
|
|
try:
|
|
# Protect userPrompt from injection by escaping quotes and newlines
|
|
safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')
|
|
|
|
# Simple AI call to extract the intention
|
|
extractionPrompt = f"""
|
|
Extract the core content intention from this user request. Focus on WHAT content they want.
|
|
|
|
User request: "{safeUserPrompt}"
|
|
|
|
Return only the content intention in a simple format like "Extract: [content description]"
|
|
Do not include formatting instructions, file types, or output methods.
|
|
"""
|
|
|
|
# Call AI service to extract intention
|
|
result = await aiService.callAi(
|
|
prompt=extractionPrompt,
|
|
documents=None,
|
|
options=None
|
|
)
|
|
|
|
return result if result else "Extract all relevant content from the document according to the user's requirements"
|
|
|
|
except Exception:
|
|
# Fallback on any error
|
|
return "Extract all relevant content from the document according to the user's requirements"
|
|
|
|
|