89 lines
2.6 KiB
Python
89 lines
2.6 KiB
Python
"""
Prompt builder for document generation.

This module builds prompts for generating documents from extracted content.
"""
|
|
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Centralized JSON structure template for document generation.
#
# "{{DOCUMENT_TITLE}}" is the only placeholder; buildGenerationPrompt fills it
# in with str.replace. The "//" lines inside "elements" are per-content_type
# shape hints for the model. They are not valid JSON, so this template is
# prompt text only and must not be passed to json.loads as-is.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_1",
          "content_type": "heading|paragraph|table|list|code",
          "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
          ],
          "order": 1
        }
      ]
    }
  ],
  "continuation": null
}"""
|
|
|
|
|
|
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    The body performs no awaits; the function stays ``async`` so that
    existing callers which ``await`` it keep working.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document. When it is ``None``, empty or
            whitespace-only, the model is asked to choose a title itself.
        extracted_content: Optional extracted content from documents to
            prepend to the prompt

    Returns:
        Complete generation prompt string with leading and trailing
        whitespace stripped
    """
    # Local import keeps this block self-contained (no file-level import change).
    import json

    if title and title.strip():
        prompt_instruction = f'Use the following title: "{title}"'
        # JSON-escape quotes/backslashes so the skeleton the model is told to
        # copy stays well-formed; plain titles are inserted unchanged.
        template_title = json.dumps(title, ensure_ascii=False)[1:-1]
    else:
        # No usable title: let the model pick one instead of failing in
        # str.replace (None) or demanding an empty title ("").
        prompt_instruction = (
            "No title was provided; generate a concise, descriptive title."
        )
        template_title = "<generated title>"

    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace(
        "{{DOCUMENT_TITLE}}", template_title
    )

    # LOOP_INSTRUCTION is emitted as a literal marker; presumably a later
    # stage substitutes it - TODO confirm against the caller.
    prompt_lines = [
        "Generate structured JSON content for document creation.",
        "",
        f'USER CONTEXT: "{userPrompt}"',
        f"TARGET FORMAT: {outputFormat}",
        f"TITLE INSTRUCTION: {prompt_instruction}",
        "",
        "LOOP_INSTRUCTION",
        "",
        "RULES:",
        "- Follow the template structure below exactly; emit only one JSON object in the response",
        "- Fill sections with content based on the user request",
        "- Use appropriate content_type",
        "",
        json_template,
    ]
    generation_prompt = "\n".join(prompt_lines)

    # If we have extracted content, prepend it to the prompt.
    if extracted_content:
        generation_prompt = (
            "EXTRACTED CONTENT FROM DOCUMENTS:\n"
            f"{extracted_content}\n"
            "\n"
            f"{generation_prompt}"
        )

    return generation_prompt.strip()
|
|
|