"""
Prompt builder for document generation.

This module builds prompts for generating documents from extracted content.
"""

import json
import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

# Centralized JSON structure template for document generation.
#
# The text is embedded verbatim into the prompts built by buildGenerationPrompt,
# after the "{{DOCUMENT_TITLE}}" placeholder has been replaced with the
# document title.
#
# NOTE: the "//" lines inside "elements" make this template invalid as strict
# JSON, so it must not be passed to json.loads as-is. They list the element
# shape for each content_type (presumably as hints for the model -- confirm
# with the prompt owner before removing them).
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_1",
          "content_type": "heading|paragraph|table|list|code",
          "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
          ],
          "order": 1
        }
      ]
    }
  ]
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the generation prompt around the shared JSON document template.

    Two prompt variants exist:

    * Continuation prompt - used when ``continuationContext`` reports a
      positive ``section_count``; it tells the model how many sections already
      exist, which order number comes next and what the last content was.
    * Initial prompt - used in every other case.

    In both variants the JSON template (with the title filled in) is appended
    at the end, and ``extracted_content``, when given, is prepended to the
    whole prompt.

    The function is declared ``async`` although it awaits nothing; the
    coroutine signature is kept so existing ``await`` call sites keep working.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document; an empty/None title falls back to
            "Generated Document"
        extracted_content: Optional extracted content from documents to
            prepend to prompt
        continuationContext: Optional context from previous generation for
            continuation. Keys read: "section_count", "next_order",
            "last_content_sample".

    Returns:
        Complete generation prompt string, stripped of leading/trailing
        whitespace
    """
    # Fall back to a fixed default title when none was provided.
    title_value = title if title else "Generated Document"

    # The title lands inside a JSON string literal in the template, so it must
    # be JSON-escaped; otherwise a quote, backslash or newline in the title
    # would break the "title" entry shown to the model. json.dumps adds the
    # surrounding quotes, which the template already has, hence the [1:-1].
    escaped_title = json.dumps(title_value, ensure_ascii=False)[1:-1]
    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace(
        "{{DOCUMENT_TITLE}}", escaped_title
    )

    # Read section_count once; "or 0" also covers a key that is present but
    # set to None, which would otherwise raise TypeError in the comparison.
    context = continuationContext or {}
    section_count = context.get("section_count") or 0

    if section_count > 0:
        # Continuation prompt - simple and focused
        next_order = context.get("next_order", 1)
        last_content_sample = context.get("last_content_sample", "")

        generation_prompt = f"""Continue generating structured JSON content.

ORIGINAL REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"

CONTEXT - Already generated:
- Total sections generated: {section_count}
- Next section order: {next_order}
- Last content: {last_content_sample}

YOUR TASK:
Continue where previous generation stopped.
Generate the NEXT section(s) starting with section_{next_order}.
Generate as much content as possible.

RULES:
- Follow the JSON template structure below exactly
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for the data
- Generate REAL content, not summaries or placeholders
- Generate multiple sections if possible

Return raw JSON (no ```json blocks, no text before/after)

JSON Template
{json_template}
"""
    else:
        # First call - simple prompt without continuation complexity
        generation_prompt = f"""Generate structured JSON content for document creation.

USER REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"

INSTRUCTIONS:
- Follow the JSON template structure below exactly
- Emit only one JSON object in the response
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for each section
- Generate REAL content, not summaries or instructions
- Structure content in sections with order 1, 2, 3...
- Each section should be complete before next
- Generate as much content as possible

Return raw JSON (no ```json blocks, no text before/after)

JSON Template
{json_template}
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generation_prompt}"""

    return generation_prompt.strip()