"""
Prompt builder for document generation.

This module builds prompts for generating documents from extracted content.
"""

import json
import logging
from typing import Dict, Any, Optional

logger = logging.getLogger(__name__)

# Centralized JSON structure template for document generation.
# It carries one example section per content type (heading, paragraph, list,
# table, code) so the AI can see the element structure expected for each.
# "{{DOCUMENT_TITLE}}" is a placeholder that the prompt builder replaces with
# the actual document title before the template is embedded in a prompt.
# The template must stay valid JSON: it is shown to the model verbatim as the
# structure it has to reproduce.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_heading_example",
          "content_type": "heading",
          "elements": [
            {"level": 1, "text": "Heading Text"}
          ],
          "order": 0
        },
        {
          "id": "section_paragraph_example",
          "content_type": "paragraph",
          "elements": [
            {"text": "Paragraph text content"}
          ],
          "order": 0
        },
        {
          "id": "section_list_example",
          "content_type": "list",
          "elements": [
            {
              "items": [
                {"text": "Item 1"},
                {"text": "Item 2"}
              ],
              "list_type": "numbered"
            }
          ],
          "order": 0
        },
        {
          "id": "section_table_example",
          "content_type": "table",
          "elements": [
            {
              "headers": ["Column 1", "Column 2"],
              "rows": [
                ["Row 1 Col 1", "Row 1 Col 2"],
                ["Row 2 Col 1", "Row 2 Col 2"]
              ],
              "caption": "Table caption"
            }
          ],
          "order": 0
        },
        {
          "id": "section_code_example",
          "content_type": "code",
          "elements": [
            {
              "code": "function example() { return true; }",
              "language": "javascript"
            }
          ],
          "order": 0
        }
      ]
    }
  ]
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist. A continuation prompt is produced when
    ``continuationContext`` reports at least one section and carries a
    non-empty ``last_raw_json`` other than ``"{}"``; otherwise the
    first-call prompt is produced.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.). Accepted
            for interface compatibility; it is not used in the prompt.
        userPrompt: User's original prompt for document generation.
        title: Title for the document. An empty or missing title falls back
            to "Generated Document".
        extracted_content: Optional extracted content from documents; when
            given it is prepended to the prompt under a header line.
        continuationContext: Optional context from a previous generation.
            Keys read here: "section_count", "last_raw_json",
            "last_item_object" and "total_items_count".

    Returns:
        Complete generation prompt string, stripped of leading and trailing
        whitespace.
    """
    # Fall back to a generic title when the caller supplies none.
    title_value = title if title else "Generated Document"
    # The title is substituted inside a JSON string literal in the template,
    # so quotes, backslashes and control characters must be escaped or the
    # template shown to the model stops being valid JSON. json.dumps wraps
    # the result in double quotes, which [1:-1] removes again.
    escaped_title = json.dumps(title_value, ensure_ascii=False)[1:-1]
    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace(
        "{{DOCUMENT_TITLE}}", escaped_title
    )

    # Treat a missing context as an empty one and tolerate keys that are
    # present but set to None, so the comparisons below cannot raise.
    context = continuationContext or {}
    last_raw_json = context.get("last_raw_json") or ""
    has_continuation = (
        (context.get("section_count") or 0) > 0
        and bool(last_raw_json)
        and last_raw_json.strip() != "{}"
    )

    if has_continuation:
        # CONTINUATION PROMPT - the user already received the first part.
        last_item_object = context.get("last_item_object", "")  # Full object like {"text": "value"}
        total_items_count = context.get("total_items_count") or 0

        # Only the tail of the previous JSON is shown: enough to mark the cut
        # point without repeating the whole response. A leading ellipsis
        # signals that the fragment was shortened. last_raw_json is non-empty
        # in this branch, so the snippet is never empty.
        max_fragment_chars = 1500
        fragment_snippet = last_raw_json[-max_fragment_chars:]
        if len(last_raw_json) > max_fragment_chars:
            fragment_snippet = "..." + fragment_snippet

        # Build clear continuation guidance from whatever progress is known.
        continuation_guidance = []
        if total_items_count > 0:
            continuation_guidance.append(
                f"You have already generated {total_items_count} items."
            )
        if last_item_object:
            continuation_guidance.append(
                f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this."
            )
        continuation_text = (
            "\n".join(continuation_guidance)
            if continuation_guidance
            else "Continue from where it stopped."
        )

        generation_prompt = f"""User request: "{userPrompt}"

The user already received part of the response. Continue generating the remaining content.

{continuation_text}

Previous response ended here (JSON was cut off at this point):
```json
{fragment_snippet}
```

JSON structure template:
{json_template}

Instructions:
- Return full JSON structure (metadata + documents + sections)
- Continue from where it stopped - add NEW items only, do not repeat old items
- Use the element structures shown in the template
- Generate all remaining content needed to complete the user request
- Fill with actual content (no comments, no "Add more..." text, no placeholders)
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks)

Continue generating:
"""
    else:
        # FIRST CALL - initial generation
        generation_prompt = f"""User request: "{userPrompt}"

Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.

JSON structure template (reference only - shows the pattern):
{json_template}

Instructions:
- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
- Do NOT continue from the template examples above - create your own sections
- Generate content based on the user request
- Use the element structures shown in the template (heading, paragraph, list, table, code)
- Create your own section IDs (do not use the example IDs like "section_heading_example")
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks, no text before/after)

Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt.
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generation_prompt}"""

    return generation_prompt.strip()