# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py
# 2025-10-28 00:14:24 +01:00
#
# 89 lines
# 2.6 KiB
# Python

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""
import json
import logging
logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation.
# The "{{DOCUMENT_TITLE}}" placeholder is substituted by buildGenerationPrompt().
# NOTE: the "//" lines inside "elements" are guidance for the model describing
# the element shape per content_type; they make the template itself non-strict
# JSON, so it must not be fed to json.loads() as-is.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_1",
"content_type": "heading|paragraph|table|list|code",
"elements": [
// heading: {"level": 1, "text": "..."}
// paragraph: {"text": "..."}
// table: {"headers": [...], "rows": [[...]], "caption": "..."}
// list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
// code: {"code": "...", "language": "..."}
],
"order": 1
}
]
}
],
"continuation": null
}"""


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    The prompt consists of the user context, the target format, a title
    instruction, a literal ``LOOP_INSTRUCTION`` marker line, the rules and the
    JSON template. If extracted content is supplied it is placed in front of
    everything else.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document. When it is None, empty or only
            whitespace, the model is asked to generate a title itself and the
            ``{{DOCUMENT_TITLE}}`` placeholder is left in the template.
        extracted_content: Optional extracted content from documents to
            prepend to the prompt

    Returns:
        Complete generation prompt string, stripped of leading and trailing
        whitespace
    """
    if title and title.strip():
        title_instruction = f'Use the following title: "{title}"'
        # The title lands inside a JSON string literal in the template, so
        # quotes, backslashes and control characters must be escaped.
        # json.dumps() yields a quoted literal; drop the surrounding quotes
        # because the template already provides them. ensure_ascii=False keeps
        # non-ASCII titles readable.
        template_title = json.dumps(title, ensure_ascii=False)[1:-1]
    else:
        # No usable title: let the model generate one instead of failing on
        # str.replace(..., None) or asking for an empty title.
        title_instruction = (
            "Generate a concise, descriptive title and use it in place of "
            "{{DOCUMENT_TITLE}}"
        )
        template_title = "{{DOCUMENT_TITLE}}"

    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace(
        "{{DOCUMENT_TITLE}}", template_title
    )

    # LOOP_INSTRUCTION is emitted verbatim on its own line; presumably a later
    # stage replaces it with continuation instructions (verify against caller).
    generation_prompt = f"""Generate structured JSON content for document creation.
USER CONTEXT: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE INSTRUCTION: {title_instruction}
LOOP_INSTRUCTION
RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type
{json_template}
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generation_prompt}"""

    return generation_prompt.strip()