gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py
2025-10-29 00:38:57 +01:00

134 lines
4.2 KiB
Python

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""
import json
import logging
from typing import Any, Dict, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation.
# buildGenerationPrompt embeds this text verbatim in the prompt after replacing
# the {{DOCUMENT_TITLE}} placeholder with the document title.
# NOTE: the "//" lines inside "elements" are part of the string itself (they
# describe the element shape expected for each content_type), so this literal
# is an annotated template and not strict JSON.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_1",
"content_type": "heading|paragraph|table|list|code",
"elements": [
// heading: {"level": 1, "text": "..."}
// paragraph: {"text": "..."}
// table: {"headers": [...], "rows": [[...]], "caption": "..."}
// list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
// code: {"code": "...", "language": "..."}
],
"order": 1
}
]
}
]
}"""
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the generation prompt around the shared JSON document template.

    Two prompt variants are produced:
    - a continuation prompt, when continuationContext reports a positive
      "section_count"; it tells the model how many sections exist, which
      section order comes next and what the last generated content was;
    - an initial prompt otherwise.

    Both variants embed TEMPLATE_JSON_DOCUMENT_GENERATION with the
    {{DOCUMENT_TITLE}} placeholder replaced by the (JSON-escaped) title.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents, prepended
            to the prompt under an "EXTRACTED CONTENT FROM DOCUMENTS:" header
        continuationContext: Optional context from a previous generation; the
            keys read are "section_count", "next_order" and "last_content_sample"

    Returns:
        Complete generation prompt string with surrounding whitespace stripped
    """
    # Fall back to a fixed default title when the caller supplies none.
    title_value = title if title else "Generated Document"

    # The placeholder sits inside a JSON string literal in the template, so the
    # title must be JSON-escaped: a raw quote, backslash or newline would make
    # the template malformed. ensure_ascii=False leaves non-ASCII characters
    # untouched; [1:-1] drops the surrounding quotes that json.dumps adds.
    escaped_title = json.dumps(str(title_value), ensure_ascii=False)[1:-1]
    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", escaped_title)

    # Check if this is a continuation request
    if continuationContext and continuationContext.get("section_count", 0) > 0:
        # Continuation prompt - tells the model where the previous call stopped
        section_count = continuationContext.get("section_count", 0)
        next_order = continuationContext.get("next_order", 1)
        last_content_sample = continuationContext.get("last_content_sample", "")
        generation_prompt = f"""Continue generating structured JSON content.
ORIGINAL REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"
CONTEXT - Already generated:
- Total sections generated: {section_count}
- Next section order: {next_order}
- Last content: {last_content_sample}
YOUR TASK:
Continue where previous generation stopped.
Generate the NEXT section(s) starting with section_{next_order}.
Generate as much content as possible.
RULES:
- Follow the JSON template structure below exactly
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for the data
- Generate REAL content, not summaries or placeholders
- Generate multiple sections if possible
Return raw JSON (no ```json blocks, no text before/after)
JSON Template
{json_template}
"""
    else:
        # First call - no continuation context to report
        generation_prompt = f"""Generate structured JSON content for document creation.
USER REQUEST: "{userPrompt}"
TARGET FORMAT: {outputFormat}
TITLE: "{title_value}"
INSTRUCTIONS:
- Follow the JSON template structure below exactly
- Emit only one JSON object in the response
- Fill sections with ACTUAL data based on the user request
- Use appropriate content_type for each section
- Generate REAL content, not summaries or instructions
- Structure content in sections with order 1, 2, 3...
- Each section should be complete before next
- Generate as much content as possible
Return raw JSON (no ```json blocks, no text before/after)
JSON Template
{json_template}
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generation_prompt}"""

    return generation_prompt.strip()