200 lines
8.9 KiB
Python
200 lines
8.9 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Prompt builder for document generation.
|
|
This module builds prompts for generating documents from extracted content.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelJson import jsonTemplateDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
def _resolveUserLanguage(services: Any) -> str:
    """
    Resolve the language code to request from the model.

    Lookup order: ``services.currentUserLanguage`` (detected language), then
    ``services.user.language`` (profile language), then the default ``'en'``.
    Empty or ``None`` values are skipped so the default is never overwritten
    by a missing value.

    Args:
        services: Optional services instance; may be None.

    Returns:
        The resolved language value, or ``'en'`` if none could be determined.
    """
    defaultLanguage = 'en'
    if not services:
        return defaultLanguage

    try:
        # Prefer detected language if available
        detectedLanguage = getattr(services, 'currentUserLanguage', None)
        if detectedLanguage:
            return detectedLanguage

        user = getattr(services, 'user', None)
        profileLanguage = getattr(user, 'language', None) if user else None
        if profileLanguage:
            return profileLanguage
    except Exception:
        # Best effort only: language lookup must never block prompt building,
        # but leave a trace instead of swallowing the failure silently.
        logger.debug(
            "Could not resolve user language from services; falling back to '%s'",
            defaultLanguage,
            exc_info=True,
        )

    return defaultLanguage


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """
    Assemble the continuation section of the prompt.

    Reads ``delivered_summary``, ``hierarchy_context`` and ``overlap_context``
    from the continuation context. The two context values are included only
    when present and are presented to the model as reference-only material.

    Args:
        continuationContext: Context dict from the previous generation call.

    Returns:
        The continuation text block to embed in the continuation prompt.
    """
    delivered_summary = continuationContext.get("delivered_summary", "")

    # Use centralized system: overlap_context and hierarchy_context from jsonContinuation.getContexts()
    overlap_context = continuationContext.get("overlap_context")
    hierarchy_context = continuationContext.get("hierarchy_context")

    # CRITICAL: Always include cut-off information if available (per loop_plan.md)
    parts = [
        f"{delivered_summary}\n\n",
        "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
    ]

    # Cut-off point information is shown ONLY as REFERENCE to know where generation stopped
    if hierarchy_context:
        parts.append("# REFERENCE: Structure context (already delivered - DO NOT repeat):\n")
        parts.append(f"{hierarchy_context}\n\n")

    if overlap_context:
        parts.append("# REFERENCE: Overlap context - incomplete element at cut point (DO NOT repeat):\n")
        parts.append(f"{overlap_context}\n\n")

    parts.append("⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n")
    parts.append("Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n")
    parts.append("Start directly with the next element/section that should follow.\n\n")

    return "".join(parts)


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None,
    services: Any = None,
    useContentParts: bool = False  # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly)
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Generic solution that works for any user request. One of three prompt
    variants is produced:

    1. Continuation prompt - when ``continuationContext`` carries a non-empty
       ``last_raw_json`` other than ``{}``.
    2. First-call prompt with extracted content - when ``extracted_content``
       is given and ``useContentParts`` is False.
    3. First-call prompt without content - in every other case.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents to embed in the prompt
        continuationContext: Optional context from previous generation for continuation
        services: Optional services instance for accessing user language
        useContentParts: If True, extracted content is NOT embedded in the prompt
            even when provided (the caller is expected to pass it separately)

    Returns:
        Complete generation prompt string (surrounding whitespace stripped)
    """
    # Extract user language for document language instruction
    userLanguage = _resolveUserLanguage(services)

    # Fill the document title into the shared JSON template
    titleValue = title or "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    # Build prompt based on whether this is a continuation or first call.
    # CRITICAL: Allow continuation even if section_count is 0 (broken JSON that couldn't be parsed)
    # as long as we have last_raw_json - this handles cases where JSON is too broken to extract sections.
    # A whitespace-only fragment or an empty object "{}" is not an actual JSON fragment.
    lastRawJson = continuationContext.get("last_raw_json", "") if continuationContext else ""
    hasContinuation = bool(lastRawJson) and lastRawJson.strip() not in ("", "{}")

    if hasContinuation:
        # CONTINUATION PROMPT - use centralized jsonContinuation system
        continuationText = _buildContinuationText(continuationContext)

        generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}

⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.

{continuationText}

JSON structure template:
{jsonTemplate}

Rules:
- Return ONLY valid JSON (no comments, no trailing commas, double quotes only).
- Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them.
- Generate ONLY the remaining content that comes AFTER the reference elements.
- DO NOT regenerate the entire JSON structure - start directly with what comes next.
- All content must be in the language '{userLanguage}'.
- Output JSON only; no markdown fences or extra text.

Continue generating the remaining content now.
"""
    elif extracted_content and not useContentParts:
        # PROMPT FOR FIRST CALL (with extracted content)
        # Structure: User request + Extracted content FIRST, then JSON template, then instructions.
        # ARCHITECTURE: If useContentParts=True this branch is skipped - ContentParts will be
        # passed directly to callAi for model-aware chunking.
        generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}

{'='*80}
⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
{'='*80}
The content below contains the ACTUAL DATA extracted from the source documents.
You MUST use this data - DO NOT generate fake or example data.
{'='*80}
EXTRACTED CONTENT FROM DOCUMENTS:
{'='*80}
{extracted_content}
{'='*80}
END OF EXTRACTED CONTENT
{'='*80}

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language. If the extracted content is in a different language, translate it to '{userLanguage}' while preserving the structure and meaning.

Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.

JSON structure template (structure only - use data from EXTRACTED CONTENT above):
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.

Generate your complete response using the extracted content data.
"""
    else:
        # PROMPT FOR FIRST CALL (no extracted content in prompt) - generate from scratch
        generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}

LANGUAGE REQUIREMENT: All generated content must be in the language '{userLanguage}'. Generate all text, headings, paragraphs, and content in this language.

Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

JSON structure template:
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- All content must be in the language '{userLanguage}'.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.

Generate your complete response.
"""

    return generationPrompt.strip()
|
|
|