gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py
2025-11-02 15:36:02 +01:00

139 lines
5.8 KiB
Python

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""
import logging
from typing import Dict, Any
from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist: a "first call" prompt and a "continuation"
    prompt. The continuation variant is used only when continuationContext
    reports at least one section and carries a non-blank raw JSON fragment
    other than "{}".

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.). Accepted
            for interface compatibility; it is not used in the prompt.
        userPrompt: User's original prompt for document generation.
        title: Title substituted for the {{DOCUMENT_TITLE}} placeholder of
            the template; "Generated Document" is used when empty.
        extracted_content: Optional extracted content from documents; when
            truthy it is prepended to the prompt under a header line.
        continuationContext: Optional context from a previous generation.
            Keys read here: "section_count", "last_raw_json",
            "last_item_object" and "total_items_count".

    Returns:
        Complete generation prompt string with surrounding whitespace stripped.
    """
    # Fall back to a generic title so the placeholder never leaks into the prompt
    titleValue = title if title else "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    # Normalise the context once; `or` also covers keys that are present but None,
    # which would otherwise raise TypeError in the numeric comparisons below.
    context = continuationContext or {}
    lastRawJson = context.get("last_raw_json") or ""
    sectionCount = context.get("section_count") or 0

    # A continuation needs an actual JSON fragment: a blank string or an empty
    # object gives the model nothing to continue from.
    hasContinuation = sectionCount > 0 and lastRawJson.strip() not in ("", "{}")

    if hasContinuation:
        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
        lastItemObject = context.get("last_item_object", "")  # Full object like {"text": "value"}
        totalItemsCount = context.get("total_items_count") or 0

        # Only the tail of the previous output is shown - just enough to mark the
        # cut point; an ellipsis prefix signals that the head was dropped.
        fragmentLimit = 1500
        if len(lastRawJson) > fragmentLimit:
            fragmentSnippet = "..." + lastRawJson[-fragmentLimit:]
        else:
            fragmentSnippet = lastRawJson

        # Build clear continuation guidance from whatever progress info is available
        continuationGuidance = []
        if totalItemsCount > 0:
            continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
        if lastItemObject:
            continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")
        continuationText = "\n".join(continuationGuidance) or "Continue from where it stopped."

        # PROMPT FOR CONTINUATION
        generationPrompt = f"""User request: "{userPrompt}"
The user already received part of the response. Continue generating the remaining content.
{continuationText}
Previous response ended here (JSON was cut off at this point):
```json
{fragmentSnippet}
```
JSON structure template:
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use ONLY the element structures shown in the template.
- Continue from where it stopped — add NEW items only; do not repeat existing items.
- Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- Fill with actual content (no placeholders or instructional text such as "Add more...").
- IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content.
- When the request is fully satisfied, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text.
IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request.
Continue generating:
"""
    else:
        # PROMPT FOR FIRST CALL
        generationPrompt = f"""User request: "{userPrompt}"
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template:
{jsonTemplate}
Instructions:
- Start with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- When the request is fully satisfied, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text.
Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generationPrompt}"""

    return generationPrompt.strip()