gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py
2025-11-19 23:51:25 +01:00

198 lines
10 KiB
Python

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""
import logging
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelJson import jsonTemplateDocument
# Module-level logger named after this module; nothing in the code visible here calls it.
logger = logging.getLogger(__name__)
# Progress metrics keyed by their ``progress_stats`` entry:
# (singular noun, text appended after the possibly pluralised noun).
_PROGRESS_LABELS = {
    "total_rows": ("row", ""),
    "total_items": ("item", ""),
    "total_code_lines": ("line", " of code/data"),
    "total_paragraphs": ("paragraph", ""),
    "total_headings": ("heading", ""),
    "section_count": ("section", ""),
}

# (Definition-of-Done KPI key, progress_stats key), in reporting order.
_DOD_METRICS = (
    ("minTableRows", "total_rows"),
    ("minListItems", "total_items"),
    ("minCodeLines", "total_code_lines"),
    ("minParagraphs", "total_paragraphs"),
    ("minHeadings", "total_headings"),
    ("minSections", "section_count"),
)

# Reporting order used when no Definition of Done is available.
_FALLBACK_METRICS = (
    "section_count",
    "total_headings",
    "total_paragraphs",
    "total_rows",
    "total_items",
    "total_code_lines",
)


def _asCount(value: Any) -> float:
    """Return ``value`` unchanged when it is a number, otherwise 0.

    Keeps the ``> 0`` comparisons below from raising ``TypeError`` when a
    key is present but holds ``None`` or another non-numeric value.
    """
    return value if isinstance(value, (int, float)) else 0


def _formatMetric(statKey: str, count: float) -> str:
    """Render one progress metric, e.g. ``"3 rows"`` or ``"1 line of code/data"``."""
    noun, suffix = _PROGRESS_LABELS[statKey]
    plural = "s" if count > 1 else ""
    return f"{count} {noun}{plural}{suffix}"


def _hasContinuation(continuationContext: Optional[Dict[str, Any]]) -> bool:
    """Tell whether the context describes a previous, non-empty generation.

    True only when the context is non-empty, reports at least one section,
    and carries a raw JSON fragment that is neither empty nor just ``{}``.
    """
    if not continuationContext:
        return False
    if _asCount(continuationContext.get("section_count", 0)) <= 0:
        return False
    lastRawJson = continuationContext.get("last_raw_json", "")
    return bool(lastRawJson) and lastRawJson.strip() != "{}"


def _collectProgressParts(
    progressStats: Dict[str, Any],
    definitionOfDone: Dict[str, Any]
) -> List[str]:
    """Build the human-readable progress fragments for the continuation prompt.

    With a Definition of Done, only metrics whose KPI threshold is positive
    are reported; the character count is used solely when no other metric
    qualified. Without one, every positive metric is reported.
    """
    parts: List[str] = []

    if definitionOfDone:
        for kpiKey, statKey in _DOD_METRICS:
            count = _asCount(progressStats.get(statKey, 0))
            if _asCount(definitionOfDone.get(kpiKey, 0)) > 0 and count > 0:
                parts.append(_formatMetric(statKey, count))
        # Character count is the least informative KPI - last resort only.
        if not parts and _asCount(definitionOfDone.get("minContentSize", 0)) > 0:
            totalContentSize = _asCount(progressStats.get("total_content_size", 0))
            if totalContentSize > 0:
                parts.append(f"{totalContentSize} characters")
        return parts

    for statKey in _FALLBACK_METRICS:
        count = _asCount(progressStats.get(statKey, 0))
        if count > 0:
            parts.append(_formatMetric(statKey, count))
    contentTypeCount = _asCount(progressStats.get("content_type_count", 0))
    if contentTypeCount > 1:
        parts.append(f"{contentTypeCount} different content types")
    return parts


def _describeContinuationPoint(continuationContext: Dict[str, Any]) -> List[str]:
    """Return the guidance lines that pin down where generation must resume.

    Shows the last complete item and/or the cut-off item. A cut item is
    reported even when no complete item is available, so the continuation
    point is not lost in that case.
    """
    lastItemObject = continuationContext.get("last_item_object", "")
    cutItemObject = continuationContext.get("cut_item_object")
    itemLabel = continuationContext.get("content_type_for_items") or "item"

    lines: List[str] = []
    if lastItemObject:
        lines.append(f"Last complete {itemLabel} in previous response: {lastItemObject}")
    if cutItemObject:
        lines.append(f"Incomplete/cut {itemLabel} at the end: {cutItemObject}")
        lines.append("Continue from the incomplete item above - complete it first, then add NEW items.")
    elif lastItemObject:
        lines.append("Continue with the NEXT item after this.")
    return lines


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """Assemble the progress summary and continuation point as one text block."""
    progressStats = continuationContext.get("progress_stats", {})
    if not isinstance(progressStats, dict):
        progressStats = {}

    # Only KPIs named by the task's Definition of Done are reported, if present.
    taskIntent = continuationContext.get("taskIntent", {})
    definitionOfDone = taskIntent.get("definitionOfDone", {}) if isinstance(taskIntent, dict) else {}
    if not isinstance(definitionOfDone, dict):
        definitionOfDone = {}

    guidance: List[str] = []
    progressParts = _collectProgressParts(progressStats, definitionOfDone)
    totalItemsCount = _asCount(continuationContext.get("total_items_count", 0))
    if progressParts:
        guidance.append(f"PROGRESS: You have already generated: {', '.join(progressParts)}.")
    elif totalItemsCount > 0:
        # Legacy counter, used when progress_stats yields nothing to report.
        guidance.append(f"PROGRESS: You have already generated {totalItemsCount} items.")

    guidance.extend(_describeContinuationPoint(continuationContext))
    return "\n".join(guidance) if guidance else "Continue from where it stopped."


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: Optional[str] = None,
    continuationContext: Optional[Dict[str, Any]] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Produces a continuation prompt when ``continuationContext`` describes a
    previous non-empty generation, otherwise a first-call prompt.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents to prepend to prompt
        continuationContext: Optional context from previous generation for continuation

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace
    """
    # Fall back to a fixed placeholder title when none is provided.
    # NOTE(review): the title is substituted verbatim; if the placeholder sits
    # inside a JSON string, a title containing '"' would presumably break the
    # template - confirm against jsonTemplateDocument.
    titleValue = title if title else "Generated Document"
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    if _hasContinuation(continuationContext):
        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
        continuationText = _buildContinuationText(continuationContext)
        generationPrompt = f"""User request: "{userPrompt}"
NOTE: The user already received part of the response.
TASK: Continue generating the remaining content.
{continuationText}
JSON structure template:
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use ONLY the element structures shown in the template.
- Continue from where it stopped - add NEW items only; do not repeat existing items.
- Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- Fill with actual content (no placeholders or instructional text such as "Add more...").
- IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content.
- Output JSON only; no markdown fences or extra text.
IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request.
Continue generating:
"""
    else:
        # PROMPT FOR FIRST CALL
        generationPrompt = f"""User request: "{userPrompt}"
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template:
{jsonTemplate}
Instructions:
- Start with {{"metadata": ...}} - return COMPLETE, STRICT JSON.
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generationPrompt}"""
    return generationPrompt.strip()