198 lines
10 KiB
Python
198 lines
10 KiB
Python
"""
|
|
Prompt builder for document generation.
|
|
This module builds prompts for generating documents from extracted content.
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict, Any
|
|
from modules.datamodels.datamodelJson import jsonTemplateDocument
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# (definitionOfDone key, progress_stats key, singular noun, suffix) - order used
# when a Definition of Done is present and filters which metrics are shown.
_DOD_PROGRESS_METRICS = (
    ("minTableRows", "total_rows", "row", ""),
    ("minListItems", "total_items", "item", ""),
    ("minCodeLines", "total_code_lines", "line", " of code/data"),
    ("minParagraphs", "total_paragraphs", "paragraph", ""),
    ("minHeadings", "total_headings", "heading", ""),
    ("minSections", "section_count", "section", ""),
)

# (progress_stats key, singular noun, suffix) - order used when no Definition
# of Done is available and every non-zero metric is shown.
_FALLBACK_PROGRESS_METRICS = (
    ("section_count", "section", ""),
    ("total_headings", "heading", ""),
    ("total_paragraphs", "paragraph", ""),
    ("total_rows", "row", ""),
    ("total_items", "item", ""),
    ("total_code_lines", "line", " of code/data"),
)


def _asCount(value: Any) -> int:
    """Coerce a counter/threshold to int; anything non-numeric (None, junk) becomes 0."""
    if isinstance(value, int):
        return int(value)  # int() also normalises bool to 0/1
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _asDict(value: Any) -> Dict[str, Any]:
    """Return value unchanged when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _countLabel(count: int, noun: str, suffix: str = "") -> str:
    """Format '<count> <noun>[s]<suffix>', pluralising when count is greater than one."""
    return f"{count} {noun}{'s' if count > 1 else ''}{suffix}"


def _hasValidContinuation(continuationContext: Any) -> bool:
    """Tell whether the context carries at least one section and a non-empty JSON fragment."""
    if not continuationContext:
        return False
    if _asCount(continuationContext.get("section_count", 0)) <= 0:
        return False
    lastRawJson = continuationContext.get("last_raw_json", "")
    # A non-string fragment cannot be a raw JSON snippet; treat it as missing
    # instead of failing on .strip().
    if not isinstance(lastRawJson, str) or not lastRawJson:
        return False
    return lastRawJson.strip() != "{}"


def _buildProgressParts(progressStats: Dict[str, Any], definitionOfDone: Dict[str, Any]) -> list:
    """Return human-readable progress fragments such as '3 rows' or '1 section'.

    With a Definition of Done, only metrics whose DoD threshold is positive are
    listed; the character count is used only when no other metric qualified.
    Without one, every non-zero metric is listed.
    """
    parts = []

    if definitionOfDone:
        for dodKey, statKey, noun, suffix in _DOD_PROGRESS_METRICS:
            count = _asCount(progressStats.get(statKey, 0))
            if _asCount(definitionOfDone.get(dodKey, 0)) > 0 and count > 0:
                parts.append(_countLabel(count, noun, suffix))
        # Characters are less informative than rows/items/lines, so they are
        # only reported when nothing else was.
        if not parts and _asCount(definitionOfDone.get("minContentSize", 0)) > 0:
            totalContentSize = _asCount(progressStats.get("total_content_size", 0))
            if totalContentSize > 0:
                parts.append(f"{totalContentSize} characters")
        return parts

    for statKey, noun, suffix in _FALLBACK_PROGRESS_METRICS:
        count = _asCount(progressStats.get(statKey, 0))
        if count > 0:
            parts.append(_countLabel(count, noun, suffix))
    contentTypeCount = _asCount(progressStats.get("content_type_count", 0))
    if contentTypeCount > 1:
        parts.append(f"{contentTypeCount} different content types")
    return parts


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """Build the guidance paragraph (progress summary + continuation point) for a continuation prompt."""
    guidance = []

    progressStats = _asDict(continuationContext.get("progress_stats", {}))
    taskIntent = _asDict(continuationContext.get("taskIntent", {}))
    definitionOfDone = _asDict(taskIntent.get("definitionOfDone", {}))

    progressParts = _buildProgressParts(progressStats, definitionOfDone)
    totalItemsCount = _asCount(continuationContext.get("total_items_count", 0))
    if progressParts:
        guidance.append(f"PROGRESS: You have already generated: {', '.join(progressParts)}.")
    elif totalItemsCount > 0:
        # Legacy counter, used when progress_stats yielded nothing.
        guidance.append(f"PROGRESS: You have already generated {totalItemsCount} items.")

    lastItemObject = continuationContext.get("last_item_object", "")
    cutItemObject = continuationContext.get("cut_item_object")
    itemLabel = continuationContext.get("content_type_for_items") or "item"

    if lastItemObject:
        guidance.append(f"Last complete {itemLabel} in previous response: {lastItemObject}")
    if cutItemObject:
        # The cut item is shown even when no complete item was extracted;
        # otherwise the only available continuation anchor would be lost.
        guidance.append(f"Incomplete/cut {itemLabel} at the end: {cutItemObject}")
        guidance.append("Continue from the incomplete item above - complete it first, then add NEW items.")
    elif lastItemObject:
        guidance.append("Continue with the NEXT item after this.")

    return "\n".join(guidance) if guidance else "Continue from where it stopped."


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist: a first-call prompt, and a continuation prompt
    that is used when continuationContext reports at least one section and a
    non-empty "last_raw_json" fragment (anything other than "{}").

    The coroutine awaits nothing; it stays async so existing callers that
    await it keep working.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content, prepended to the prompt
        continuationContext: Optional context from a previous generation. Keys read:
            section_count, last_raw_json, last_item_object, cut_item_object,
            content_type_for_items, total_items_count, progress_stats, taskIntent.
            Missing, None or non-numeric values are treated as absent/zero.

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace
    """
    titleValue = title if title else "Generated Document"
    # NOTE(review): the title is substituted verbatim. If the placeholder sits
    # inside a JSON string in jsonTemplateDocument, a title containing quotes
    # or backslashes would make the shown template invalid JSON - confirm.
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    if _hasValidContinuation(continuationContext):
        # CONTINUATION PROMPT - user already received the first part
        continuationText = _buildContinuationText(continuationContext)
        promptLines = [
            f'User request: "{userPrompt}"',
            "",
            "NOTE: The user already received part of the response.",
            "TASK: Continue generating the remaining content.",
            "",
            continuationText,
            "",
            "JSON structure template:",
            jsonTemplate,
            "",
            "Instructions:",
            "- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.",
            "- Arrays must contain ONLY JSON values; do not include comments or ellipses.",
            "- Use ONLY the element structures shown in the template.",
            "- Continue from where it stopped - add NEW items only; do not repeat existing items.",
            "- Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response.",
            '- Fill with actual content (no placeholders or instructional text such as "Add more...").',
            '- IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content.',
            "- Output JSON only; no markdown fences or extra text.",
            "",
            "IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request.",
            "",
            "Continue generating:",
        ]
    else:
        # PROMPT FOR FIRST CALL
        promptLines = [
            f'User request: "{userPrompt}"',
            "",
            "Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.",
            "",
            "JSON structure template:",
            jsonTemplate,
            "",
            "Instructions:",
            '- Start with {"metadata": ...} - return COMPLETE, STRICT JSON.',
            "- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.",
            "- Do NOT reuse example section IDs; create your own.",
            "- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.",
            '- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.',
            "- Output JSON only; no markdown fences or extra text.",
            "",
            'Generate your complete response starting from {"metadata": ...}:',
        ]

    generationPrompt = "\n".join(promptLines)

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"EXTRACTED CONTENT FROM DOCUMENTS:\n{extracted_content}\n\n{generationPrompt}"

    return generationPrompt.strip()
|
|
|