# gateway/modules/services/serviceGeneration/subPromptBuilderGeneration.py

"""
Prompt builder for document generation.
This module builds prompts for generating documents from extracted content.
"""

import logging
from typing import Dict, Any
from modules.datamodels.datamodelJson import jsonTemplateDocument

logger = logging.getLogger(__name__)


# (Definition-of-Done key, progress_stats key, singular noun, suffix), listed in
# the order the DoD-filtered progress summary reports them.
_DOD_PROGRESS_METRICS = (
    ("minTableRows", "total_rows", "row", ""),
    ("minListItems", "total_items", "item", ""),
    ("minCodeLines", "total_code_lines", "line", " of code/data"),
    ("minParagraphs", "total_paragraphs", "paragraph", ""),
    ("minHeadings", "total_headings", "heading", ""),
    ("minSections", "section_count", "section", ""),
)

# (progress_stats key, singular noun, suffix), listed in the order used when no
# Definition of Done is available and every non-zero metric is reported.
_FALLBACK_PROGRESS_METRICS = (
    ("section_count", "section", ""),
    ("total_headings", "heading", ""),
    ("total_paragraphs", "paragraph", ""),
    ("total_rows", "row", ""),
    ("total_items", "item", ""),
    ("total_code_lines", "line", " of code/data"),
)


def _toCount(value: Any) -> float:
    """Return ``value`` unchanged when it is a number, otherwise 0.

    A counter that is None or otherwise non-numeric cannot be compared with 0
    without raising TypeError; it is treated as "nothing counted" instead.
    """
    return value if isinstance(value, (int, float)) else 0


def _countLabel(count: float, noun: str, suffix: str = "") -> str:
    """Format ``count`` with ``noun``, pluralised with "s" when count > 1."""
    return f"{count} {noun}{'s' if count > 1 else ''}{suffix}"


def _hasContinuation(continuationContext: Dict[str, Any]) -> bool:
    """Tell whether the context describes a real, non-empty previous response.

    Requires a positive "section_count" and a "last_raw_json" fragment that is
    neither empty nor just "{}" (ignoring surrounding whitespace).
    """
    if not continuationContext:
        return False
    if _toCount(continuationContext.get("section_count")) <= 0:
        return False
    lastRawJson = continuationContext.get("last_raw_json", "")
    return bool(lastRawJson) and lastRawJson.strip() != "{}"


def _collectProgressParts(progressStats: Dict[str, Any], definitionOfDone: Dict[str, Any]) -> list:
    """Build the human-readable progress fragments (e.g. "3 rows").

    With a Definition of Done, only metrics whose DoD threshold is positive are
    reported; the character count is used only when no other metric qualified.
    Without one, every non-zero metric is reported.
    """
    parts = []

    if definitionOfDone:
        for dodKey, statKey, noun, suffix in _DOD_PROGRESS_METRICS:
            count = _toCount(progressStats.get(statKey))
            if _toCount(definitionOfDone.get(dodKey)) > 0 and count > 0:
                parts.append(_countLabel(count, noun, suffix))
        # Character count is the least informative metric: last resort only
        if not parts and _toCount(definitionOfDone.get("minContentSize")) > 0:
            contentSize = _toCount(progressStats.get("total_content_size"))
            if contentSize > 0:
                parts.append(f"{contentSize} characters")
        return parts

    for statKey, noun, suffix in _FALLBACK_PROGRESS_METRICS:
        count = _toCount(progressStats.get(statKey))
        if count > 0:
            parts.append(_countLabel(count, noun, suffix))
    contentTypeCount = _toCount(progressStats.get("content_type_count"))
    if contentTypeCount > 1:
        parts.append(f"{contentTypeCount} different content types")
    return parts


def _buildContinuationText(continuationContext: Dict[str, Any]) -> str:
    """Compose the guidance lines inserted into the continuation prompt.

    The text holds an optional PROGRESS line followed by the continuation point
    (last complete item, plus the cut item when one is available). When nothing
    can be said, a generic "Continue from where it stopped." is returned.
    """
    # Tolerate missing / None / wrongly typed sub-structures
    progressStats = continuationContext.get("progress_stats")
    if not isinstance(progressStats, dict):
        progressStats = {}
    taskIntent = continuationContext.get("taskIntent")
    definitionOfDone = taskIntent.get("definitionOfDone") if isinstance(taskIntent, dict) else None
    if not isinstance(definitionOfDone, dict):
        definitionOfDone = {}

    guidance = []

    progressParts = _collectProgressParts(progressStats, definitionOfDone)
    totalItemsCount = _toCount(continuationContext.get("total_items_count"))
    if progressParts:
        guidance.append(f"PROGRESS: You have already generated: {', '.join(progressParts)}.")
    elif totalItemsCount > 0:
        # Legacy counter, used when progress_stats yields nothing to report
        guidance.append(f"PROGRESS: You have already generated {totalItemsCount} items.")

    # The continuation point is only described when a last complete item is
    # known; a cut item on its own is not shown.
    lastItemObject = continuationContext.get("last_item_object", "")
    if lastItemObject:
        itemLabel = continuationContext.get("content_type_for_items") or "item"
        cutItemObject = continuationContext.get("cut_item_object")
        guidance.append(f"Last complete {itemLabel} in previous response: {lastItemObject}")
        if cutItemObject:
            guidance.append(f"Incomplete/cut {itemLabel} at the end: {cutItemObject}")
            guidance.append("Continue from the incomplete item above - complete it first, then add NEW items.")
        else:
            guidance.append("Continue with the NEXT item after this.")

    return "\n".join(guidance) if guidance else "Continue from where it stopped."


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    extracted_content: str = None,
    continuationContext: Dict[str, Any] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    Two prompt variants exist: a first-call prompt, and a continuation prompt
    that is used when ``continuationContext`` reports a positive
    "section_count" together with a non-empty "last_raw_json" fragment.
    Counters inside the context that are missing, None or non-numeric are
    treated as 0 instead of raising.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt
        userPrompt: User's original prompt for document generation
        title: Title for the document; "Generated Document" is used when empty
        extracted_content: Optional extracted content from documents to prepend to prompt
        continuationContext: Optional context from previous generation for continuation

    Returns:
        Complete generation prompt string, stripped of surrounding whitespace
    """
    # Substitute the title placeholder; fall back to a generic title when none is given
    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", title or "Generated Document")

    if _hasContinuation(continuationContext):
        continuationText = _buildContinuationText(continuationContext)

        # PROMPT FOR CONTINUATION
        generationPrompt = f"""User request: "{userPrompt}"

NOTE: The user already received part of the response.
TASK: Continue generating the remaining content.

{continuationText}

JSON structure template:
{jsonTemplate}

Instructions:
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use ONLY the element structures shown in the template.
- Continue from where it stopped - add NEW items only; do not repeat existing items.
- Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- Fill with actual content (no placeholders or instructional text such as "Add more...").
- IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content.
- Output JSON only; no markdown fences or extra text.

IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request.

Continue generating:
"""
    else:
        # PROMPT FOR FIRST CALL
        generationPrompt = f"""User request: "{userPrompt}"

Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

JSON structure template:
{jsonTemplate}

Instructions:
- Start with {{"metadata": ...}} - return COMPLETE, STRICT JSON.
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.

Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generationPrompt}"""

    return generationPrompt.strip()