""" Prompt builder for document generation. This module builds prompts for generating documents from extracted content. """ import logging from typing import Dict, Any from modules.datamodels.datamodelJson import jsonTemplateDocument logger = logging.getLogger(__name__) async def buildGenerationPrompt( outputFormat: str, userPrompt: str, title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None ) -> str: """ Build the unified generation prompt using a single JSON template. Generic solution that works for any user request. Args: outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt userPrompt: User's original prompt for document generation title: Title for the document extracted_content: Optional extracted content from documents to prepend to prompt continuationContext: Optional context from previous generation for continuation Returns: Complete generation prompt string """ # Create a template - let AI generate title if not provided titleValue = title if title else "Generated Document" jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue) # Build prompt based on whether this is a continuation or first call # Check if we have valid continuation context with actual JSON fragment # CRITICAL: Allow continuation even if section_count is 0 (broken JSON that couldn't be parsed) # as long as we have last_raw_json - this handles cases where JSON is too broken to extract sections hasContinuation = ( continuationContext and continuationContext.get("last_raw_json", "") and continuationContext.get("last_raw_json", "").strip() != "{}" ) if hasContinuation: # CONTINUATION PROMPT - use new summary format from buildContinuationContext delivered_summary = continuationContext.get("delivered_summary", "") element_before_cutoff = continuationContext.get("element_before_cutoff") cut_off_element = continuationContext.get("cut_off_element") # Build continuation text with delivered summary and cut-off information # CRITICAL: Always include cut-off information if available (per loop_plan.md) continuationText = f"{delivered_summary}\n\n" continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n" # Add cut-off point information (per loop_plan.md: always add if available) # These are shown ONLY as REFERENCE to know where generation stopped if element_before_cutoff: continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n" continuationText += f"{element_before_cutoff}\n\n" if cut_off_element: continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n" continuationText += f"{cut_off_element}\n\n" continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n" continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n" continuationText += "Start directly with the next element/section that should follow.\n\n" # PROMPT FOR CONTINUATION generationPrompt = f"""{'='*80} USER REQUEST / USER PROMPT: {'='*80} {userPrompt} {'='*80} END OF USER REQUEST / USER PROMPT {'='*80} ⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content. {continuationText} JSON structure template: {jsonTemplate} Rules: - Return ONLY valid JSON (no comments, no trailing commas, double quotes only). - Reference elements shown above are ALREADY DELIVERED - DO NOT repeat them. - Generate ONLY the remaining content that comes AFTER the reference elements. - DO NOT regenerate the entire JSON structure - start directly with what comes next. - Output JSON only; no markdown fences or extra text. Continue generating the remaining content now. """ else: # PROMPT FOR FIRST CALL # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions if extracted_content: # If we have extracted content, put it FIRST and make it very clear it's the source data generationPrompt = f"""{'='*80} USER REQUEST / USER PROMPT: {'='*80} {userPrompt} {'='*80} END OF USER REQUEST / USER PROMPT {'='*80} {'='*80} ⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️ {'='*80} The content below contains the ACTUAL DATA extracted from the source documents. You MUST use this data - DO NOT generate fake or example data. {'='*80} EXTRACTED CONTENT FROM DOCUMENTS: {'='*80} {extracted_content} {'='*80} END OF EXTRACTED CONTENT {'='*80} Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source. The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data. You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template. JSON structure template (structure only - use data from EXTRACTED CONTENT above): {jsonTemplate} Instructions: - Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes. - Do NOT reuse example section IDs; create your own. - CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template. - Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response. - IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective. - Output JSON only; no markdown fences or extra text. Generate your complete response using the extracted content data. """ else: # No extracted content - generate from scratch generationPrompt = f"""{'='*80} USER REQUEST / USER PROMPT: {'='*80} {userPrompt} {'='*80} END OF USER REQUEST / USER PROMPT {'='*80} Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content. JSON structure template: {jsonTemplate} Instructions: - Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes. - Do NOT reuse example section IDs; create your own. - Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response. - IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective. - Output JSON only; no markdown fences or extra text. Generate your complete response. """ return generationPrompt.strip()