""" Prompt builder for document generation. This module builds prompts for generating documents from extracted content. """ import logging from typing import Dict, Any from modules.datamodels.datamodelJson import jsonTemplateDocument logger = logging.getLogger(__name__) async def buildGenerationPrompt( outputFormat: str, userPrompt: str, title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None ) -> str: """ Build the unified generation prompt using a single JSON template. Generic solution that works for any user request. Args: outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt userPrompt: User's original prompt for document generation title: Title for the document extracted_content: Optional extracted content from documents to prepend to prompt continuationContext: Optional context from previous generation for continuation Returns: Complete generation prompt string """ # Create a template - let AI generate title if not provided titleValue = title if title else "Generated Document" jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue) # Build prompt based on whether this is a continuation or first call # Check if we have valid continuation context with actual JSON fragment hasContinuation = ( continuationContext and continuationContext.get("section_count", 0) > 0 and continuationContext.get("last_raw_json", "") and continuationContext.get("last_raw_json", "").strip() != "{}" ) if hasContinuation: # CONTINUATION PROMPT - user already received first part, continue from where it stopped lastItemObject = continuationContext.get("last_item_object", "") # Last complete sub-element (row, item, line, etc.) totalItemsCount = continuationContext.get("total_items_count", 0) # CRITICAL: Only use lastItemObject - it contains the last complete sub-element # If extraction failed and lastItemObject is empty, we'll show a message that extraction failed # No need for fragmentSnippet - it's redundant and causes duplication # Build clear continuation guidance with PROGRESS STATISTICS from all accumulated sections # This helps AI understand completion status without seeing entire content # GENERIC approach: Works for all task types (books, reports, code, lists, tables, etc.) continuationGuidance = [] progressStats = continuationContext.get("progress_stats", {}) totalRows = progressStats.get("total_rows", 0) totalItems = progressStats.get("total_items", 0) totalCodeLines = progressStats.get("total_code_lines", 0) totalParagraphs = progressStats.get("total_paragraphs", 0) totalHeadings = progressStats.get("total_headings", 0) sectionCount = progressStats.get("section_count", 0) contentTypeCount = progressStats.get("content_type_count", 0) lastContentType = progressStats.get("last_content_type") # CRITICAL: Filter progress stats based on Definition of Done from taskIntent # Only show KPIs that are relevant for this specific action/task taskIntent = continuationContext.get("taskIntent", {}) definitionOfDone = taskIntent.get("definitionOfDone", {}) if isinstance(taskIntent, dict) else {} # Build comprehensive progress information (filtered by DoD if available) progressParts = [] # Only show progress metrics that are relevant based on DoD KPIs # If DoD specifies minTableRows, show rows; if minListItems, show items; etc. if definitionOfDone: # Filter based on DoD KPIs - only show metrics that matter for this task if definitionOfDone.get("minTableRows", 0) > 0 and totalRows > 0: progressParts.append(f"{totalRows} row{'s' if totalRows > 1 else ''}") if definitionOfDone.get("minListItems", 0) > 0 and totalItems > 0: progressParts.append(f"{totalItems} item{'s' if totalItems > 1 else ''}") if definitionOfDone.get("minCodeLines", 0) > 0 and totalCodeLines > 0: progressParts.append(f"{totalCodeLines} line{'s' if totalCodeLines > 1 else ''} of code/data") if definitionOfDone.get("minParagraphs", 0) > 0 and totalParagraphs > 0: progressParts.append(f"{totalParagraphs} paragraph{'s' if totalParagraphs > 1 else ''}") if definitionOfDone.get("minHeadings", 0) > 0 and totalHeadings > 0: progressParts.append(f"{totalHeadings} heading{'s' if totalHeadings > 1 else ''}") if definitionOfDone.get("minSections", 0) > 0 and sectionCount > 0: progressParts.append(f"{sectionCount} section{'s' if sectionCount > 1 else ''}") # Only show contentSize if no other metrics are available (it's less informative) # Prefer showing rows/items/lines over characters if not progressParts and definitionOfDone.get("minContentSize", 0) > 0: totalContentSize = progressStats.get("total_content_size", 0) if totalContentSize > 0: progressParts.append(f"{totalContentSize} characters") else: # No DoD available - show all progress metrics (fallback) if sectionCount > 0: progressParts.append(f"{sectionCount} section{'s' if sectionCount > 1 else ''}") if totalHeadings > 0: progressParts.append(f"{totalHeadings} heading{'s' if totalHeadings > 1 else ''}") if totalParagraphs > 0: progressParts.append(f"{totalParagraphs} paragraph{'s' if totalParagraphs > 1 else ''}") if totalRows > 0: progressParts.append(f"{totalRows} row{'s' if totalRows > 1 else ''}") if totalItems > 0: progressParts.append(f"{totalItems} item{'s' if totalItems > 1 else ''}") if totalCodeLines > 0: progressParts.append(f"{totalCodeLines} line{'s' if totalCodeLines > 1 else ''} of code/data") if contentTypeCount > 1: progressParts.append(f"{contentTypeCount} different content types") if progressParts: continuationGuidance.append(f"PROGRESS: You have already generated: {', '.join(progressParts)}.") elif totalItemsCount > 0: # Fallback to old totalItemsCount if progress_stats not available continuationGuidance.append(f"PROGRESS: You have already generated {totalItemsCount} items.") # Show the last complete item AND cut item for continuation point # CRITICAL: AI needs both to know where to continue cutItemObject = continuationContext.get("cut_item_object") contentTypeForItems = continuationContext.get("content_type_for_items") if lastItemObject: if cutItemObject: # Both complete and cut items available - show both continuationGuidance.append(f"Last complete {contentTypeForItems or 'item'} in previous response: {lastItemObject}") continuationGuidance.append(f"Incomplete/cut {contentTypeForItems or 'item'} at the end: {cutItemObject}") continuationGuidance.append(f"Continue from the incomplete item above - complete it first, then add NEW items.") else: # Only complete item available continuationGuidance.append(f"Last complete {contentTypeForItems or 'item'} in previous response: {lastItemObject}") continuationGuidance.append(f"Continue with the NEXT item after this.") continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped." # PROMPT FOR CONTINUATION generationPrompt = f"""User request: "{userPrompt}" NOTE: The user already received part of the response. TASK: Continue generating the remaining content. {continuationText} JSON structure template: {jsonTemplate} Instructions: - Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes. - Arrays must contain ONLY JSON values; do not include comments or ellipses. - Use ONLY the element structures shown in the template. - Continue from where it stopped - add NEW items only; do not repeat existing items. - Generate remaining content to complete the user request. Do NOT just give an instruction or comments. Deliver the complete response. - Fill with actual content (no placeholders or instructional text such as "Add more..."). - IMPORTANT: Ensure "filename" in each document has meaningful name with appropriate extension matching the content. - Output JSON only; no markdown fences or extra text. IMPORTANT: Before responding, analyse the remaining data to fully satisfy user request. Continue generating: """ else: # PROMPT FOR FIRST CALL generationPrompt = f"""User request: "{userPrompt}" Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content. JSON structure template: {jsonTemplate} Instructions: - Start with {{"metadata": ...}} - return COMPLETE, STRICT JSON. - Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes. - Do NOT reuse example section IDs; create your own. - Generate complete content based on the user request. Do NOT just give an instruction or comments. Deliver the complete response. - IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective. - Output JSON only; no markdown fences or extra text. Generate your complete response starting from {{"metadata": ...}}: """ # If we have extracted content, prepend it to the prompt if extracted_content: generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: {extracted_content} {generationPrompt}""" return generationPrompt.strip()