""" Prompt builder for document generation. This module builds prompts for generating documents from extracted content. """ import logging from typing import Dict, Any logger = logging.getLogger(__name__) # Centralized JSON structure template for document generation # Includes examples for all content types so AI knows the structure patterns TEMPLATE_JSON_DOCUMENT_GENERATION = """{ "metadata": { "split_strategy": "single_document", "source_documents": [], "extraction_method": "ai_generation" }, "documents": [ { "id": "doc_1", "title": "{{DOCUMENT_TITLE}}", "filename": "document.json", "sections": [ { "id": "section_heading_example", "content_type": "heading", "elements": [ {"level": 1, "text": "Heading Text"} ], "order": 0 }, { "id": "section_paragraph_example", "content_type": "paragraph", "elements": [ {"text": "Paragraph text content"} ], "order": 0 }, { "id": "section_list_example", "content_type": "list", "elements": [ { "items": [ {"text": "Item 1"}, {"text": "Item 2"} ], "list_type": "numbered" } ], "order": 0 }, { "id": "section_table_example", "content_type": "table", "elements": [ { "headers": ["Column 1", "Column 2"], "rows": [ ["Row 1 Col 1", "Row 1 Col 2"], ["Row 2 Col 1", "Row 2 Col 2"] ], "caption": "Table caption" } ], "order": 0 }, { "id": "section_code_example", "content_type": "code", "elements": [ { "code": "function example() { return true; }", "language": "javascript" } ], "order": 0 } ] } ] }""" async def buildGenerationPrompt( outputFormat: str, userPrompt: str, title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None ) -> str: """ Build the unified generation prompt using a single JSON template. Generic solution that works for any user request. Args: outputFormat: Target output format (html, pdf, docx, etc.) - not used in prompt userPrompt: User's original prompt for document generation title: Title for the document extracted_content: Optional extracted content from documents to prepend to prompt continuationContext: Optional context from previous generation for continuation Returns: Complete generation prompt string """ # Create a template - let AI generate title if not provided title_value = title if title else "Generated Document" json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value) # Build prompt based on whether this is a continuation or first call # Check if we have valid continuation context with actual JSON fragment has_continuation = ( continuationContext and continuationContext.get("section_count", 0) > 0 and continuationContext.get("last_raw_json", "") and continuationContext.get("last_raw_json", "").strip() != "{}" ) if has_continuation: # CONTINUATION PROMPT - user already received first part, continue from where it stopped last_raw_json = continuationContext.get("last_raw_json", "") last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"} last_items_from_fragment = continuationContext.get("last_items_from_fragment", "") total_items_count = continuationContext.get("total_items_count", 0) # Show the last few items to indicate where to continue (limit fragment size) # Extract just the ending portion of the JSON to show where it cut off fragment_snippet = "" if last_raw_json: # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json # Add ellipsis if truncated if len(last_raw_json) > 1500: fragment_snippet = "..." + fragment_snippet # Build clear continuation guidance continuation_guidance = [] if total_items_count > 0: continuation_guidance.append(f"You have already generated {total_items_count} items.") # Show the last complete item object (full object format) if last_item_object: continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.") continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped." generation_prompt = f"""User request: "{userPrompt}" The user already received part of the response. Continue generating the remaining content. {continuation_text} Previous response ended here (JSON was cut off at this point): ```json {fragment_snippet if fragment_snippet else "(No fragment available)"} ``` JSON structure template: {json_template} Instructions: - Return full JSON structure (metadata + documents + sections) - Continue from where it stopped - add NEW items only, do not repeat old items - Use the element structures shown in the template - Generate all remaining content needed to complete the user request - Fill with actual content (no comments, no "Add more..." text, no placeholders) - When fully complete, add "complete_response": true at root level - Return only valid JSON (no comments, no markdown blocks) Continue generating: """ else: # FIRST CALL - initial generation generation_prompt = f"""User request: "{userPrompt}" Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning. JSON structure template (reference only - shows the pattern): {json_template} Instructions: - Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning - Do NOT continue from the template examples above - create your own sections - Generate content based on the user request - Use the element structures shown in the template (heading, paragraph, list, table, code) - Create your own section IDs (do not use the example IDs like "section_heading_example") - When fully complete, add "complete_response": true at root level - Return only valid JSON (no comments, no markdown blocks, no text before/after) Generate your complete response starting from {{"metadata": ...}}: """ # If we have extracted content, prepend it to the prompt if extracted_content: generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: {extracted_content} {generation_prompt}""" return generation_prompt.strip()