"""
Prompt builder for document generation.

This module builds prompts for generating documents from extracted content.
"""

import json
import logging
from typing import Optional

logger = logging.getLogger(__name__)

# Centralized JSON structure template for document generation.
# The {{DOCUMENT_TITLE}} placeholder is substituted by buildGenerationPrompt.
# The "//" lines are guidance for the model, not valid JSON.
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_1",
          "content_type": "heading|paragraph|table|list|code",
          "elements": [
            // heading: {"level": 1, "text": "..."}
            // paragraph: {"text": "..."}
            // table: {"headers": [...], "rows": [[...]], "caption": "..."}
            // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"}
            // code: {"code": "...", "language": "..."}
          ],
          "order": 1
        }
      ]
    }
  ],
  "continuation": null
}"""

# Text placed in the template's title slot when the caller supplies no title.
_TITLE_PLACEHOLDER = "<concise title derived from the user request>"


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: Optional[str],
    extracted_content: Optional[str] = None
) -> str:
    """
    Build the unified generation prompt using a single JSON template.

    The function contains no ``await``; it stays a coroutine so existing
    callers that await it keep working.

    Args:
        outputFormat: Target output format (html, pdf, docx, etc.)
        userPrompt: User's original prompt for document generation
        title: Title for the document. When None, empty or whitespace-only,
            the prompt asks the model to generate a title instead.
        extracted_content: Optional extracted content from documents to
            prepend to the prompt

    Returns:
        Complete generation prompt string, stripped of leading and trailing
        whitespace
    """
    if title and title.strip():
        prompt_instruction = f'Use the following title: "{title}"'
        # Escape quotes, backslashes and control characters so the example
        # JSON stays well-formed; ensure_ascii=False keeps non-ASCII titles
        # readable. The [1:-1] slice drops the quotes json.dumps adds.
        template_title = json.dumps(title, ensure_ascii=False)[1:-1]
    else:
        # No usable title: let the AI generate one rather than failing on
        # str.replace(..., None) or requesting an empty title.
        prompt_instruction = (
            "No title was provided; generate a concise, descriptive title "
            "based on the user request"
        )
        template_title = _TITLE_PLACEHOLDER

    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace(
        "{{DOCUMENT_TITLE}}", template_title
    )

    # The literal LOOP_INSTRUCTION token is emitted unchanged.
    # NOTE(review): presumably substituted by a downstream step - confirm.
    generation_prompt = f"""Generate structured JSON content for document creation.

USER CONTEXT: "{userPrompt}"

TARGET FORMAT: {outputFormat}

TITLE INSTRUCTION: {prompt_instruction}

LOOP_INSTRUCTION

RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type

{json_template}
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

{generation_prompt}"""

    return generation_prompt.strip()