""" Prompt builder for document generation. This module builds prompts for generating documents from extracted content. """ import logging from typing import Dict, Any logger = logging.getLogger(__name__) # Centralized JSON structure template for document generation TEMPLATE_JSON_DOCUMENT_GENERATION = """{ "metadata": { "split_strategy": "single_document", "source_documents": [], "extraction_method": "ai_generation" }, "documents": [ { "id": "doc_1", "title": "{{DOCUMENT_TITLE}}", "filename": "document.json", "sections": [ { "id": "section_1", "content_type": "heading|paragraph|table|list|code", "elements": [ // heading: {"level": 1, "text": "..."} // paragraph: {"text": "..."} // table: {"headers": [...], "rows": [[...]], "caption": "..."} // list: {"items": [{"text": "...", "subitems": [...]}], "list_type": "bullet|numbered"} // code: {"code": "...", "language": "..."} ], "order": 1 } ] } ] }""" async def buildGenerationPrompt( outputFormat: str, userPrompt: str, title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None ) -> str: """ Build the unified generation prompt using a single JSON template. Simplified version without continuation logic in prompt. Args: outputFormat: Target output format (html, pdf, docx, etc.) userPrompt: User's original prompt for document generation title: Title for the document extracted_content: Optional extracted content from documents to prepend to prompt continuationContext: Optional context from previous generation for continuation Returns: Complete generation prompt string """ # Create a template - let AI generate title if not provided title_value = title if title else "Generated Document" json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value) # Check if this is a continuation request if continuationContext and continuationContext.get("section_count", 0) > 0: # Continuation prompt - simple and focused section_count = continuationContext.get("section_count", 0) next_order = continuationContext.get("next_order", 1) last_content_sample = continuationContext.get("last_content_sample", "") generation_prompt = f"""Continue generating structured JSON content. ORIGINAL REQUEST: "{userPrompt}" TARGET FORMAT: {outputFormat} TITLE: "{title_value}" CONTEXT - Already generated: - Total sections generated: {section_count} - Next section order: {next_order} - Last content: {last_content_sample} YOUR TASK: Continue where previous generation stopped. Generate the NEXT section(s) starting with section_{next_order}. Generate as much content as possible. RULES: - Follow the JSON template structure below exactly - Fill sections with ACTUAL data based on the user request - Use appropriate content_type for the data - Generate REAL content, not summaries or placeholders - Generate multiple sections if possible Return raw JSON (no ```json blocks, no text before/after) JSON Template {json_template} """ else: # First call - simple prompt without continuation complexity generation_prompt = f"""Generate structured JSON content for document creation. USER REQUEST: "{userPrompt}" TARGET FORMAT: {outputFormat} TITLE: "{title_value}" INSTRUCTIONS: - Follow the JSON template structure below exactly - Emit only one JSON object in the response - Fill sections with ACTUAL data based on the user request - Use appropriate content_type for each section - Generate REAL content, not summaries or instructions - Structure content in sections with order 1, 2, 3... - Each section should be complete before next - Generate as much content as possible Return raw JSON (no ```json blocks, no text before/after) JSON Template {json_template} """ # If we have extracted content, prepend it to the prompt if extracted_content: generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: {extracted_content} {generation_prompt}""" return generation_prompt.strip()