""" Centralized prompt builder for document generation across formats. Builds a robust prompt that: - Accepts any user intent (no fixed structure assumptions) - Injects format-specific guidelines from the selected renderer - Adds a common policy section to always use real data from source docs - Requires the AI to output a filename header that we can parse and use """ from typing import Protocol class _RendererLike(Protocol): def getExtractionPrompt(self, user_prompt: str, title: str) -> str: # returns only format-specific guidelines ... def buildExtractionPrompt( output_format: str, renderer: _RendererLike, user_prompt: str, title: str ) -> str: """ Build the final extraction prompt by combining: - The raw user prompt (verbatim) - Generic cross-format instructions (filename header + real-data policy) - Format-specific guidelines snippet provided by the renderer The AI must place a single filename header at the very top: FILENAME: followed by a blank line and then ONLY the document content according to the target format. """ format_guidelines = renderer.getExtractionPrompt(user_prompt, title) # Generic block appears once for every format generic_intro = f""" {user_prompt} You are generating a document in {output_format.upper()} format for the title: "{title}". Rules: - The user's intent fully defines the structure. Do not assume a fixed template or headings. - Use only factual information extracted from the supplied source documents. - Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD"). - The output must strictly follow the target format and be ready for saving without extra wrapping. - At the VERY TOP output exactly one line with the filename header: FILENAME: - The base name should be short, descriptive, and kebab-case or snake-case without spaces. - Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx). - Avoid special characters beyond [a-zA-Z0-9-_]. - After this header, insert a single blank line and then provide ONLY the document content. Common policy: - Use the actual data from the source documents to create the content. - Do not generate placeholder text or templates. - Extract and use the real data provided in the source documents to create meaningful content. """.strip() # Final assembly final_prompt = ( generic_intro + "\n\nFORMAT-SPECIFIC GUIDELINES:\n" + format_guidelines.strip() + "\n\nGenerate the complete document content now based on the source documents below:" ) return final_prompt