# gateway/modules/services/serviceGeneration/renderers/html_renderer.py

"""
HTML renderer for report generation.
"""

import html
from typing import Dict, Any, Tuple, List

from .base_renderer import BaseRenderer

class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction.

    The extraction prompt asks the model for a complete HTML document;
    ``render`` then normalizes that output (strips a surrounding markdown
    fence, ensures a DOCTYPE, wraps bare fragments in a document shell).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Build the HTML-specific extraction prompt.

        Args:
            user_prompt: The caller's request; placed verbatim at the top of
                the prompt.
            title: Report title, quoted inside the prompt text.

        Returns:
            The full prompt string instructing the model to emit a complete,
            self-contained HTML5 document.
        """
        return f"""
{user_prompt}

Generate a comprehensive HTML report with the title: "{title}"

HTML STRUCTURE REQUIREMENTS:
- Create a complete, self-contained HTML document
- Start with: <!DOCTYPE html>
- Include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>
- Use proper HTML5 semantic elements: <header>, <main>, <section>, <article>, <footer>
- Include professional CSS styling in a <style> block
- Structure content with clear headings (h1, h2, h3)
- Use tables for structured data
- Use lists for bullet points
- Include source document information
- Add a footer with generation metadata

STYLING REQUIREMENTS:
- Professional, clean design
- Responsive layout
- Good typography and spacing
- Color scheme: blues and grays
- Tables with borders and alternating row colors
- Proper heading hierarchy

OUTPUT POLICY:
- Return ONLY the complete HTML document
- No markdown, no code blocks, no additional text
- Valid HTML5 that can be saved as .html file
- Include all necessary CSS inline
- Make it look professional and polished

CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.

Generate the complete HTML report using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to HTML format.

        Args:
            extracted_content: Text returned by the extraction step; expected
                to already be HTML, possibly fenced or a bare fragment.
            title: Report title, used if a document shell or the error page
                has to be generated.

        Returns:
            A ``(html, "text/html")`` tuple. If cleaning fails, the error is
            logged and a minimal error page is returned instead of raising.
        """
        try:
            # The extracted content should already be HTML from the AI;
            # only normalize it rather than re-rendering.
            html_content = self._clean_html_content(extracted_content, title)
            return html_content, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Escape everything interpolated into markup: both the title and
            # the exception text may contain '<', '&' or quotes.
            safe_title = html.escape(str(title))
            safe_error = html.escape(str(e))
            fallback = (
                f"<html><head><title>{safe_title}</title></head>"
                f"<body><h1>{safe_title}</h1>"
                f"<p>Error rendering report: {safe_error}</p></body></html>"
            )
            return fallback, "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Clean and normalize HTML content from AI.

        Steps:
            1. Trim surrounding whitespace.
            2. If the text is wrapped in a markdown code fence, drop the
               opening and closing fence lines.
            3. If no DOCTYPE is present, prepend one to a full ``<html>``
               document, or wrap a fragment in a minimal document shell
               whose ``<title>`` is the (escaped) report title.

        Args:
            content: Raw text returned by the model.
            title: Report title for the generated ``<title>`` element.

        Returns:
            The normalized HTML document as a string.
        """
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            fence_lines = content.split('\n')
            if len(fence_lines) > 2:
                content = '\n'.join(fence_lines[1:-1]).strip()

        # DOCTYPE and tag names are case-insensitive in HTML, so compare a
        # lowercased prefix; otherwise '<!doctype html>' or '<HTML>' would be
        # mistaken for a fragment and nested inside a second document.
        prefix = content[:16].lower()
        if prefix.startswith('<!doctype'):
            return content

        if prefix.startswith('<html'):
            return '<!DOCTYPE html>\n' + content

        # Bare fragment: build a document shell around it. The title is
        # plain text, so it must be escaped before going into markup.
        safe_title = html.escape(str(title))
        return (
            '<!DOCTYPE html>\n<html>\n'
            f'<head><meta charset="UTF-8"><title>{safe_title}</title></head>\n'
            f'<body>\n{content}\n</body>\n</html>'
        )