"""
HTML renderer for report generation.
"""
|
|
|
|
import html
from typing import Dict, Any, Tuple, List

from .base_renderer import BaseRenderer
|
|
|
|
class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction.

    The renderer builds an instruction prompt asking for a complete HTML
    document (``getExtractionPrompt``) and post-processes the returned text
    into a document that starts with a DOCTYPE (``render``).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get HTML-specific extraction prompt.

        Args:
            user_prompt: Caller-supplied instructions, placed at the top of
                the prompt verbatim.
            title: Report title quoted inside the prompt.

        Returns:
            The full prompt text: the user prompt followed by structure,
            styling and output-policy requirements for the HTML report.
        """
        return f"""
{user_prompt}

Generate a comprehensive HTML report with the title: "{title}"

HTML STRUCTURE REQUIREMENTS:
- Create a complete, self-contained HTML document
- Start with: <!DOCTYPE html>
- Include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>
- Use proper HTML5 semantic elements: <header>, <main>, <section>, <article>, <footer>
- Include professional CSS styling in a <style> block
- Structure content with clear headings (h1, h2, h3)
- Use tables for structured data
- Use lists for bullet points
- Include source document information
- Add a footer with generation metadata

STYLING REQUIREMENTS:
- Professional, clean design
- Responsive layout
- Good typography and spacing
- Color scheme: blues and grays
- Tables with borders and alternating row colors
- Proper heading hierarchy

OUTPUT POLICY:
- Return ONLY the complete HTML document
- No markdown, no code blocks, no additional text
- Valid HTML5 that can be saved as .html file
- Include all necessary CSS inline
- Make it look professional and polished

CRITICAL: Use the actual data from the source documents to create the content. Do not generate placeholder text or templates. Extract and use the real data provided in the source documents to create meaningful content.

Generate the complete HTML report using the actual data from the source documents:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to HTML format.

        Args:
            extracted_content: Text expected to already be an HTML document
                (possibly wrapped in a markdown code fence).
            title: Report title, used when a wrapper or fallback document
                has to be generated.

        Returns:
            A ``(document, mime_type)`` tuple; the MIME type is always
            ``"text/html"``. If cleaning fails, a minimal error page is
            returned instead of raising.
        """
        try:
            # The extracted content should already be HTML from the AI;
            # just clean it up and make sure it is a full document.
            html_content = self._clean_html_content(extracted_content, title)
            return html_content, "text/html"
        except Exception as e:
            # NOTE(review): self.logger is not defined in this class; it is
            # presumably provided by BaseRenderer - confirm.
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Escape everything interpolated into markup: neither the title
            # nor the exception text is trusted to be HTML-safe.
            safe_title = html.escape(str(title))
            safe_error = html.escape(str(e))
            # Return minimal HTML fallback
            return (
                f"<html><head><title>{safe_title}</title></head>"
                f"<body><h1>{safe_title}</h1>"
                f"<p>Error rendering report: {safe_error}</p></body></html>"
            ), "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Clean and validate HTML content from AI.

        Strips surrounding whitespace, removes an enclosing markdown code
        fence, and guarantees the result starts with a DOCTYPE: a bare
        ``<html>`` document gets the DOCTYPE prepended, anything else is
        treated as a body fragment and wrapped in a minimal document.

        Args:
            content: Raw text to clean.
            title: Title for the wrapper document; HTML-escaped before use.

        Returns:
            The cleaned HTML document.
        """
        content = content.strip()

        # Remove markdown code blocks if present. Only the opening fence line
        # (which may carry a language tag) and the trailing ``` are removed,
        # so HTML sharing a line with the closing fence is not lost.
        if content.startswith("```") and content.endswith("```"):
            _opening, newline, remainder = content.partition('\n')
            if newline:
                content = remainder[:-3].strip()

        # DOCTYPE and tag names are case-insensitive in HTML, so compare a
        # lowercased prefix rather than the raw text.
        prefix = content[:9].lower()
        if prefix.startswith('<!doctype'):
            return content
        if prefix.startswith('<html'):
            return '<!DOCTYPE html>\n' + content

        # Fragment without a document shell: wrap it. The content itself is
        # intentionally left as markup; only the title is escaped.
        safe_title = html.escape(str(title))
        return (
            '<!DOCTYPE html>\n<html>\n'
            f'<head><meta charset="UTF-8"><title>{safe_title}</title></head>\n'
            f'<body>\n{content}\n</body>\n</html>'
        )
|