129 lines
5 KiB
Python
129 lines
5 KiB
Python
"""
|
|
Text renderer for report generation.
|
|
"""
|
|
|
|
from .base_renderer import BaseRenderer
|
|
from typing import Dict, Any, Tuple, List
|
|
|
|
class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction.

    ``getExtractionPrompt`` builds the instructions sent to the AI, and
    ``render`` sanitises the AI's answer (see ``_clean_text_content``) before
    returning it together with the ``text/plain`` MIME type.
    """

    # Asterisk emphasis (*x*, **x**, ***x***) confined to a single line.
    # The markers must hug the emphasised text and must not touch a word
    # character or another asterisk on the outside, so list bullets ("* item"),
    # arithmetic ("a*b*c", "a * b"), pointers ("char *p, *q") and argument
    # unpacking ("*args, **kwargs") are left alone.
    _ASTERISK_EMPHASIS_PATTERN = (
        r'(?<![\w*])(\*{1,3})(?=\S)([^*\n]+?)(?<=\S)\1(?![\w*])'
    )

    # Underscore emphasis (_x_, __x__) confined to a single line and never
    # starting or ending inside a word, so snake_case names are not matched.
    _UNDERSCORE_EMPHASIS_PATTERN = (
        r'(?<!\w)(_{1,2})(?=\S)([^_\n]+?)(?<=\S)\1(?!\w)'
    )

    # A tag-shaped token: "<", optional "/", then a letter or "!" (comments,
    # doctype). Requiring that shape keeps comparisons such as "a < b and c > d"
    # or "x <= y" intact, which a bare "<...>" match would delete.
    _HTML_TAG_PATTERN = r'</?[A-Za-z!][^<>]*>'

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Get text-specific extraction prompt for all plain text formats.

        Args:
            user_prompt: The caller's request; placed verbatim at the top of
                the prompt.
            title: Report title; interpolated into the generation instruction.

        Returns:
            The full prompt text: the user prompt followed by the plain-text
            formatting requirements and output policy.
        """
        # The template lines start at column 0 on purpose: any indentation
        # here would become part of the prompt text.
        return f"""
{user_prompt}

Generate a comprehensive text report with the title: "{title}"

TEXT FORMAT REQUIREMENTS:
- Use clear section headings with underlines (=== or ---)
- Structure content with proper hierarchy
- Use simple text formatting (no HTML, no markdown)
- Include tables using pipe separators (|)
- Use bullet points with dashes (-) or asterisks (*)
- Include source document information
- Add metadata at the end
- Preserve code formatting when dealing with programming files
- Maintain proper indentation for structured content

FORMATTING RULES:
- Title: Use equal signs (===) above and below
- Main sections: Use dashes (---) under headings
- Subsections: Use simple headings without underlines
- Tables: Use | separator, header row, then dash row
- Lists: Use - or * for bullet points
- Code blocks: Use indentation or simple formatting
- Source info: Include at end of each section
- Metadata: Include at very end with generation info

SUPPORTED TEXT FORMATS:
- Programming: .js, .py, .java, .cpp, .cs, .php, .rb, .go, .rs, .swift, .kt, .scala, .r, .m, .sh, .ps1, .bat
- Web: .css, .scss, .sass, .less, .xml, .yaml, .yml, .toml, .ini
- Data: .tsv, .log, .rst, .sql
- Config: .env, .properties, .conf, .config, .rc, .gitignore, .dockerfile
- Docs: .readme, .changelog, .license, .authors, .contributing, .todo, .notes

OUTPUT POLICY:
- Return ONLY plain text
- No HTML, no markdown, no code blocks
- Clean, readable text format
- Professional appearance
- Preserve code structure when appropriate
- Include all necessary information

Generate the complete text report:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Args:
            extracted_content: Text produced by the AI for the extraction
                prompt; expected to be already formatted.
            title: Report title, used only in the error fallback text.

        Returns:
            A ``(text, mime_type)`` tuple; the MIME type is always
            ``"text/plain"``. If cleaning raises, the error is logged and a
            minimal report containing the title and the error message is
            returned instead of propagating the exception.
        """
        try:
            # The extracted content should already be formatted text from the
            # AI, so it only needs sanitising.
            text_content = self._clean_text_content(extracted_content, title)
        except Exception as e:
            # Boundary handler: a renderer failure must still yield a document.
            self.logger.error(f"Error rendering text: {str(e)}")
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

        return text_content, "text/plain"

    @staticmethod
    def _unwrap_underscore_emphasis(match) -> str:
        """Return the replacement text for one underscore-emphasis match.

        Spans whose inner text is a single alphanumeric word (``__init__``,
        ``_private_``) look like identifiers rather than emphasis, so they are
        returned unchanged; anything else is unwrapped to its inner text.
        """
        inner = match.group(2)
        if inner.isalnum():
            return match.group(0)
        return inner

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI.

        Steps, in order: normalise line endings to ``\\n`` and trim the text,
        unwrap a Markdown code fence that encloses the whole text, unwrap
        paired Markdown emphasis markers, and remove HTML-like tags.

        Literal ``*`` and ``_`` characters that are not emphasis markers
        (bullets, snake_case names, operators) are preserved.

        Args:
            content: Raw text returned by the AI.
            title: Unused here; kept so the signature stays stable.

        Returns:
            The cleaned plain text.
        """
        import re  # function-local, as in the original implementation

        # Normalise line endings first so the fence handling below can rely
        # on "\n" even for CRLF or CR-only input.
        content = content.replace('\r\n', '\n').replace('\r', '\n').strip()

        # Unwrap a fence that encloses everything: drop the opening fence line
        # (including any language tag) and exactly the closing three
        # backticks, so text sharing a line with the closing fence survives.
        if content.startswith("```") and content.endswith("```"):
            _opening_line, newline, remainder = content.partition('\n')
            if newline:
                content = remainder[:-3].strip()

        # Remove Markdown emphasis markers but keep the emphasised text.
        content = re.sub(self._ASTERISK_EMPHASIS_PATTERN, r'\2', content)
        content = re.sub(
            self._UNDERSCORE_EMPHASIS_PATTERN,
            self._unwrap_underscore_emphasis,
            content,
        )

        # Clean up any HTML-like tags that might have slipped through.
        content = re.sub(self._HTML_TAG_PATTERN, '', content)

        return content
|