"""
Text renderer for report generation.
"""

import re
from typing import Dict, Any, Tuple, List

from .base_renderer import BaseRenderer


class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction.

    Besides true plain text, this renderer is registered for many source-code
    and configuration formats, so the clean-up applied to the extracted
    content is deliberately conservative: it removes only markup that is
    unambiguously Markdown/HTML decoration and leaves characters that carry
    meaning in code (``*``, ``_``, ``<``, ``>``) untouched everywhere else.
    """

    # Matches asterisk emphasis (*x*, **x**, ***x***) on a single line.
    # The markers must hug the emphasised text (no whitespace just inside)
    # and must not touch a word character, another '*', '/' or a backslash
    # on the outside. That keeps `a * b`, `2 ** 3`, `*args, **kwargs`,
    # `int *p, *q` and `/* comment */` intact.
    _EMPHASIS_RE = re.compile(
        r"(?<![\w*/\\])(\*{1,3})(?=[^\s*])(.+?)(?<=[^\s*])\1(?![\w*/])"
    )

    # Only tags of these common HTML elements are stripped. Restricting the
    # set (lower-case only) keeps XML elements, generics such as
    # `List<String>` and `#include <stdio.h>` in the output.
    _HTML_TAG_NAMES = (
        "a", "b", "blockquote", "body", "br", "code", "div", "em",
        "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
        "img", "li", "ol", "p", "pre", "small", "span", "strong", "sub",
        "sup", "table", "tbody", "td", "th", "thead", "tr", "u", "ul",
    )

    # An opening/closing/self-closing tag of one of the elements above with
    # optional, well-formed attributes. Requiring attribute syntax prevents
    # comparisons like `a<b && c>d` from being mistaken for a tag.
    _HTML_TAG_RE = re.compile(
        r"</?(?:" + "|".join(_HTML_TAG_NAMES) + r")"
        r"""(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"\n]*"|'[^'\n]*'|[^\s"'<>=]+))?)*"""
        r"\s*/?>"
    )

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally.

        ``user_prompt`` and ``title`` are accepted for interface
        compatibility but are not used when building the guidelines.
        """
        return (
            "TEXT FORMAT GUIDELINES:\n"
            "- Output ONLY plain text (no markdown or HTML).\n"
            "- Use clear headings (you may underline with === or --- when helpful).\n"
            "- Use simple bullet lists with '-' and tables with '|' when needed.\n"
            "- Preserve indentation for code-like content if present.\n"
            "OUTPUT: Return ONLY the raw text content."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Args:
            extracted_content: Text produced by the extraction step.
            title: Report title; only used in the error fallback.

        Returns:
            A ``(text, mime_type)`` tuple; the MIME type is always
            ``"text/plain"``. If cleaning fails, the text is a short error
            message prefixed with the title instead of an exception.
        """
        try:
            # The extracted content should already be formatted text from
            # the AI, so it only needs cleaning up.
            text_content = self._clean_text_content(extracted_content, title)
            return text_content, "text/plain"
        except Exception as e:
            # Rendering boundary: log and degrade to a minimal text fallback
            # rather than propagating the failure.
            self.logger.error(f"Error rendering text: {str(e)}")
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI.

        Steps, in order:
          1. Normalise ``\\r\\n`` / ``\\r`` line endings to ``\\n`` (done first
             so the fence detection below sees real line boundaries).
          2. Strip surrounding whitespace.
          3. Unwrap a single Markdown code fence enclosing the whole content.
          4. Remove paired asterisk emphasis markers (``*x*``, ``**x**``,
             ``***x***``), keeping the emphasised text.
          5. Remove tags of common HTML elements, keeping their inner text.

        Underscores are never removed: ``snake_case`` names and dunders such
        as ``__init__`` cannot be told apart from underscore emphasis.

        Args:
            content: Raw text returned by the extraction step.
            title: Unused; kept for interface compatibility.

        Returns:
            The cleaned text with ``\\n`` line endings.
        """
        content = content.replace('\r\n', '\n').replace('\r', '\n').strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Drop emphasis markers but keep the text they wrap.
        content = self._EMPHASIS_RE.sub(r"\2", content)

        # Clean up any HTML tags that might have slipped through.
        content = self._HTML_TAG_RE.sub('', content)

        return content