# gateway/modules/services/serviceGeneration/renderers/text_renderer.py

"""
Text renderer for report generation.
"""

import re
from typing import Dict, Any, Tuple, List

from .base_renderer import BaseRenderer

class TextRenderer(BaseRenderer):
    """Render AI-extracted report content as plain text.

    The renderer builds the extraction prompt sent to the AI and afterwards
    removes markup (code fences, markdown emphasis, HTML-like tags) that the
    AI may have emitted despite being asked for plain text.
    """

    # Paired markdown emphasis: 1-3 '*' or '_' wrapped tightly around text on a
    # single line. The delimiters must not touch a word character, '*' or '/'
    # on the outside and must touch non-space text on the inside. This keeps
    # "* bullet" items, arithmetic ("2 * 3"), snake_case identifiers,
    # "*args"/"**kwargs" and "/*comments*/" intact.
    # NOTE: a standalone dunder name such as __init__ is indistinguishable from
    # underscore-style bold and is still stripped to "init".
    _EMPHASIS_RE = re.compile(
        r'(?<![\w*/])(\*{1,3}|_{1,3})(?=[^\s*_])(.+?)(?<=[^\s*_])\1(?![\w*/])'
    )

    # HTML comments, or '<' immediately followed by an optional '/' or '!' and
    # a letter. Requiring the letter keeps comparison and shift operators
    # ("a < b and c > d", "x << 2") from being swallowed.
    _HTML_TAG_RE = re.compile(r'<!--.*?-->|<[/!]?[A-Za-z][^<>]*>', re.DOTALL)

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return alternative format names that also select this renderer."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return the priority value of the text renderer (90)."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Build the extraction prompt used for all plain text formats.

        Args:
            user_prompt: The user's request; placed at the top of the prompt.
            title: Report title; embedded in the generation instruction.

        Returns:
            The complete prompt text, including formatting rules and the
            plain-text-only output policy.
        """
        return f"""
{user_prompt}

Generate a comprehensive text report with the title: "{title}"

TEXT FORMAT REQUIREMENTS:
- Use clear section headings with underlines (=== or ---)
- Structure content with proper hierarchy
- Use simple text formatting (no HTML, no markdown)
- Include tables using pipe separators (|)
- Use bullet points with dashes (-) or asterisks (*)
- Include source document information
- Add metadata at the end
- Preserve code formatting when dealing with programming files
- Maintain proper indentation for structured content

FORMATTING RULES:
- Title: Use equal signs (===) above and below
- Main sections: Use dashes (---) under headings
- Subsections: Use simple headings without underlines
- Tables: Use | separator, header row, then dash row
- Lists: Use - or * for bullet points
- Code blocks: Use indentation or simple formatting
- Source info: Include at end of each section
- Metadata: Include at very end with generation info

SUPPORTED TEXT FORMATS:
- Programming: .js, .py, .java, .cpp, .cs, .php, .rb, .go, .rs, .swift, .kt, .scala, .r, .m, .sh, .ps1, .bat
- Web: .css, .scss, .sass, .less, .xml, .yaml, .yml, .toml, .ini
- Data: .tsv, .log, .rst, .sql
- Config: .env, .properties, .conf, .config, .rc, .gitignore, .dockerfile
- Docs: .readme, .changelog, .license, .authors, .contributing, .todo, .notes

OUTPUT POLICY:
- Return ONLY plain text
- No HTML, no markdown, no code blocks
- Clean, readable text format
- Professional appearance
- Preserve code structure when appropriate
- Include all necessary information

Generate the complete text report:
"""

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Args:
            extracted_content: Text produced by the AI for the extraction prompt.
            title: Report title; only used in the error fallback text.

        Returns:
            A ``(content, mime_type)`` tuple; the MIME type is always
            ``"text/plain"``. If cleaning fails, the content is a short error
            report instead of the rendered text.
        """
        try:
            # The extracted content should already be formatted text from the
            # AI, so rendering only needs to clean it up.
            text_content = self._clean_text_content(extracted_content, title)

            return text_content, "text/plain"

        except Exception as e:
            # Deliberate best-effort boundary: log the failure and still hand
            # back a minimal plain-text document rather than raising.
            # NOTE(review): self.logger is not defined in this class; it is
            # presumably provided by BaseRenderer - confirm.
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Strip stray markup from AI output while keeping the text intact.

        Steps, in order: normalise line endings to ``\\n``; unwrap a code fence
        that encloses the whole content; remove paired markdown emphasis
        markers (``**bold**``, ``*italic*``, ``__bold__``, ``_italic_``);
        remove HTML-like tags and HTML comments.

        Unlike a blanket removal of ``*`` and ``_``, only paired emphasis
        delimiters are dropped, so asterisk bullets (which the extraction
        prompt allows), arithmetic and snake_case identifiers survive.

        Args:
            content: Raw text returned by the AI.
            title: Report title; currently unused by the cleaning steps.

        Returns:
            The cleaned text.
        """
        # Normalise line endings first so every later step sees plain '\n'.
        content = content.replace('\r\n', '\n').replace('\r', '\n').strip()

        # Remove a markdown code fence wrapped around the whole content.
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Drop emphasis delimiters but keep the emphasised text (group 2).
        content = self._EMPHASIS_RE.sub(r'\2', content)

        # Clean up any HTML-like tags that might have slipped through.
        content = self._HTML_TAG_RE.sub('', content)

        return content