diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 6c6f76e2..5f24e158 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -746,8 +746,13 @@ Return only the JSON structure with actual content from the image. Do not includ # Process any document container as text content request_options = options if options is not None else AiCallOptions() request_options.operationType = OperationType.GENERAL - print(f"๐Ÿ” Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") + print(f"๐Ÿ” EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") + + # Log extraction prompt and context + print(f"๐Ÿ” EXTRACTION PROMPT: {prompt}") + print(f"๐Ÿ” EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters") + request = AiCallRequest( prompt=prompt, context=part.data, @@ -756,6 +761,23 @@ Return only the JSON structure with actual content from the image. Do not includ response = await self.aiObjects.call(request) ai_result = response.content + # Log extraction response + print(f"๐Ÿ” EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters") + + # Save full extraction prompt and response to debug file + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") + f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass + # If generating JSON, validate the response if generate_json: try: @@ -798,8 +820,13 @@ Return only the JSON structure with actual content from the image. Do not includ request_options = options if options is not None else AiCallOptions() # FIXED: Set operation type to general for text processing request_options.operationType = OperationType.GENERAL - print(f"๐Ÿ” Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") + print(f"๐Ÿ” EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") + + # Log extraction prompt and context + print(f"๐Ÿ” EXTRACTION PROMPT: {prompt}") + print(f"๐Ÿ” EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters") + request = AiCallRequest( prompt=prompt, context=part.data, @@ -808,6 +835,23 @@ Return only the JSON structure with actual content from the image. Do not includ response = await self.aiObjects.call(request) ai_result = response.content + # Log extraction response + print(f"๐Ÿ” EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters") + + # Save full extraction prompt and response to debug file + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") + f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass + # If generating JSON, validate the response if generate_json: try: diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 13c20fad..2d3aa21f 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -318,18 +318,17 @@ class GenerationService: if "sections" not in extractedContent: raise ValueError("extractedContent must contain 'sections' field") - # DEBUG: dump renderer input to diagnose JSON structure TODO REMOVE + # DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE try: import os - import json ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") debug_root = "./test-chat/ai" debug_dir = os.path.join(debug_root, f"render_input_{ts}") os.makedirs(debug_dir, exist_ok=True) with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f: f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n") - with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as f: - json.dump(extractedContent, f, indent=2, ensure_ascii=False) + f.write(f"content_size: {len(str(extractedContent))} characters\n") + f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n") except Exception: pass diff --git a/modules/services/serviceGeneration/renderers/base_renderer.py b/modules/services/serviceGeneration/renderers/base_renderer.py deleted file mode 100644 index dd91be09..00000000 --- a/modules/services/serviceGeneration/renderers/base_renderer.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -Base renderer class for all format renderers. -""" - -from abc import ABC, abstractmethod -from typing import Dict, Any, Tuple, List -import logging - -logger = logging.getLogger(__name__) - -class BaseRenderer(ABC): - """Base class for all format renderers.""" - - def __init__(self): - self.logger = logger - - @classmethod - def get_supported_formats(cls) -> List[str]: - """ - Return list of supported format names for this renderer. - Override this method in subclasses to specify supported formats. - """ - return [] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """ - Return list of format aliases for this renderer. - Override this method in subclasses to specify format aliases. - """ - return [] - - @classmethod - def get_priority(cls) -> int: - """ - Return priority for this renderer (higher number = higher priority). - Used when multiple renderers support the same format. - """ - return 0 - - @abstractmethod - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """ - Get the format-specific extraction prompt for AI content extraction. - - Args: - user_prompt: User's original prompt for report generation - title: Report title - - Returns: - str: Format-specific prompt for AI extraction - """ - pass - - @abstractmethod - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """ - Render extracted content to the target format. - - Args: - extracted_content: Raw content extracted by AI using format-specific prompt - title: Report title - - Returns: - tuple: (rendered_content, mime_type) - """ - pass - - def _extract_sections(self, report_data: Dict[str, Any]) -> list: - """Extract sections from report data.""" - return report_data.get('sections', []) - - def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]: - """Extract metadata from report data.""" - return report_data.get('metadata', {}) - - def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str: - """Get title from report data or use fallback.""" - return report_data.get('title', fallback_title) - - def _format_timestamp(self, timestamp: str = None) -> str: - """Format timestamp for display.""" - if timestamp: - return timestamp - from datetime import datetime, UTC - return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/html_renderer.py b/modules/services/serviceGeneration/renderers/html_renderer.py deleted file mode 100644 index c2b7e586..00000000 --- a/modules/services/serviceGeneration/renderers/html_renderer.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -HTML renderer for report generation. -""" - -from .base_renderer import BaseRenderer -from typing import Dict, Any, Tuple, List - -class HtmlRenderer(BaseRenderer): - """Renders content to HTML format with format-specific extraction.""" - - @classmethod - def get_supported_formats(cls) -> List[str]: - """Return supported HTML formats.""" - return ['html', 'htm'] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """Return format aliases.""" - return ['web', 'webpage'] - - @classmethod - def get_priority(cls) -> int: - """Return priority for HTML renderer.""" - return 100 - - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only HTML-specific guidelines; global prompt is built centrally.""" - return ( - "HTML FORMAT GUIDELINES:\n" - "- Output a complete HTML5 document starting with .\n" - "- Include , with and , and <body>.\n" - "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n" - "- Provide professional CSS in a <style> block; responsive, clean typography.\n" - "- Use h1/h2/h3 for headings; tables and lists for structure.\n" - "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)." - ) - - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """Render extracted content to HTML format.""" - try: - # The extracted content should already be HTML from the AI - # Just clean it up and ensure it's valid - html_content = self._clean_html_content(extracted_content, title) - - return html_content, "text/html" - - except Exception as e: - self.logger.error(f"Error rendering HTML: {str(e)}") - # Return minimal HTML fallback - return f"<html><head><title>{title}

{title}

Error rendering report: {str(e)}

", "text/html" - - def _clean_html_content(self, content: str, title: str) -> str: - """Clean and validate HTML content from AI.""" - content = content.strip() - - # Remove markdown code blocks if present - if content.startswith("```") and content.endswith("```"): - lines = content.split('\n') - if len(lines) > 2: - content = '\n'.join(lines[1:-1]).strip() - - # Ensure it starts with DOCTYPE - if not content.startswith('\n' + content - else: - content = f'\n\n{title}\n\n{content}\n\n' - - return content diff --git a/modules/services/serviceGeneration/renderers/json_renderer.py b/modules/services/serviceGeneration/renderers/json_renderer.py deleted file mode 100644 index 845d33c2..00000000 --- a/modules/services/serviceGeneration/renderers/json_renderer.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -JSON renderer for report generation. -""" - -from .base_renderer import BaseRenderer -from typing import Dict, Any, Tuple, List -import json - -class JsonRenderer(BaseRenderer): - """Renders content to JSON format with format-specific extraction.""" - - @classmethod - def get_supported_formats(cls) -> List[str]: - """Return supported JSON formats.""" - return ['json'] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """Return format aliases.""" - return ['data'] - - @classmethod - def get_priority(cls) -> int: - """Return priority for JSON renderer.""" - return 80 - - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only JSON-specific guidelines; global prompt is built centrally.""" - return ( - "JSON FORMAT GUIDELINES:\n" - "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n" - "- Choose a structure that best fits the user's intent; include a top-level title and data.\n" - "- Prefer arrays/objects that map cleanly to the extracted facts.\n" - "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n" - "OUTPUT: Return ONLY valid, parseable JSON." - ) - - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """Render extracted content to JSON format.""" - try: - # The extracted content should already be JSON from the AI - # Just validate and format it - json_content = self._clean_json_content(extracted_content, title) - - return json_content, "application/json" - - except Exception as e: - self.logger.error(f"Error rendering JSON: {str(e)}") - # Return minimal JSON fallback - fallback_data = { - "title": title, - "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}], - "metadata": {"error": str(e)} - } - return json.dumps(fallback_data, indent=2), "application/json" - - def _clean_json_content(self, content: str, title: str) -> str: - """Clean and validate JSON content from AI.""" - content = content.strip() - - # Remove markdown code blocks if present - if content.startswith("```") and content.endswith("```"): - lines = content.split('\n') - if len(lines) > 2: - content = '\n'.join(lines[1:-1]).strip() - - # Validate JSON - try: - parsed = json.loads(content) - # Re-format with proper indentation - return json.dumps(parsed, indent=2, ensure_ascii=False) - except json.JSONDecodeError: - # If not valid JSON, return as-is - return content diff --git a/modules/services/serviceGeneration/renderers/markdown_renderer.py b/modules/services/serviceGeneration/renderers/markdown_renderer.py deleted file mode 100644 index 8b9b4293..00000000 --- a/modules/services/serviceGeneration/renderers/markdown_renderer.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -Markdown renderer for report generation. -""" - -from .base_renderer import BaseRenderer -from typing import Dict, Any, Tuple, List - -class MarkdownRenderer(BaseRenderer): - """Renders content to Markdown format with format-specific extraction.""" - - @classmethod - def get_supported_formats(cls) -> List[str]: - """Return supported Markdown formats.""" - return ['md', 'markdown'] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """Return format aliases.""" - return ['mdown', 'mkd'] - - @classmethod - def get_priority(cls) -> int: - """Return priority for markdown renderer.""" - return 95 - - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only Markdown-specific guidelines; global prompt is built centrally.""" - return ( - "MARKDOWN FORMAT GUIDELINES:\n" - "- Use proper Markdown syntax only (no HTML wrappers).\n" - "- # for main title, ## for sections, ### for subsections.\n" - "- Tables with | separators and a header row.\n" - "- Bullet lists with - or *.\n" - "- Emphasis with **bold** and *italic*.\n" - "- Code blocks with ```language.\n" - "- Horizontal rules (---) to separate major sections when helpful.\n" - "- Include links [text](url) and images ![alt](url) when referenced by sources.\n" - "OUTPUT: Return ONLY raw Markdown content without code fences." - ) - - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """Render extracted content to Markdown format.""" - try: - # The extracted content should already be Markdown from the AI - # Just clean it up - markdown_content = self._clean_markdown_content(extracted_content, title) - - return markdown_content, "text/markdown" - - except Exception as e: - self.logger.error(f"Error rendering markdown: {str(e)}") - # Return minimal markdown fallback - return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" - - def _clean_markdown_content(self, content: str, title: str) -> str: - """Clean and validate Markdown content from AI.""" - content = content.strip() - - # Remove markdown code blocks if present - if content.startswith("```") and content.endswith("```"): - lines = content.split('\n') - if len(lines) > 2: - content = '\n'.join(lines[1:-1]).strip() - - return content diff --git a/modules/services/serviceGeneration/renderers/pdf_renderer.py b/modules/services/serviceGeneration/renderers/pdf_renderer.py deleted file mode 100644 index 6a8409a1..00000000 --- a/modules/services/serviceGeneration/renderers/pdf_renderer.py +++ /dev/null @@ -1,225 +0,0 @@ -""" -PDF renderer for report generation using reportlab. -""" - -from .base_renderer import BaseRenderer -from typing import Dict, Any, Tuple, List -import io -import base64 -from datetime import datetime, UTC - -try: - from reportlab.lib.pagesizes import letter, A4 - from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak - from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle - from reportlab.lib.units import inch - from reportlab.lib import colors - from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY - REPORTLAB_AVAILABLE = True -except ImportError: - REPORTLAB_AVAILABLE = False - -class PdfRenderer(BaseRenderer): - """Renders content to PDF format using reportlab.""" - - @classmethod - def get_supported_formats(cls) -> List[str]: - """Return supported PDF formats.""" - return ['pdf'] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """Return format aliases.""" - return ['document', 'print'] - - @classmethod - def get_priority(cls) -> int: - """Return priority for PDF renderer.""" - return 120 - - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only PDF-specific guidelines; global prompt is built centrally.""" - return ( - "PDF FORMAT GUIDELINES:\n" - "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n" - "- Use bullet lists and tables where useful; separate major sections clearly.\n" - "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n" - "OUTPUT: Return ONLY the PDF-ready textual content (no fences)." - ) - - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """Render extracted content to PDF format.""" - try: - if not REPORTLAB_AVAILABLE: - # Fallback to HTML if reportlab not available - from .html_renderer import HtmlRenderer - html_renderer = HtmlRenderer() - html_content, _ = await html_renderer.render(extracted_content, title) - return html_content, "text/html" - - # Generate PDF using reportlab - pdf_content = self._generate_pdf(extracted_content, title) - - return pdf_content, "application/pdf" - - except Exception as e: - self.logger.error(f"Error rendering PDF: {str(e)}") - # Return minimal fallback - return f"PDF Generation Error: {str(e)}", "text/plain" - - def _generate_pdf(self, content: str, title: str) -> str: - """Generate PDF content using reportlab.""" - try: - # Create a buffer to hold the PDF - buffer = io.BytesIO() - - # Create PDF document - doc = SimpleDocTemplate( - buffer, - pagesize=A4, - rightMargin=72, - leftMargin=72, - topMargin=72, - bottomMargin=18 - ) - - # Get styles - styles = getSampleStyleSheet() - - # Create custom styles - title_style = ParagraphStyle( - 'CustomTitle', - parent=styles['Heading1'], - fontSize=24, - spaceAfter=30, - alignment=TA_CENTER, - textColor=colors.darkblue - ) - - heading_style = ParagraphStyle( - 'CustomHeading', - parent=styles['Heading2'], - fontSize=16, - spaceAfter=12, - spaceBefore=12, - textColor=colors.darkblue - ) - - # Build PDF content - story = [] - - # Title page - story.append(Paragraph(title, title_style)) - story.append(Spacer(1, 20)) - story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal'])) - story.append(PageBreak()) - - # Process content - lines = content.split('\n') - current_section = [] - - for line in lines: - line = line.strip() - if not line: - continue - - # Check for headings - if line.startswith('# '): - # H1 heading - if current_section: - story.extend(self._process_section(current_section, styles)) - current_section = [] - story.append(Paragraph(line[2:], title_style)) - story.append(Spacer(1, 12)) - elif line.startswith('## '): - # H2 heading - if current_section: - story.extend(self._process_section(current_section, styles)) - current_section = [] - story.append(Paragraph(line[3:], heading_style)) - story.append(Spacer(1, 8)) - elif line.startswith('### '): - # H3 heading - if current_section: - story.extend(self._process_section(current_section, styles)) - current_section = [] - story.append(Paragraph(line[4:], styles['Heading3'])) - story.append(Spacer(1, 6)) - else: - current_section.append(line) - - # Process remaining content - if current_section: - story.extend(self._process_section(current_section, styles)) - - # Build PDF - doc.build(story) - - # Get PDF content as base64 - buffer.seek(0) - pdf_bytes = buffer.getvalue() - pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') - - return pdf_base64 - - except Exception as e: - self.logger.error(f"Error generating PDF: {str(e)}") - raise - - def _process_section(self, lines: list, styles) -> list: - """Process a section of content into PDF elements.""" - elements = [] - - for line in lines: - if not line.strip(): - continue - - # Check for tables (lines with |) - if '|' in line and not line.startswith('|'): - # This might be part of a table, process as table - table_data = self._extract_table_data(lines) - if table_data: - table = Table(table_data) - table.setStyle(TableStyle([ - ('BACKGROUND', (0, 0), (-1, 0), colors.grey), - ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), - ('ALIGN', (0, 0), (-1, -1), 'CENTER'), - ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), - ('FONTSIZE', (0, 0), (-1, 0), 14), - ('BOTTOMPADDING', (0, 0), (-1, 0), 12), - ('BACKGROUND', (0, 1), (-1, -1), colors.beige), - ('GRID', (0, 0), (-1, -1), 1, colors.black) - ])) - elements.append(table) - elements.append(Spacer(1, 12)) - return elements - - # Check for lists - if line.startswith('- ') or line.startswith('* '): - # This is a list item - elements.append(Paragraph(f"โ€ข {line[2:]}", styles['Normal'])) - else: - # Regular paragraph - elements.append(Paragraph(line, styles['Normal'])) - - elements.append(Spacer(1, 6)) - return elements - - def _extract_table_data(self, lines: list) -> list: - """Extract table data from lines.""" - table_data = [] - in_table = False - - for line in lines: - if '|' in line: - if not in_table: - in_table = True - # Split by | and clean up - cells = [cell.strip() for cell in line.split('|') if cell.strip()] - if cells: - table_data.append(cells) - elif in_table and not line.strip(): - # Empty line, might be end of table - break - - return table_data if len(table_data) > 1 else [] \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py index 5c498081..6843c114 100644 --- a/modules/services/serviceGeneration/renderers/registry.py +++ b/modules/services/serviceGeneration/renderers/registry.py @@ -6,7 +6,7 @@ import logging import importlib import pkgutil from typing import Dict, Type, List, Optional -from .base_renderer import BaseRenderer +from .rendererBaseTemplate import BaseRenderer logger = logging.getLogger(__name__) @@ -37,7 +37,7 @@ class RendererRegistry: # Scan all Python files in the renderers directory for file_path in renderers_dir.glob("*.py"): - if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']: + if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: continue # Extract module name from filename diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py new file mode 100644 index 00000000..d4b147a7 --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -0,0 +1,285 @@ +""" +Base renderer class for all format renderers. +""" + +from abc import ABC, abstractmethod +from typing import Dict, Any, Tuple, List +import logging +import json + +logger = logging.getLogger(__name__) + +class BaseRenderer(ABC): + """Base class for all format renderers.""" + + def __init__(self): + self.logger = logger + + @classmethod + def get_supported_formats(cls) -> List[str]: + """ + Return list of supported format names for this renderer. + Override this method in subclasses to specify supported formats. + """ + return [] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """ + Return list of format aliases for this renderer. + Override this method in subclasses to specify format aliases. + """ + return [] + + @classmethod + def get_priority(cls) -> int: + """ + Return priority for this renderer (higher number = higher priority). + Used when multiple renderers support the same format. + """ + return 0 + + @abstractmethod + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """ + Render extracted JSON content to the target format. + + Args: + extracted_content: Structured JSON content with sections and metadata + title: Report title + user_prompt: Original user prompt for context + ai_service: AI service instance for additional processing + + Returns: + tuple: (rendered_content, mime_type) + """ + pass + + def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]: + """Extract sections from report data.""" + return report_data.get('sections', []) + + def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]: + """Extract metadata from report data.""" + return report_data.get('metadata', {}) + + def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str: + """Get title from report data or use fallback.""" + metadata = report_data.get('metadata', {}) + return metadata.get('title', fallback_title) + + def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool: + """Validate that JSON content has the expected structure.""" + if not isinstance(json_content, dict): + return False + + if "sections" not in json_content: + return False + + sections = json_content.get("sections", []) + if not isinstance(sections, list): + return False + + # Validate each section has type and data + for section in sections: + if not isinstance(section, dict): + return False + if "type" not in section or "data" not in section: + return False + + return True + + def _get_section_type(self, section: Dict[str, Any]) -> str: + """Get the type of a section.""" + return section.get("type", "paragraph") + + def _get_section_data(self, section: Dict[str, Any]) -> Dict[str, Any]: + """Get the data of a section.""" + return section.get("data", {}) + + def _get_section_id(self, section: Dict[str, Any]) -> str: + """Get the ID of a section (if available).""" + return section.get("id", "unknown") + + def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: + """Extract table headers and rows from section data.""" + headers = section_data.get("headers", []) + rows = section_data.get("rows", []) + return headers, rows + + def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]: + """Extract bullet list items from section data.""" + items = section_data.get("items", []) + result = [] + for item in items: + if isinstance(item, str): + result.append(item) + elif isinstance(item, dict) and "text" in item: + result.append(item["text"]) + return result + + def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]: + """Extract heading level and text from section data.""" + level = section_data.get("level", 1) + text = section_data.get("text", "") + return level, text + + def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str: + """Extract paragraph text from section data.""" + return section_data.get("text", "") + + def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: + """Extract code and language from section data.""" + code = section_data.get("code", "") + language = section_data.get("language", "") + return code, language + + def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: + """Extract base64 data and alt text from section data.""" + base64_data = section_data.get("base64Data", "") + alt_text = section_data.get("altText", "Image") + return base64_data, alt_text + + def _get_supported_section_types(self) -> List[str]: + """Return list of supported section types.""" + return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"] + + def _is_valid_section_type(self, section_type: str) -> bool: + """Check if a section type is valid.""" + return section_type in self._get_supported_section_types() + + def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]: + """Process a section and return structured data based on its type.""" + section_type = self._get_section_type(section) + section_data = self._get_section_data(section) + + if section_type == "table": + headers, rows = self._extract_table_data(section_data) + return {"type": "table", "headers": headers, "rows": rows} + elif section_type == "bullet_list": + items = self._extract_bullet_list_items(section_data) + return {"type": "bullet_list", "items": items} + elif section_type == "heading": + level, text = self._extract_heading_data(section_data) + return {"type": "heading", "level": level, "text": text} + elif section_type == "paragraph": + text = self._extract_paragraph_text(section_data) + return {"type": "paragraph", "text": text} + elif section_type == "code_block": + code, language = self._extract_code_block_data(section_data) + return {"type": "code_block", "code": code, "language": language} + elif section_type == "image": + base64_data, alt_text = self._extract_image_data(section_data) + return {"type": "image", "base64Data": base64_data, "altText": alt_text} + else: + # Fallback to paragraph + text = self._extract_paragraph_text(section_data) + return {"type": "paragraph", "text": text} + + def _format_timestamp(self, timestamp: str = None) -> str: + """Format timestamp for display.""" + if timestamp: + return timestamp + from datetime import datetime, UTC + return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") + + # ===== GENERIC AI STYLING HELPERS ===== + + async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + """ + Generic AI styling method that can be used by all renderers. + + Args: + ai_service: AI service instance + style_template: Format-specific style template + default_styles: Default styles to fall back to + + Returns: + Dict with styling definitions + """ + if not ai_service: + return default_styles + + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + request = AiCallRequest(prompt=style_template, context="", options=request_options) + response = await ai_service.aiObjects.call(request) + + import json + import re + + # Debug output + print(f"๐Ÿ” AI STYLING RESPONSE TYPE: {type(response)}") + print(f"๐Ÿ” AI STYLING RESPONSE LENGTH: {len(response.content) if response and hasattr(response, 'content') and response.content else 0}") + + # Clean and parse JSON + result = response.content.strip() if response and response.content else "" + + # Check if result is empty + if not result: + self.logger.warning("AI styling returned empty response, using defaults") + return default_styles + + # Extract JSON from markdown if present + json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) + if json_match: + result = json_match.group(1).strip() + print(f"๐Ÿ” EXTRACTED JSON FROM MARKDOWN: {result[:100]}...") + elif result.startswith('```json'): + result = re.sub(r'^```json\s*', '', result) + result = re.sub(r'\s*```$', '', result) + print(f"๐Ÿ” CLEANED JSON FROM MARKDOWN: {result[:100]}...") + elif result.startswith('```'): + result = re.sub(r'^```\s*', '', result) + result = re.sub(r'\s*```$', '', result) + print(f"๐Ÿ” CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...") + + # Try to parse JSON + try: + styles = json.loads(result) + print(f"๐Ÿ” AI STYLING PARSED KEYS: {list(styles.keys()) if isinstance(styles, dict) else 'Not a dict'}") + except json.JSONDecodeError as json_error: + print(f"๐Ÿ” AI STYLING JSON ERROR: {json_error}") + print(f"๐Ÿ” AI STYLING RAW RESULT: {result[:200]}...") + self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults") + return default_styles + + # Convert colors to appropriate format + styles = self._convert_colors_format(styles) + + return styles + + except Exception as e: + self.logger.warning(f"AI styling failed: {str(e)}, using defaults") + return default_styles + + def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + """ + Convert colors to appropriate format based on renderer type. + Override this method in subclasses for format-specific color handling. + """ + return styles + + def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str: + """ + Create a standardized AI style template for any format. + + Args: + format_name: Name of the format (e.g., "docx", "xlsx", "pptx") + user_prompt: User's original prompt + style_schema: Format-specific style schema + + Returns: + Formatted prompt string + """ + schema_json = json.dumps(style_schema, indent=4) + + return f"""Return this exact JSON structure with your styling customizations: + +{schema_json} + +NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE.""" \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/csv_renderer.py b/modules/services/serviceGeneration/renderers/rendererCsv.py similarity index 91% rename from modules/services/serviceGeneration/renderers/csv_renderer.py rename to modules/services/serviceGeneration/renderers/rendererCsv.py index 0e35eb30..782e7d4a 100644 --- a/modules/services/serviceGeneration/renderers/csv_renderer.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -2,12 +2,12 @@ CSV renderer for report generation. """ -from .base_renderer import BaseRenderer +from .rendererBaseTemplate import BaseRenderer from typing import Dict, Any, Tuple, List import csv import io -class CsvRenderer(BaseRenderer): +class RendererCsv(BaseRenderer): """Renders content to CSV format with format-specific extraction.""" @classmethod @@ -25,20 +25,6 @@ class CsvRenderer(BaseRenderer): """Return priority for CSV renderer.""" return 70 - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only CSV-specific guidelines; global prompt is built centrally.""" - return ( - "CSV FORMAT GUIDELINES:\n" - "- Extract structured data from source documents into JSON format\n" - "- Focus on tabular data, lists, and structured information\n" - "- For tables: Extract headers and rows as separate arrays\n" - "- For lists: Extract items with optional sub-items\n" - "- Structure content into sections with clear content types\n" - "- Use proper JSON structure with metadata, sections, and elements\n" - "- Ensure data is clean and ready for CSV conversion\n" - "OUTPUT: Return structured JSON that can be converted to CSV format." - ) - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: """Render extracted JSON content to CSV format.""" try: diff --git a/modules/services/serviceGeneration/renderers/docx_renderer.py b/modules/services/serviceGeneration/renderers/rendererDocx.py similarity index 80% rename from modules/services/serviceGeneration/renderers/docx_renderer.py rename to modules/services/serviceGeneration/renderers/rendererDocx.py index d3781797..b972fc07 100644 --- a/modules/services/serviceGeneration/renderers/docx_renderer.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -2,7 +2,7 @@ DOCX renderer for report generation using python-docx. """ -from .base_renderer import BaseRenderer +from .rendererBaseTemplate import BaseRenderer from typing import Dict, Any, Tuple, List import io import base64 @@ -22,7 +22,7 @@ try: except ImportError: DOCX_AVAILABLE = False -class DocxRenderer(BaseRenderer): +class RendererDocx(BaseRenderer): """Renders content to DOCX format using python-docx.""" @classmethod @@ -40,30 +40,14 @@ class DocxRenderer(BaseRenderer): """Return priority for DOCX renderer.""" return 115 - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only DOCX-specific guidelines; global prompt is built centrally.""" - return ( - "DOCX FORMAT GUIDELINES:\n" - "- Extract the ACTUAL table data, lists, and content from the source documents\n" - "- For tables: Extract all rows and columns in pipe-separated format (Column1 | Column2 | Column3)\n" - "- For lists: Extract the actual list items, not summaries\n" - "- Structure your response with clear headings using numbered format: 1) Heading, 2) Heading, etc.\n" - "- Use bullet points (-) for lists and sub-items\n" - "- Use **bold** for emphasis on key terms\n" - "- Provide clean, structured content that can be directly converted to Word formatting\n" - "- Do NOT include debug information, separators (---), metadata, or FILENAME headers\n" - "- Start directly with your content - no introductory text or separators\n" - "- Extract raw data, not analysis or summaries\n" - "OUTPUT: Return ONLY the structured plain text to be converted into DOCX." - ) - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" + print(f"๐Ÿ” DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...") try: if not DOCX_AVAILABLE: # Fallback to HTML if python-docx not available - from .html_renderer import HtmlRenderer - html_renderer = HtmlRenderer() + from .rendererHtml import RendererHtml + html_renderer = RendererHtml() html_content, _ = await html_renderer.render(extracted_content, title) return html_content, "text/html" @@ -84,7 +68,10 @@ class DocxRenderer(BaseRenderer): doc = Document() # Get AI-generated styling definitions + print(f"๐Ÿ” ABOUT TO CALL AI STYLING: user_prompt={user_prompt[:50] if user_prompt else 'None'}...") + self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...") styles = await self._get_docx_styles(user_prompt, ai_service) + print(f"๐Ÿ” AI STYLING RESULT: {type(styles)}") # Apply basic document setup self._setup_basic_document_styles(doc) @@ -125,61 +112,24 @@ class DocxRenderer(BaseRenderer): raise Exception(f"DOCX generation failed: {str(e)}") async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: - """Simple AI call to get DOCX styling definitions.""" - if not ai_service: - return self._get_default_styles() + """Get DOCX styling definitions using base template AI styling.""" + style_schema = { + "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, + "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"}, + "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"}, + "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"}, + "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"}, + "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"}, + "table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"}, + "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20}, + "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} + } - try: - prompt = f""" -For this DOCX document request: "{user_prompt}" - -Provide styling definitions for DOCX elements. IMPORTANT: Ensure proper contrast - never use white text on white background or dark text on dark background. Respond with ONLY JSON: - -{{ - "title": {{"font_size": 24, "color": "#1F4E79", "bold": true, "align": "center"}}, - "heading1": {{"font_size": 18, "color": "#2F2F2F", "bold": true, "align": "left"}}, - "heading2": {{"font_size": 14, "color": "#4F4F4F", "bold": true, "align": "left"}}, - "paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}}, - "table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}}, - "table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}}, - "table_border": {{"style": "horizontal_only", "color": "#000000", "thickness": "thin"}}, - "bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 20}}, - "code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}} -}} - -CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability. -""" - - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType - - request_options = AiCallOptions() - request_options.operationType = OperationType.GENERAL - - request = AiCallRequest(prompt=prompt, context="", options=request_options) - response = await ai_service.aiObjects.call(request) - - import json - import re - - # Clean and parse JSON - result = response.content.strip() - if result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - styles = json.loads(result) - - # Validate and fix contrast issues - styles = self._validate_styles_contrast(styles) - - return styles - - except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return self._get_default_styles() + style_template = self._create_ai_style_template("docx", user_prompt, style_schema) + styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles()) + + # Validate and fix contrast issues + return self._validate_styles_contrast(styles) def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" @@ -1005,145 +955,4 @@ CRITICAL: Table headers must have dark background with light text, table cells m # Bold text if part: run = para.add_run(part) - run.bold = True - - def _add_bullet_point(self, doc, text: str): - """Add a bullet point to the document.""" - if not text.strip(): - return - - # Create paragraph with bullet style - para = doc.add_paragraph(text, style='List Bullet') - - # Check for Markdown formatting in bullet point - if '**' in text or '*' in text: - # Clear the paragraph and rebuild with formatting - para.clear() - self._add_paragraph_to_doc(doc, text) - - def _style_table(self, table): - """Apply styling to the table.""" - try: - # Style header row - if len(table.rows) > 0: - header_cells = table.rows[0].cells - for cell in header_cells: - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - except Exception as e: - self.logger.warning(f"Could not style table: {str(e)}") - - def _format_timestamp(self) -> str: - """Format current timestamp for document generation.""" - from datetime import datetime, UTC - return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") - """Process a table row and add it to the document.""" - if not line.strip(): - return - - # Clean the line - remove bullet point markers and bold markers - clean_line = line.strip() - if clean_line.startswith('โ€ข'): - clean_line = clean_line[1:] # Remove "โ€ข" - elif clean_line.startswith('- **'): - clean_line = clean_line[4:] # Remove "- **" - elif clean_line.startswith('- '): - clean_line = clean_line[2:] # Remove "- " - elif clean_line.startswith('**'): - clean_line = clean_line[2:] # Remove "**" - - # Remove trailing ** if present - if clean_line.endswith('**'): - clean_line = clean_line[:-2] - - # Split by pipe separator - parts = [part.strip() for part in clean_line.split('|')] - - if len(parts) >= 2: - # This is a table row - create a table if it doesn't exist - if not hasattr(self, '_current_table') or self._current_table is None: - # Create new table - self._current_table = doc.add_table(rows=1, cols=len(parts)) - self._current_table.style = 'Table Grid' - - # Check if this looks like a header row (contains common header words) - is_header = any(word.lower() in clean_line.lower() for word in ['name', 'quantity', 'part', 'number', 'description', 'tag', 'item', 'status']) - - # Add header row - for i, part in enumerate(parts): - if i < len(self._current_table.rows[0].cells): - cell = self._current_table.rows[0].cells[i] - cell.text = part - # Make header bold if it looks like a header - if is_header: - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - else: - # Add data row to existing table - row = self._current_table.add_row() - for i, part in enumerate(parts): - if i < len(row.cells): - row.cells[i].text = part - else: - # Not a table row, treat as regular text - doc.add_paragraph(line) - - def _add_bullet_point(self, doc, text: str): - """Add a bullet point to the document.""" - if not text.strip(): - return - - # Create paragraph with bullet style - para = doc.add_paragraph(text, style='List Bullet') - - # Check for bold text in bullet point - if '**' in text: - # Clear the paragraph and rebuild with formatting - para.clear() - parts = text.split('**') - for i, part in enumerate(parts): - if i % 2 == 0: - # Regular text - if part: - para.add_run(part) - else: - # Bold text - if part: - run = para.add_run(part) - run.bold = True - - def _process_table_row(self, doc, line: str): - """Process a table row and add it to the document.""" - if not line.strip(): - return - - # Split by pipe separator - parts = [part.strip() for part in line.split('|')] - - if len(parts) >= 2: - # This is a table row - create a table if it doesn't exist - if not hasattr(self, '_current_table') or self._current_table is None: - # Create new table - self._current_table = doc.add_table(rows=1, cols=len(parts)) - self._current_table.style = 'Table Grid' - - # Add header row - for i, part in enumerate(parts): - if i < len(self._current_table.rows[0].cells): - cell = self._current_table.rows[0].cells[i] - cell.text = part - # Make header bold - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - else: - # Add data row to existing table - row = self._current_table.add_row() - for i, part in enumerate(parts): - if i < len(row.cells): - row.cells[i].text = part - else: - # Not a table row, treat as regular text - doc.add_paragraph(line) \ No newline at end of file + run.bold = True \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/excel_renderer.py b/modules/services/serviceGeneration/renderers/rendererExcel.py similarity index 72% rename from modules/services/serviceGeneration/renderers/excel_renderer.py rename to modules/services/serviceGeneration/renderers/rendererExcel.py index a744e981..142892eb 100644 --- a/modules/services/serviceGeneration/renderers/excel_renderer.py +++ b/modules/services/serviceGeneration/renderers/rendererExcel.py @@ -2,7 +2,7 @@ Excel renderer for report generation using openpyxl. """ -from .base_renderer import BaseRenderer +from .rendererBaseTemplate import BaseRenderer from typing import Dict, Any, Tuple, List import io import base64 @@ -17,7 +17,7 @@ try: except ImportError: OPENPYXL_AVAILABLE = False -class ExcelRenderer(BaseRenderer): +class RendererExcel(BaseRenderer): """Renders content to Excel format using openpyxl.""" @classmethod @@ -35,27 +35,13 @@ class ExcelRenderer(BaseRenderer): """Return priority for Excel renderer.""" return 110 - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only Excel-specific guidelines; global prompt is built centrally.""" - return ( - "EXCEL FORMAT GUIDELINES:\n" - "- Extract structured data from source documents into JSON format\n" - "- Focus on tabular data, lists, and structured information suitable for spreadsheets\n" - "- For tables: Extract headers and rows as separate arrays with clear column names\n" - "- For lists: Extract items with optional sub-items and metadata\n" - "- Structure content into sections with clear content types (table, list, paragraph)\n" - "- Use proper JSON structure with metadata, sections, and elements\n" - "- Ensure data is clean and ready for Excel conversion with proper formatting\n" - "OUTPUT: Return structured JSON that can be converted to Excel format." - ) - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: """Render extracted JSON content to Excel format using AI-analyzed styling.""" try: if not OPENPYXL_AVAILABLE: # Fallback to CSV if openpyxl not available - from .csv_renderer import CsvRenderer - csv_renderer = CsvRenderer() + from .rendererCsv import RendererCsv + csv_renderer = RendererCsv() csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service) return csv_content, "text/csv" @@ -215,6 +201,10 @@ class ExcelRenderer(BaseRenderer): async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: """Generate Excel content from structured JSON document using AI-generated styling.""" try: + # Debug output + print(f"๐Ÿ” EXCEL JSON CONTENT TYPE: {type(json_content)}") + print(f"๐Ÿ” EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}") + # Get AI-generated styling definitions styles = await self._get_excel_styles(user_prompt, ai_service) @@ -231,11 +221,9 @@ class ExcelRenderer(BaseRenderer): # Create workbook wb = Workbook() - # Remove default sheet - wb.remove(wb.active) - # Create sheets based on content sheets = self._create_excel_sheets(wb, json_content, styles) + print(f"๐Ÿ” EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}") # Populate sheets with content self._populate_excel_sheets(sheets, json_content, styles) @@ -247,7 +235,13 @@ class ExcelRenderer(BaseRenderer): # Convert to base64 excel_bytes = buffer.getvalue() - excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') + print(f"๐Ÿ” EXCEL BYTES LENGTH: {len(excel_bytes)}") + try: + excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') + print(f"๐Ÿ” EXCEL BASE64 LENGTH: {len(excel_base64)}") + except Exception as b64_error: + print(f"๐Ÿ” BASE64 ENCODING ERROR: {b64_error}") + raise return excel_base64 @@ -256,59 +250,38 @@ class ExcelRenderer(BaseRenderer): raise Exception(f"Excel generation failed: {str(e)}") async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: - """Simple AI call to get Excel styling definitions.""" - if not ai_service: - return self._get_default_excel_styles() + """Get Excel styling definitions using base template AI styling.""" + style_schema = { + "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, + "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"}, + "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"}, + "table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2}, + "paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"}, + "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} + } + style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema) + styles = await self._get_ai_styles(ai_service, style_template, self._get_default_excel_styles()) + + # Convert colors to aRGB format and validate + styles = self._convert_colors_format(styles) + return self._validate_excel_styles_contrast(styles) + + def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + """Convert hex colors to aRGB format for Excel compatibility.""" try: - prompt = f""" -For this Excel document request: "{user_prompt}" - -Provide styling definitions for Excel elements. Respond with ONLY JSON: - -{{ - "title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}}, - "heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}}, - "table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}}, - "table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}}, - "bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}}, - "paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}}, - "code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}} -}} - -CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability. -""" - - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType - - request_options = AiCallOptions() - request_options.operationType = OperationType.GENERAL - - request = AiCallRequest(prompt=prompt, context="", options=request_options) - response = await ai_service.aiObjects.call(request) - - import json - import re - - # Clean and parse JSON - result = response.content.strip() - if result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - styles = json.loads(result) - - # Validate and fix contrast issues - styles = self._validate_excel_styles_contrast(styles) - + for style_name, style_config in styles.items(): + if isinstance(style_config, dict): + for prop, value in style_config.items(): + if isinstance(value, str) and value.startswith('#') and len(value) == 7: + # Convert #RRGGBB to #AARRGGBB (add FF alpha channel) + styles[style_name][prop] = f"FF{value[1:]}" + print(f"๐Ÿ” CONVERTED COLOR: {value} โ†’ {styles[style_name][prop]}") return styles - except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return self._get_default_excel_styles() + print(f"๐Ÿ” COLOR CONVERSION ERROR: {e}") + return styles def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" @@ -348,15 +321,15 @@ CRITICAL: Table headers must have dark background with light text, table cells m return self._get_default_excel_styles() def _get_default_excel_styles(self) -> Dict[str, Any]: - """Default Excel styles.""" + """Default Excel styles with aRGB color format.""" return { - "title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"}, - "heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"}, - "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"}, - "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2}, - "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"}, - "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} + "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, + "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"}, + "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"}, + "table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2}, + "paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"}, + "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} } def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: @@ -365,13 +338,16 @@ CRITICAL: Table headers must have dark background with light text, table cells m # Get sheet names from AI styles or generate based on content sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content)) + print(f"๐Ÿ” EXCEL SHEET NAMES: {sheet_names}") # Create sheets for i, sheet_name in enumerate(sheet_names): if i == 0: + # Use the default sheet for the first sheet sheet = wb.active sheet.title = sheet_name else: + # Create additional sheets sheet = wb.create_sheet(sheet_name, i) sheets[sheet_name.lower()] = sheet @@ -437,7 +413,9 @@ CRITICAL: Table headers must have dark background with light text, table cells m document_title = json_content.get("metadata", {}).get("title", "Generated Report") sheet['A1'] = document_title - title_style = styles["title"] + # Safety check for title style + title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"}) + print(f"๐Ÿ” EXCEL TITLE STYLE: {title_style}") sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=title_style["color"]) sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) @@ -560,6 +538,107 @@ CRITICAL: Table headers must have dark background with light text, table cells m self.logger.warning(f"Could not add section to sheet: {str(e)}") return start_row + 1 + def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + """Add a table element to Excel sheet.""" + try: + table_data = element.get("data", {}) + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + + if not headers and not rows: + return start_row + + # Add headers + header_style = styles.get("table_header", {}) + for col, header in enumerate(headers, 1): + cell = sheet.cell(row=start_row, column=col, value=header) + if header_style.get("bold"): + cell.font = Font(bold=True, color=header_style.get("text_color", "#FF000000")) + if header_style.get("background"): + cell.fill = PatternFill(start_color=header_style["background"], end_color=header_style["background"], fill_type="solid") + + start_row += 1 + + # Add rows + cell_style = styles.get("table_cell", {}) + for row_data in rows: + for col, cell_value in enumerate(row_data, 1): + cell = sheet.cell(row=start_row, column=col, value=cell_value) + if cell_style.get("text_color"): + cell.font = Font(color=cell_style["text_color"]) + start_row += 1 + + return start_row + + except Exception as e: + self.logger.warning(f"Could not add table to Excel: {str(e)}") + return start_row + 1 + + def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + """Add a list element to Excel sheet.""" + try: + list_items = element.get("items", []) + + list_style = styles.get("bullet_list", {}) + for item in list_items: + sheet.cell(row=start_row, column=1, value=f"โ€ข {item}") + if list_style.get("color"): + sheet.cell(row=start_row, column=1).font = Font(color=list_style["color"]) + start_row += 1 + + return start_row + + except Exception as e: + self.logger.warning(f"Could not add list to Excel: {str(e)}") + return start_row + 1 + + def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + """Add a paragraph element to Excel sheet.""" + try: + text = element.get("text", "") + if text: + sheet.cell(row=start_row, column=1, value=text) + + paragraph_style = styles.get("paragraph", {}) + if paragraph_style.get("color"): + sheet.cell(row=start_row, column=1).font = Font(color=paragraph_style["color"]) + + start_row += 1 + + return start_row + + except Exception as e: + self.logger.warning(f"Could not add paragraph to Excel: {str(e)}") + return start_row + 1 + + def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + """Add a heading element to Excel sheet.""" + try: + text = element.get("text", "") + level = element.get("level", 1) + + if text: + sheet.cell(row=start_row, column=1, value=text) + + heading_style = styles.get("heading", {}) + font_size = heading_style.get("font_size", 14) + if level > 1: + font_size = max(10, font_size - (level - 1) * 2) + + sheet.cell(row=start_row, column=1).font = Font( + size=font_size, + bold=True, + color=heading_style.get("color", "#FF000000") + ) + + start_row += 1 + + return start_row + + except Exception as e: + self.logger.warning(f"Could not add heading to Excel: {str(e)}") + return start_row + 1 + def _format_timestamp(self) -> str: """Format current timestamp for document generation.""" return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py new file mode 100644 index 00000000..6950712e --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -0,0 +1,463 @@ +""" +HTML renderer for report generation. +""" + +from .rendererBaseTemplate import BaseRenderer +from typing import Dict, Any, Tuple, List + +class RendererHtml(BaseRenderer): + """Renders content to HTML format with format-specific extraction.""" + + @classmethod + def get_supported_formats(cls) -> List[str]: + """Return supported HTML formats.""" + return ['html', 'htm'] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """Return format aliases.""" + return ['web', 'webpage'] + + @classmethod + def get_priority(cls) -> int: + """Return priority for HTML renderer.""" + return 100 + + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """Render extracted JSON content to HTML format using AI-analyzed styling.""" + try: + # Generate HTML using AI-analyzed styling + html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service) + + return html_content, "text/html" + + except Exception as e: + self.logger.error(f"Error rendering HTML: {str(e)}") + # Return minimal HTML fallback + return f"{title}

{title}

Error rendering report: {str(e)}

", "text/html" + + async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + """Generate HTML content from structured JSON document using AI-generated styling.""" + try: + # Get AI-generated styling definitions + styles = await self._get_html_styles(user_prompt, ai_service) + + # Validate JSON structure + if not isinstance(json_content, dict): + raise ValueError("JSON content must be a dictionary") + + if "sections" not in json_content: + raise ValueError("JSON content must contain 'sections' field") + + # Use title from JSON metadata if available, otherwise use provided title + document_title = json_content.get("metadata", {}).get("title", title) + + # Build HTML document + html_parts = [] + + # HTML document structure + html_parts.append('') + html_parts.append('') + html_parts.append('') + html_parts.append('') + html_parts.append('') + html_parts.append(f'{document_title}') + html_parts.append('') + html_parts.append('') + html_parts.append('') + + # Document header + html_parts.append(f'

{document_title}

') + + # Main content + html_parts.append('
') + + # Process each section + sections = json_content.get("sections", []) + for section in sections: + section_html = self._render_json_section(section, styles) + if section_html: + html_parts.append(section_html) + + html_parts.append('
') + + # Footer + html_parts.append('') + + html_parts.append('') + html_parts.append('') + + return '\n'.join(html_parts) + + except Exception as e: + self.logger.error(f"Error generating HTML from JSON: {str(e)}") + raise Exception(f"HTML generation failed: {str(e)}") + + async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + """Simple AI call to get HTML styling definitions.""" + if not ai_service: + return self._get_default_html_styles() + + try: + prompt = f"""Return this exact JSON structure with your styling customizations: + +{{ + "title": {{"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}}, + "heading1": {{"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}}, + "heading2": {{"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}}, + "paragraph": {{"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"}}, + "table": {{"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"}}, + "table_header": {{"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"}}, + "table_cell": {{"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"}}, + "bullet_list": {{"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"}}, + "code_block": {{"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"}}, + "image": {{"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"}}, + "body": {{"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}} +}} + +NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE.""" + + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + request = AiCallRequest(prompt=prompt, context="", options=request_options) + response = await ai_service.aiObjects.call(request) + + import json + import re + + # Clean and parse JSON + result = response.content.strip() if response and response.content else "" + + # Check if result is empty + if not result: + self.logger.warning("AI styling returned empty response, using defaults") + return self._get_default_html_styles() + + # Extract JSON from markdown code blocks + json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) + if json_match: + result = json_match.group(1).strip() + elif result.startswith('```json'): + result = re.sub(r'^```json\s*', '', result) + result = re.sub(r'\s*```$', '', result) + elif result.startswith('```'): + result = re.sub(r'^```\s*', '', result) + result = re.sub(r'\s*```$', '', result) + + # Try to parse JSON + try: + styles = json.loads(result) + except json.JSONDecodeError as json_error: + self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults") + return self._get_default_html_styles() + + # Validate and fix contrast issues + styles = self._validate_html_styles_contrast(styles) + + return styles + + except Exception as e: + self.logger.warning(f"AI styling failed: {str(e)}, using defaults") + return self._get_default_html_styles() + + def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + """Validate and fix contrast issues in AI-generated styles.""" + try: + # Fix table header contrast + if "table_header" in styles: + header = styles["table_header"] + bg_color = header.get("background", "#FFFFFF") + text_color = header.get("color", "#000000") + + # If both are white or both are dark, fix it + if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + header["background"] = "#4F4F4F" + header["color"] = "#FFFFFF" + elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + header["background"] = "#4F4F4F" + header["color"] = "#FFFFFF" + + # Fix table cell contrast + if "table_cell" in styles: + cell = styles["table_cell"] + bg_color = cell.get("background", "#FFFFFF") + text_color = cell.get("color", "#000000") + + # If both are white or both are dark, fix it + if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + cell["background"] = "#FFFFFF" + cell["color"] = "#2F2F2F" + elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + cell["background"] = "#FFFFFF" + cell["color"] = "#2F2F2F" + + return styles + + except Exception as e: + self.logger.warning(f"Style validation failed: {str(e)}") + return self._get_default_html_styles() + + def _get_default_html_styles(self) -> Dict[str, Any]: + """Default HTML styles.""" + return { + "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, + "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}, + "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}, + "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"}, + "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"}, + "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"}, + "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"}, + "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"}, + "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"}, + "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"}, + "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"} + } + + def _generate_css_styles(self, styles: Dict[str, Any]) -> str: + """Generate CSS from style definitions.""" + css_parts = [] + + # Body styles + body_style = styles.get("body", {}) + css_parts.append("body {") + for property_name, value in body_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Document title + title_style = styles.get("title", {}) + css_parts.append(".document-title {") + for property_name, value in title_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Headings + for heading_level in ["heading1", "heading2"]: + heading_style = styles.get(heading_level, {}) + css_class = f"h{heading_level[-1]}" + css_parts.append(f"{css_class} {{") + for property_name, value in heading_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Paragraphs + paragraph_style = styles.get("paragraph", {}) + css_parts.append("p {") + for property_name, value in paragraph_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Tables + table_style = styles.get("table", {}) + css_parts.append("table {") + for property_name, value in table_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Table headers + table_header_style = styles.get("table_header", {}) + css_parts.append("th {") + for property_name, value in table_header_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Table cells + table_cell_style = styles.get("table_cell", {}) + css_parts.append("td {") + for property_name, value in table_cell_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Lists + bullet_list_style = styles.get("bullet_list", {}) + css_parts.append("ul {") + for property_name, value in bullet_list_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Code blocks + code_block_style = styles.get("code_block", {}) + css_parts.append("pre {") + for property_name, value in code_block_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Images + image_style = styles.get("image", {}) + css_parts.append("img {") + for property_name, value in image_style.items(): + css_property = property_name.replace("_", "-") + css_parts.append(f" {css_property}: {value};") + css_parts.append("}") + + # Generated info + css_parts.append(".generated-info {") + css_parts.append(" font-size: 0.9em;") + css_parts.append(" color: #666;") + css_parts.append(" text-align: center;") + css_parts.append(" margin-top: 2em;") + css_parts.append(" padding-top: 1em;") + css_parts.append(" border-top: 1px solid #ddd;") + css_parts.append("}") + + return '\n'.join(css_parts) + + def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a single JSON section to HTML using AI-generated styles.""" + try: + section_type = self._get_section_type(section) + section_data = self._get_section_data(section) + + if section_type == "table": + return self._render_json_table(section_data, styles) + elif section_type == "bullet_list": + return self._render_json_bullet_list(section_data, styles) + elif section_type == "heading": + return self._render_json_heading(section_data, styles) + elif section_type == "paragraph": + return self._render_json_paragraph(section_data, styles) + elif section_type == "code_block": + return self._render_json_code_block(section_data, styles) + elif section_type == "image": + return self._render_json_image(section_data, styles) + else: + # Fallback to paragraph for unknown types + return self._render_json_paragraph(section_data, styles) + + except Exception as e: + self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + return f'
[Error rendering section: {str(e)}]
' + + def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON table to HTML using AI-generated styles.""" + try: + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + + if not headers or not rows: + return "" + + html_parts = [''] + + # Table header + html_parts.append('') + for header in headers: + html_parts.append(f'') + html_parts.append('') + + # Table body + html_parts.append('') + for row in rows: + html_parts.append('') + for cell_data in row: + html_parts.append(f'') + html_parts.append('') + html_parts.append('') + + html_parts.append('
{header}
{cell_data}
') + return '\n'.join(html_parts) + + except Exception as e: + self.logger.warning(f"Error rendering table: {str(e)}") + return "" + + def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON bullet list to HTML using AI-generated styles.""" + try: + items = list_data.get("items", []) + + if not items: + return "" + + html_parts = ['') + + return '\n'.join(html_parts) + + except Exception as e: + self.logger.warning(f"Error rendering bullet list: {str(e)}") + return "" + + def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON heading to HTML using AI-generated styles.""" + try: + level = heading_data.get("level", 1) + text = heading_data.get("text", "") + + if text: + level = max(1, min(6, level)) + return f'{text}' + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering heading: {str(e)}") + return "" + + def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON paragraph to HTML using AI-generated styles.""" + try: + text = paragraph_data.get("text", "") + + if text: + return f'

{text}

' + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering paragraph: {str(e)}") + return "" + + def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON code block to HTML using AI-generated styles.""" + try: + code = code_data.get("code", "") + language = code_data.get("language", "") + + if code: + if language: + return f'
{code}
' + else: + return f'
{code}
' + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering code block: {str(e)}") + return "" + + def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + """Render a JSON image to HTML.""" + try: + base64_data = image_data.get("base64Data", "") + alt_text = image_data.get("altText", "Image") + + if base64_data: + return f'{alt_text}' + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering image: {str(e)}") + return f'
[Image: {image_data.get("altText", "Image")}]
' diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py new file mode 100644 index 00000000..17555b6f --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererJson.py @@ -0,0 +1,79 @@ +""" +JSON renderer for report generation. +""" + +from .rendererBaseTemplate import BaseRenderer +from typing import Dict, Any, Tuple, List +import json + +class RendererJson(BaseRenderer): + """Renders content to JSON format with format-specific extraction.""" + + @classmethod + def get_supported_formats(cls) -> List[str]: + """Return supported JSON formats.""" + return ['json'] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """Return format aliases.""" + return ['data'] + + @classmethod + def get_priority(cls) -> int: + """Return priority for JSON renderer.""" + return 80 + + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """Render extracted JSON content to JSON format.""" + try: + # The extracted content should already be JSON from the AI + # Just validate and format it + json_content = self._clean_json_content(extracted_content, title) + + return json_content, "application/json" + + except Exception as e: + self.logger.error(f"Error rendering JSON: {str(e)}") + # Return minimal JSON fallback + fallback_data = { + "title": title, + "sections": [{"type": "paragraph", "data": {"text": f"Error rendering report: {str(e)}"}}], + "metadata": {"error": str(e)} + } + return json.dumps(fallback_data, indent=2), "application/json" + + def _clean_json_content(self, content: Dict[str, Any], title: str) -> str: + """Clean and validate JSON content from AI.""" + try: + # Validate JSON structure + if not isinstance(content, dict): + raise ValueError("Content must be a dictionary") + + # Ensure it has the expected structure + if "sections" not in content: + # Convert old format to new format + content = { + "sections": [{"type": "paragraph", "data": {"text": str(content)}}], + "metadata": {"title": title} + } + + # Ensure metadata exists + if "metadata" not in content: + content["metadata"] = {} + + # Set title in metadata if not present + if "title" not in content["metadata"]: + content["metadata"]["title"] = title + + # Re-format with proper indentation + return json.dumps(content, indent=2, ensure_ascii=False) + + except Exception as e: + self.logger.warning(f"Error cleaning JSON content: {str(e)}") + # Return minimal valid JSON + fallback_data = { + "sections": [{"type": "paragraph", "data": {"text": str(content)}}], + "metadata": {"title": title, "error": str(e)} + } + return json.dumps(fallback_data, indent=2, ensure_ascii=False) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py new file mode 100644 index 00000000..61f0bebc --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -0,0 +1,213 @@ +""" +Markdown renderer for report generation. +""" + +from .rendererBaseTemplate import BaseRenderer +from typing import Dict, Any, Tuple, List + +class RendererMarkdown(BaseRenderer): + """Renders content to Markdown format with format-specific extraction.""" + + @classmethod + def get_supported_formats(cls) -> List[str]: + """Return supported Markdown formats.""" + return ['md', 'markdown'] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """Return format aliases.""" + return ['mdown', 'mkd'] + + @classmethod + def get_priority(cls) -> int: + """Return priority for markdown renderer.""" + return 95 + + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """Render extracted JSON content to Markdown format.""" + try: + # Generate markdown from JSON structure + markdown_content = self._generate_markdown_from_json(extracted_content, title) + + return markdown_content, "text/markdown" + + except Exception as e: + self.logger.error(f"Error rendering markdown: {str(e)}") + # Return minimal markdown fallback + return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" + + def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str: + """Generate markdown content from structured JSON document.""" + try: + # Validate JSON structure + if not isinstance(json_content, dict): + raise ValueError("JSON content must be a dictionary") + + if "sections" not in json_content: + raise ValueError("JSON content must contain 'sections' field") + + # Use title from JSON metadata if available, otherwise use provided title + document_title = json_content.get("metadata", {}).get("title", title) + + # Build markdown content + markdown_parts = [] + + # Document title + markdown_parts.append(f"# {document_title}") + markdown_parts.append("") + + # Process each section + sections = json_content.get("sections", []) + for section in sections: + section_markdown = self._render_json_section(section) + if section_markdown: + markdown_parts.append(section_markdown) + markdown_parts.append("") # Add spacing between sections + + # Add generation info + markdown_parts.append("---") + markdown_parts.append(f"*Generated: {self._format_timestamp()}*") + + return '\n'.join(markdown_parts) + + except Exception as e: + self.logger.error(f"Error generating markdown from JSON: {str(e)}") + raise Exception(f"Markdown generation failed: {str(e)}") + + def _render_json_section(self, section: Dict[str, Any]) -> str: + """Render a single JSON section to markdown.""" + try: + section_type = self._get_section_type(section) + section_data = self._get_section_data(section) + + if section_type == "table": + return self._render_json_table(section_data) + elif section_type == "bullet_list": + return self._render_json_bullet_list(section_data) + elif section_type == "heading": + return self._render_json_heading(section_data) + elif section_type == "paragraph": + return self._render_json_paragraph(section_data) + elif section_type == "code_block": + return self._render_json_code_block(section_data) + elif section_type == "image": + return self._render_json_image(section_data) + else: + # Fallback to paragraph for unknown types + return self._render_json_paragraph(section_data) + + except Exception as e: + self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + return f"*[Error rendering section: {str(e)}]*" + + def _render_json_table(self, table_data: Dict[str, Any]) -> str: + """Render a JSON table to markdown.""" + try: + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + + if not headers or not rows: + return "" + + markdown_parts = [] + + # Create table header + header_line = " | ".join(str(header) for header in headers) + markdown_parts.append(header_line) + + # Add separator line + separator_line = " | ".join("---" for _ in headers) + markdown_parts.append(separator_line) + + # Add data rows + for row in rows: + row_line = " | ".join(str(cell_data) for cell_data in row) + markdown_parts.append(row_line) + + return '\n'.join(markdown_parts) + + except Exception as e: + self.logger.warning(f"Error rendering table: {str(e)}") + return "" + + def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: + """Render a JSON bullet list to markdown.""" + try: + items = list_data.get("items", []) + + if not items: + return "" + + markdown_parts = [] + for item in items: + if isinstance(item, str): + markdown_parts.append(f"- {item}") + elif isinstance(item, dict) and "text" in item: + markdown_parts.append(f"- {item['text']}") + + return '\n'.join(markdown_parts) + + except Exception as e: + self.logger.warning(f"Error rendering bullet list: {str(e)}") + return "" + + def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: + """Render a JSON heading to markdown.""" + try: + level = heading_data.get("level", 1) + text = heading_data.get("text", "") + + if text: + level = max(1, min(6, level)) + return f"{'#' * level} {text}" + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering heading: {str(e)}") + return "" + + def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: + """Render a JSON paragraph to markdown.""" + try: + text = paragraph_data.get("text", "") + return text if text else "" + + except Exception as e: + self.logger.warning(f"Error rendering paragraph: {str(e)}") + return "" + + def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: + """Render a JSON code block to markdown.""" + try: + code = code_data.get("code", "") + language = code_data.get("language", "") + + if code: + if language: + return f"```{language}\n{code}\n```" + else: + return f"```\n{code}\n```" + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering code block: {str(e)}") + return "" + + def _render_json_image(self, image_data: Dict[str, Any]) -> str: + """Render a JSON image to markdown.""" + try: + alt_text = image_data.get("altText", "Image") + base64_data = image_data.get("base64Data", "") + + if base64_data: + # For base64 images, we can't embed them directly in markdown + # So we'll use a placeholder with the alt text + return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)" + else: + return f"![{alt_text}](image-placeholder)" + + except Exception as e: + self.logger.warning(f"Error rendering image: {str(e)}") + return f"![{image_data.get('altText', 'Image')}](image-error)" diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py new file mode 100644 index 00000000..43c0ce6d --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -0,0 +1,416 @@ +""" +PDF renderer for report generation using reportlab. +""" + +from .rendererBaseTemplate import BaseRenderer +from typing import Dict, Any, Tuple, List +import io +import base64 +from datetime import datetime, UTC + +try: + from reportlab.lib.pagesizes import letter, A4 + from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak + from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle + from reportlab.lib.units import inch + from reportlab.lib import colors + from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY + REPORTLAB_AVAILABLE = True +except ImportError: + REPORTLAB_AVAILABLE = False + +class RendererPdf(BaseRenderer): + """Renders content to PDF format using reportlab.""" + + @classmethod + def get_supported_formats(cls) -> List[str]: + """Return supported PDF formats.""" + return ['pdf'] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """Return format aliases.""" + return ['document', 'print'] + + @classmethod + def get_priority(cls) -> int: + """Return priority for PDF renderer.""" + return 120 + + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """Render extracted JSON content to PDF format using AI-analyzed styling.""" + try: + if not REPORTLAB_AVAILABLE: + # Fallback to HTML if reportlab not available + from .rendererHtml import RendererHtml + html_renderer = RendererHtml() + html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service) + return html_content, "text/html" + + # Generate PDF using AI-analyzed styling + pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service) + + return pdf_content, "application/pdf" + + except Exception as e: + self.logger.error(f"Error rendering PDF: {str(e)}") + # Return minimal fallback + return f"PDF Generation Error: {str(e)}", "text/plain" + + async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + """Generate PDF content from structured JSON document using AI-generated styling.""" + try: + # Get AI-generated styling definitions + styles = await self._get_pdf_styles(user_prompt, ai_service) + + # Validate JSON structure + if not isinstance(json_content, dict): + raise ValueError("JSON content must be a dictionary") + + if "sections" not in json_content: + raise ValueError("JSON content must contain 'sections' field") + + # Use title from JSON metadata if available, otherwise use provided title + document_title = json_content.get("metadata", {}).get("title", title) + + # Create a buffer to hold the PDF + buffer = io.BytesIO() + + # Create PDF document + doc = SimpleDocTemplate( + buffer, + pagesize=A4, + rightMargin=72, + leftMargin=72, + topMargin=72, + bottomMargin=18 + ) + + # Build PDF content + story = [] + + # Title page + title_style = self._create_title_style(styles) + story.append(Paragraph(document_title, title_style)) + story.append(Spacer(1, 20)) + story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles))) + story.append(PageBreak()) + + # Process each section + sections = json_content.get("sections", []) + for section in sections: + section_elements = self._render_json_section(section, styles) + story.extend(section_elements) + + # Build PDF + doc.build(story) + + # Get PDF content as base64 + buffer.seek(0) + pdf_bytes = buffer.getvalue() + pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') + + return pdf_base64 + + except Exception as e: + self.logger.error(f"Error generating PDF from JSON: {str(e)}") + raise Exception(f"PDF generation failed: {str(e)}") + + async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + """Get PDF styling definitions using base template AI styling.""" + style_schema = { + "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, + "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12}, + "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8}, + "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2}, + "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12}, + "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10}, + "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3}, + "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} + } + + style_template = self._create_ai_style_template("pdf", user_prompt, style_schema) + styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles()) + + # Validate and fix contrast issues + return self._validate_pdf_styles_contrast(styles) + + def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + """Validate and fix contrast issues in AI-generated styles.""" + try: + # Fix table header contrast + if "table_header" in styles: + header = styles["table_header"] + bg_color = header.get("background", "#FFFFFF") + text_color = header.get("text_color", "#000000") + + # If both are white or both are dark, fix it + if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + header["background"] = "#4F4F4F" + header["text_color"] = "#FFFFFF" + elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + header["background"] = "#4F4F4F" + header["text_color"] = "#FFFFFF" + + # Fix table cell contrast + if "table_cell" in styles: + cell = styles["table_cell"] + bg_color = cell.get("background", "#FFFFFF") + text_color = cell.get("text_color", "#000000") + + # If both are white or both are dark, fix it + if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + cell["background"] = "#FFFFFF" + cell["text_color"] = "#2F2F2F" + elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + cell["background"] = "#FFFFFF" + cell["text_color"] = "#2F2F2F" + + return styles + + except Exception as e: + self.logger.warning(f"Style validation failed: {str(e)}") + return self._get_default_pdf_styles() + + def _get_default_pdf_styles(self) -> Dict[str, Any]: + """Default PDF styles.""" + return { + "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, + "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12}, + "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8}, + "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2}, + "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12}, + "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10}, + "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3}, + "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} + } + + def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle: + """Create title style from style definitions.""" + title_style_def = styles.get("title", {}) + + return ParagraphStyle( + 'CustomTitle', + fontSize=title_style_def.get("font_size", 24), + spaceAfter=title_style_def.get("space_after", 30), + alignment=self._get_alignment(title_style_def.get("align", "center")), + textColor=self._hex_to_color(title_style_def.get("color", "#1F4E79")) + ) + + def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle: + """Create heading style from style definitions.""" + heading_key = f"heading{level}" + heading_style_def = styles.get(heading_key, styles.get("heading1", {})) + + return ParagraphStyle( + f'CustomHeading{level}', + fontSize=heading_style_def.get("font_size", 18 - level * 2), + spaceAfter=heading_style_def.get("space_after", 12), + spaceBefore=heading_style_def.get("space_before", 12), + alignment=self._get_alignment(heading_style_def.get("align", "left")), + textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F")) + ) + + def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle: + """Create normal paragraph style from style definitions.""" + paragraph_style_def = styles.get("paragraph", {}) + + return ParagraphStyle( + 'CustomNormal', + fontSize=paragraph_style_def.get("font_size", 11), + spaceAfter=paragraph_style_def.get("space_after", 6), + alignment=self._get_alignment(paragraph_style_def.get("align", "left")), + textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")), + leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11) + ) + + def _get_alignment(self, align: str) -> int: + """Convert alignment string to reportlab alignment constant.""" + align_map = { + "center": TA_CENTER, + "left": TA_LEFT, + "justify": TA_JUSTIFY + } + return align_map.get(align.lower(), TA_LEFT) + + def _hex_to_color(self, hex_color: str) -> colors.Color: + """Convert hex color to reportlab color.""" + try: + hex_color = hex_color.lstrip('#') + r = int(hex_color[0:2], 16) / 255.0 + g = int(hex_color[2:4], 16) / 255.0 + b = int(hex_color[4:6], 16) / 255.0 + return colors.Color(r, g, b) + except: + return colors.black + + def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a single JSON section to PDF elements using AI-generated styles.""" + try: + section_type = self._get_section_type(section) + section_data = self._get_section_data(section) + + if section_type == "table": + return self._render_json_table(section_data, styles) + elif section_type == "bullet_list": + return self._render_json_bullet_list(section_data, styles) + elif section_type == "heading": + return self._render_json_heading(section_data, styles) + elif section_type == "paragraph": + return self._render_json_paragraph(section_data, styles) + elif section_type == "code_block": + return self._render_json_code_block(section_data, styles) + elif section_type == "image": + return self._render_json_image(section_data, styles) + else: + # Fallback to paragraph for unknown types + return self._render_json_paragraph(section_data, styles) + + except Exception as e: + self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))] + + def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON table to PDF elements using AI-generated styles.""" + try: + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + + if not headers or not rows: + return [] + + # Prepare table data + table_data_list = [headers] + rows + + # Create table + table = Table(table_data_list) + + # Apply styling + table_header_style = styles.get("table_header", {}) + table_cell_style = styles.get("table_cell", {}) + + table_style = [ + ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))), + ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))), + ('ALIGN', (0, 0), (-1, -1), self._get_alignment(table_cell_style.get("align", "left"))), + ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'), + ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)), + ('BOTTOMPADDING', (0, 0), (-1, 0), 12), + ('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))), + ('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)), + ('GRID', (0, 0), (-1, -1), 1, colors.black) + ] + + table.setStyle(TableStyle(table_style)) + + return [table, Spacer(1, 12)] + + except Exception as e: + self.logger.warning(f"Error rendering table: {str(e)}") + return [] + + def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON bullet list to PDF elements using AI-generated styles.""" + try: + items = list_data.get("items", []) + bullet_style_def = styles.get("bullet_list", {}) + + elements = [] + for item in items: + if isinstance(item, str): + elements.append(Paragraph(f"โ€ข {item}", self._create_normal_style(styles))) + elif isinstance(item, dict) and "text" in item: + elements.append(Paragraph(f"โ€ข {item['text']}", self._create_normal_style(styles))) + + if elements: + elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) + + return elements + + except Exception as e: + self.logger.warning(f"Error rendering bullet list: {str(e)}") + return [] + + def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON heading to PDF elements using AI-generated styles.""" + try: + level = heading_data.get("level", 1) + text = heading_data.get("text", "") + + if text: + level = max(1, min(6, level)) + heading_style = self._create_heading_style(styles, level) + return [Paragraph(text, heading_style)] + + return [] + + except Exception as e: + self.logger.warning(f"Error rendering heading: {str(e)}") + return [] + + def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON paragraph to PDF elements using AI-generated styles.""" + try: + text = paragraph_data.get("text", "") + + if text: + return [Paragraph(text, self._create_normal_style(styles))] + + return [] + + except Exception as e: + self.logger.warning(f"Error rendering paragraph: {str(e)}") + return [] + + def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON code block to PDF elements using AI-generated styles.""" + try: + code = code_data.get("code", "") + language = code_data.get("language", "") + code_style_def = styles.get("code_block", {}) + + if code: + elements = [] + + if language: + lang_style = ParagraphStyle( + 'CodeLanguage', + fontSize=code_style_def.get("font_size", 9), + textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), + fontName='Helvetica-Bold' + ) + elements.append(Paragraph(f"Code ({language}):", lang_style)) + + code_style = ParagraphStyle( + 'CodeBlock', + fontSize=code_style_def.get("font_size", 9), + textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), + fontName=code_style_def.get("font", "Courier"), + backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")), + spaceAfter=code_style_def.get("space_after", 6) + ) + elements.append(Paragraph(code, code_style)) + + return elements + + return [] + + except Exception as e: + self.logger.warning(f"Error rendering code block: {str(e)}") + return [] + + def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + """Render a JSON image to PDF elements.""" + try: + base64_data = image_data.get("base64Data", "") + alt_text = image_data.get("altText", "Image") + + if base64_data: + # For now, just add a placeholder since reportlab image handling is complex + return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))] + + return [] + + except Exception as e: + self.logger.warning(f"Error rendering image: {str(e)}") + return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))] \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/pptx_renderer.py b/modules/services/serviceGeneration/renderers/rendererPptx.py similarity index 88% rename from modules/services/serviceGeneration/renderers/pptx_renderer.py rename to modules/services/serviceGeneration/renderers/rendererPptx.py index 73b390b4..4dd0a07d 100644 --- a/modules/services/serviceGeneration/renderers/pptx_renderer.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -1,13 +1,13 @@ import logging import base64 import io -from typing import Dict, Any, Optional, Tuple -from .base_renderer import BaseRenderer +from typing import Dict, Any, Optional, Tuple, List +from .rendererBaseTemplate import BaseRenderer logger = logging.getLogger(__name__) -class PptxRenderer(BaseRenderer): +class RendererPptx(BaseRenderer): """Renderer for PowerPoint (.pptx) files using python-pptx library.""" def __init__(self): @@ -258,76 +258,25 @@ class PptxRenderer(BaseRenderer): """Get MIME type for rendered output.""" return self.output_mime_type - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only PowerPoint-specific guidelines; global prompt is built centrally.""" - return ( - "POWERPOINT FORMAT GUIDELINES:\n" - "- Extract structured data from source documents into JSON format\n" - "- Focus on presentation-ready content with clear sections and visual elements\n" - "- For tables: Extract headers and rows as separate arrays suitable for slides\n" - "- For lists: Extract items with optional sub-items for bullet points\n" - "- Structure content into sections with clear content types (heading, paragraph, table, list)\n" - "- Use proper JSON structure with metadata, sections, and elements\n" - "- Ensure content is concise and suitable for slide presentation\n" - "OUTPUT: Return structured JSON that can be converted to PowerPoint slides." - ) - async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: - """Simple AI call to get PowerPoint styling definitions.""" - if not ai_service: - return self._get_default_pptx_styles() + """Get PowerPoint styling definitions using base template AI styling.""" + style_schema = { + "title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"}, + "heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"}, + "subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"}, + "paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20}, + "table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"}, + "table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"}, + "slide_size": "16:9", + "content_per_slide": "concise" + } - try: - prompt = f""" -For this PowerPoint presentation request: "{user_prompt}" - -Provide styling definitions for PowerPoint elements. Respond with ONLY JSON: - -{{ - "title": {{"font_size": 44, "color": "#1F4E79", "bold": true, "align": "center"}}, - "heading": {{"font_size": 32, "color": "#2F2F2F", "bold": true, "align": "left"}}, - "subheading": {{"font_size": 24, "color": "#4F4F4F", "bold": true, "align": "left"}}, - "paragraph": {{"font_size": 18, "color": "#2F2F2F", "bold": false, "align": "left"}}, - "bullet_list": {{"font_size": 18, "color": "#2F2F2F", "indent": 20}}, - "table_header": {{"font_size": 16, "color": "#FFFFFF", "bold": true, "background": "#4F4F4F"}}, - "table_cell": {{"font_size": 14, "color": "#2F2F2F", "bold": false, "background": "#FFFFFF"}}, - "slide_size": "16:9", - "content_per_slide": "concise" -}} - -CRITICAL: PowerPoint text must be large enough to read from a distance. Minimum font size should be 14pt for body text. -""" - - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType - - request_options = AiCallOptions() - request_options.operationType = OperationType.GENERAL - - request = AiCallRequest(prompt=prompt, context="", options=request_options) - response = await ai_service.aiObjects.call(request) - - import json - import re - - # Clean and parse JSON - result = response.content.strip() - if result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - styles = json.loads(result) - - # Validate font sizes for PowerPoint readability - styles = self._validate_pptx_styles_readability(styles) - - return styles - - except Exception as e: - logger.warning(f"AI styling failed: {str(e)}, using defaults") - return self._get_default_pptx_styles() + style_template = self._create_ai_style_template("pptx", user_prompt, style_schema) + styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pptx_styles()) + + # Validate PowerPoint-specific requirements + return self._validate_pptx_styles_readability(styles) def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix readability issues in AI-generated styles.""" diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py new file mode 100644 index 00000000..6ca1415b --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -0,0 +1,234 @@ +""" +Text renderer for report generation. +""" + +from .rendererBaseTemplate import BaseRenderer +from typing import Dict, Any, Tuple, List + +class RendererText(BaseRenderer): + """Renders content to plain text format with format-specific extraction.""" + + @classmethod + def get_supported_formats(cls) -> List[str]: + """Return supported text formats (excluding formats with dedicated renderers).""" + return [ + 'txt', 'text', 'plain', + # Programming languages + 'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx', + 'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp', + 'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust', + 'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc', + 'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd', + # Web technologies (excluding html/htm which have dedicated renderer) + 'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg', + # Data formats (excluding csv, md/markdown which have dedicated renderers) + 'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore', + # Configuration files + 'env', 'properties', 'conf', 'config', 'rc', + 'gitattributes', 'editorconfig', 'eslintrc', + # Documentation + 'readme', 'changelog', 'license', 'authors', + 'contributing', 'todo', 'notes', 'docs' + ] + + @classmethod + def get_format_aliases(cls) -> List[str]: + """Return format aliases.""" + return [ + 'ascii', 'utf8', 'utf-8', 'code', 'source', + 'script', 'program', 'file', 'document', + 'raw', 'unformatted', 'plaintext' + ] + + @classmethod + def get_priority(cls) -> int: + """Return priority for text renderer.""" + return 90 + + async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + """Render extracted JSON content to plain text format.""" + try: + # Generate text from JSON structure + text_content = self._generate_text_from_json(extracted_content, title) + + return text_content, "text/plain" + + except Exception as e: + self.logger.error(f"Error rendering text: {str(e)}") + # Return minimal text fallback + return f"{title}\n\nError rendering report: {str(e)}", "text/plain" + + def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str: + """Generate text content from structured JSON document.""" + try: + # Validate JSON structure + if not isinstance(json_content, dict): + raise ValueError("JSON content must be a dictionary") + + if "sections" not in json_content: + raise ValueError("JSON content must contain 'sections' field") + + # Use title from JSON metadata if available, otherwise use provided title + document_title = json_content.get("metadata", {}).get("title", title) + + # Build text content + text_parts = [] + + # Document title + text_parts.append(document_title) + text_parts.append("=" * len(document_title)) + text_parts.append("") + + # Process each section + sections = json_content.get("sections", []) + for section in sections: + section_text = self._render_json_section(section) + if section_text: + text_parts.append(section_text) + text_parts.append("") # Add spacing between sections + + # Add generation info + text_parts.append("") + text_parts.append(f"Generated: {self._format_timestamp()}") + + return '\n'.join(text_parts) + + except Exception as e: + self.logger.error(f"Error generating text from JSON: {str(e)}") + raise Exception(f"Text generation failed: {str(e)}") + + def _render_json_section(self, section: Dict[str, Any]) -> str: + """Render a single JSON section to text.""" + try: + section_type = self._get_section_type(section) + section_data = self._get_section_data(section) + + if section_type == "table": + return self._render_json_table(section_data) + elif section_type == "bullet_list": + return self._render_json_bullet_list(section_data) + elif section_type == "heading": + return self._render_json_heading(section_data) + elif section_type == "paragraph": + return self._render_json_paragraph(section_data) + elif section_type == "code_block": + return self._render_json_code_block(section_data) + elif section_type == "image": + return self._render_json_image(section_data) + else: + # Fallback to paragraph for unknown types + return self._render_json_paragraph(section_data) + + except Exception as e: + self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + return f"[Error rendering section: {str(e)}]" + + def _render_json_table(self, table_data: Dict[str, Any]) -> str: + """Render a JSON table to text.""" + try: + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + + if not headers or not rows: + return "" + + text_parts = [] + + # Create table header + header_line = " | ".join(str(header) for header in headers) + text_parts.append(header_line) + + # Add separator line + separator_line = " | ".join("-" * len(str(header)) for header in headers) + text_parts.append(separator_line) + + # Add data rows + for row in rows: + row_line = " | ".join(str(cell_data) for cell_data in row) + text_parts.append(row_line) + + return '\n'.join(text_parts) + + except Exception as e: + self.logger.warning(f"Error rendering table: {str(e)}") + return "" + + def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: + """Render a JSON bullet list to text.""" + try: + items = list_data.get("items", []) + + if not items: + return "" + + text_parts = [] + for item in items: + if isinstance(item, str): + text_parts.append(f"- {item}") + elif isinstance(item, dict) and "text" in item: + text_parts.append(f"- {item['text']}") + + return '\n'.join(text_parts) + + except Exception as e: + self.logger.warning(f"Error rendering bullet list: {str(e)}") + return "" + + def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: + """Render a JSON heading to text.""" + try: + level = heading_data.get("level", 1) + text = heading_data.get("text", "") + + if text: + level = max(1, min(6, level)) + if level == 1: + return f"{text}\n{'=' * len(text)}" + elif level == 2: + return f"{text}\n{'-' * len(text)}" + else: + return f"{'#' * level} {text}" + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering heading: {str(e)}") + return "" + + def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: + """Render a JSON paragraph to text.""" + try: + text = paragraph_data.get("text", "") + return text if text else "" + + except Exception as e: + self.logger.warning(f"Error rendering paragraph: {str(e)}") + return "" + + def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: + """Render a JSON code block to text.""" + try: + code = code_data.get("code", "") + language = code_data.get("language", "") + + if code: + if language: + return f"Code ({language}):\n{code}" + else: + return code + + return "" + + except Exception as e: + self.logger.warning(f"Error rendering code block: {str(e)}") + return "" + + def _render_json_image(self, image_data: Dict[str, Any]) -> str: + """Render a JSON image to text.""" + try: + alt_text = image_data.get("altText", "Image") + return f"[Image: {alt_text}]" + + except Exception as e: + self.logger.warning(f"Error rendering image: {str(e)}") + return f"[Image: {image_data.get('altText', 'Image')}]" diff --git a/modules/services/serviceGeneration/renderers/text_renderer.py b/modules/services/serviceGeneration/renderers/text_renderer.py deleted file mode 100644 index 67e32069..00000000 --- a/modules/services/serviceGeneration/renderers/text_renderer.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Text renderer for report generation. -""" - -from .base_renderer import BaseRenderer -from typing import Dict, Any, Tuple, List - -class TextRenderer(BaseRenderer): - """Renders content to plain text format with format-specific extraction.""" - - @classmethod - def get_supported_formats(cls) -> List[str]: - """Return supported text formats (excluding formats with dedicated renderers).""" - return [ - 'txt', 'text', 'plain', - # Programming languages - 'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx', - 'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp', - 'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust', - 'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc', - 'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd', - # Web technologies (excluding html/htm which have dedicated renderer) - 'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg', - # Data formats (excluding csv, md/markdown which have dedicated renderers) - 'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore', - # Configuration files - 'env', 'properties', 'conf', 'config', 'rc', - 'gitattributes', 'editorconfig', 'eslintrc', - # Documentation - 'readme', 'changelog', 'license', 'authors', - 'contributing', 'todo', 'notes', 'docs' - ] - - @classmethod - def get_format_aliases(cls) -> List[str]: - """Return format aliases.""" - return [ - 'ascii', 'utf8', 'utf-8', 'code', 'source', - 'script', 'program', 'file', 'document', - 'raw', 'unformatted', 'plaintext' - ] - - @classmethod - def get_priority(cls) -> int: - """Return priority for text renderer.""" - return 90 - - def getExtractionPrompt(self, user_prompt: str, title: str) -> str: - """Return only plain-text guidelines; global prompt is built centrally.""" - return ( - "TEXT FORMAT GUIDELINES:\n" - "- Output ONLY plain text (no markdown or HTML).\n" - "- Use clear headings (you may underline with === or --- when helpful).\n" - "- Use simple bullet lists with '-' and tables with '|' when needed.\n" - "- Preserve indentation for code-like content if present.\n" - "OUTPUT: Return ONLY the raw text content." - ) - - async def render(self, extracted_content: str, title: str) -> Tuple[str, str]: - """Render extracted content to plain text format.""" - try: - # The extracted content should already be formatted text from the AI - # Just clean it up - text_content = self._clean_text_content(extracted_content, title) - - return text_content, "text/plain" - - except Exception as e: - self.logger.error(f"Error rendering text: {str(e)}") - # Return minimal text fallback - return f"{title}\n\nError rendering report: {str(e)}", "text/plain" - - def _clean_text_content(self, content: str, title: str) -> str: - """Clean and validate text content from AI.""" - content = content.strip() - - # Remove markdown code blocks if present - if content.startswith("```") and content.endswith("```"): - lines = content.split('\n') - if len(lines) > 2: - content = '\n'.join(lines[1:-1]).strip() - - # Remove any remaining markdown formatting - content = content.replace('**', '').replace('*', '') - content = content.replace('__', '').replace('_', '') - - # Clean up any HTML-like tags that might have slipped through - import re - content = re.sub(r'<[^>]+>', '', content) - - # Ensure proper line endings - content = content.replace('\r\n', '\n').replace('\r', '\n') - - return content diff --git a/modules/services/serviceGeneration/subPromptBuilder.py b/modules/services/serviceGeneration/subPromptBuilder.py index e21b5017..f7054adb 100644 --- a/modules/services/serviceGeneration/subPromptBuilder.py +++ b/modules/services/serviceGeneration/subPromptBuilder.py @@ -103,8 +103,20 @@ Return only the JSON structure with actual data from the documents. Do not inclu finalPrompt = genericIntro # Debug output - print(f"๐Ÿ” DEBUG: Extraction Prompt: {finalPrompt}") - print(f"๐Ÿ” DEBUG: Extraction Intent: {extractionIntent}") + print(f"๐Ÿ” EXTRACTION INTENT: {extractionIntent}") + + # Save full extraction prompt to debug file + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n") + f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n") + except Exception: + pass return finalPrompt @@ -127,6 +139,9 @@ async def buildGenerationPrompt( # Protect userPrompt from injection safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ') + # Debug output + print(f"๐Ÿ” GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'") + # AI call to generate the appropriate generation prompt generationPromptRequest = f""" Based on this user request, create a detailed generation prompt for creating a {outputFormat} document. @@ -144,17 +159,23 @@ Create a generation prompt that: IMPORTANT: Always generate content in STANDARDIZED JSON FORMAT. In your response, include the exact text "PLACEHOLDER_FOR_FORMAT_RULES" where specific format rules will be inserted afterwards automatically. +CRITICAL: You MUST start your response with exactly "Generate a {outputFormat} document that:" - do NOT use "docx" or any other format. Use the exact format specified: {outputFormat} + Return only the generation prompt, starting with "Generate a {outputFormat} document that..." """ # Call AI service to generate the prompt - print(f"๐Ÿ” DEBUG: Calling AI for generation prompt...") - result = await aiService.callAi( - prompt=generationPromptRequest, - documents=None, - options=None - ) - print(f"๐Ÿ” DEBUG: AI generation prompt result: '{result}'") + print(f"๐Ÿ” GENERATION PROMPT REQUEST: Calling AI for generation prompt...") + + # Import and set proper options for AI call + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + request = AiCallRequest(prompt=generationPromptRequest, context="", options=request_options) + response = await aiService.aiObjects.call(request) + result = response.content if response else "" + print(f"๐Ÿ” GENERATION PROMPT AI RESPONSE: '{result}'") # Replace the placeholder that the AI created with actual format rules if result: @@ -162,7 +183,21 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu result = result.replace("PLACEHOLDER_FOR_FORMAT_RULES", formatRules) # Debug output - print(f"๐Ÿ” DEBUG: Generation Prompt: {result if result else 'None'}") + print(f"๐Ÿ” GENERATION PROMPT FINAL: {result if result else 'None'}") + + # Save full generation prompt and AI response to debug file + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f: + f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n") + f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n") + f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n") + except Exception: + pass return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}" @@ -216,11 +251,15 @@ Do not include formatting instructions, file types, or output methods. # Call AI service to extract intention print(f"๐Ÿ” DEBUG: Calling AI for extraction intent...") - result = await aiService.callAi( - prompt=extractionPrompt, - documents=None, - options=None - ) + + # Import and set proper options for AI call + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + request = AiCallRequest(prompt=extractionPrompt, context="", options=request_options) + response = await aiService.aiObjects.call(request) + result = response.content if response else "" print(f"๐Ÿ” DEBUG: AI extraction intent result: '{result}'") return result if result else f"Extract all relevant content from the document according to the user's requirements: {userPrompt}" diff --git a/rename_renderers.py b/rename_renderers.py deleted file mode 100644 index 7c398bec..00000000 --- a/rename_renderers.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to rename renderer files from _renderer.py to renderer.py -and update all references in the codebase. -""" - -import os -import re -import shutil -from pathlib import Path -from typing import Dict, List, Tuple - -def get_renderer_files(renderers_dir: Path) -> List[Tuple[str, str]]: - """Get list of renderer files to rename.""" - renderer_files = [] - - for file_path in renderers_dir.glob("*_renderer.py"): - if file_path.name not in ['base_renderer.py', 'registry.py']: - old_name = file_path.name - # Extract the name part (e.g., "csv" from "csv_renderer.py") - name_part = old_name.replace('_renderer.py', '') - # Create new name (e.g., "rendererCsv.py") - new_name = f"renderer{name_part.capitalize()}.py" - renderer_files.append((old_name, new_name)) - - return renderer_files - -def update_file_imports(file_path: Path, old_to_new: Dict[str, str]) -> bool: - """Update import statements in a file.""" - try: - with open(file_path, 'r', encoding='utf-8') as f: - content = f.read() - - original_content = content - changes_made = False - - # Update import statements - for old_name, new_name in old_to_new.items(): - old_module = old_name.replace('.py', '') - new_module = new_name.replace('.py', '') - - # Pattern for from .old_module import - pattern1 = rf'from \.{re.escape(old_module)} import' - replacement1 = f'from .{new_module} import' - if re.search(pattern1, content): - content = re.sub(pattern1, replacement1, content) - changes_made = True - - # Pattern for from modules.services.serviceGeneration.renderers.old_module import - pattern2 = rf'from modules\.services\.serviceGeneration\.renderers\.{re.escape(old_module)} import' - replacement2 = f'from modules.services.serviceGeneration.renderers.{new_module} import' - if re.search(pattern2, content): - content = re.sub(pattern2, replacement2, content) - changes_made = True - - if changes_made: - with open(file_path, 'w', encoding='utf-8') as f: - f.write(content) - print(f"โœ… Updated imports in: {file_path}") - return True - else: - print(f"โ„น๏ธ No imports to update in: {file_path}") - return False - - except Exception as e: - print(f"โŒ Error updating {file_path}: {str(e)}") - return False - -def update_class_names_in_file(file_path: Path, old_to_new: Dict[str, str]) -> bool: - """Update class names in renderer files.""" - try: - with open(file_path, 'r', encoding='utf-8') as f: - content = f.read() - - original_content = content - changes_made = False - - # Update class names - for old_name, new_name in old_to_new.items(): - old_module = old_name.replace('.py', '') - new_module = new_name.replace('.py', '') - - # Extract the name part for class name - name_part = old_module.replace('_renderer', '') - old_class = f"{name_part.capitalize()}Renderer" - new_class = f"Renderer{name_part.capitalize()}" - - # Update class definition - pattern1 = rf'class {re.escape(old_class)}\(' - replacement1 = f'class {new_class}(' - if re.search(pattern1, content): - content = re.sub(pattern1, replacement1, content) - changes_made = True - - # Update class instantiation - pattern2 = rf'{re.escape(old_class)}\(' - replacement2 = f'{new_class}(' - if re.search(pattern2, content): - content = re.sub(pattern2, replacement2, content) - changes_made = True - - if changes_made: - with open(file_path, 'w', encoding='utf-8') as f: - f.write(content) - print(f"โœ… Updated class names in: {file_path}") - return True - else: - print(f"โ„น๏ธ No class names to update in: {file_path}") - return False - - except Exception as e: - print(f"โŒ Error updating class names in {file_path}: {str(e)}") - return False - -def main(): - """Main function to rename renderer files and update references.""" - print("๐Ÿ”„ Starting renderer file renaming process...") - - # Get the gateway directory - gateway_dir = Path(__file__).parent - renderers_dir = gateway_dir / "modules" / "services" / "serviceGeneration" / "renderers" - - if not renderers_dir.exists(): - print(f"โŒ Renderers directory not found: {renderers_dir}") - return - - print(f"๐Ÿ“ Working in directory: {renderers_dir}") - - # Get list of files to rename - renderer_files = get_renderer_files(renderers_dir) - - if not renderer_files: - print("โ„น๏ธ No renderer files found to rename.") - return - - print(f"๐Ÿ“‹ Found {len(renderer_files)} renderer files to rename:") - for old_name, new_name in renderer_files: - print(f" {old_name} โ†’ {new_name}") - - # Create mapping dictionary - old_to_new = {old_name: new_name for old_name, new_name in renderer_files} - - # Step 1: Update imports in all Python files - print("\n๐Ÿ”„ Step 1: Updating import statements...") - updated_files = [] - - # Search in gateway directory - for py_file in gateway_dir.rglob("*.py"): - if py_file.name != "rename_renderers.py": # Skip this script - if update_file_imports(py_file, old_to_new): - updated_files.append(py_file) - - print(f"โœ… Updated imports in {len(updated_files)} files") - - # Step 2: Update class names in renderer files - print("\n๐Ÿ”„ Step 2: Updating class names in renderer files...") - class_updated_files = [] - - for old_name, new_name in renderer_files: - old_file_path = renderers_dir / old_name - if old_file_path.exists(): - if update_class_names_in_file(old_file_path, old_to_new): - class_updated_files.append(old_file_path) - - print(f"โœ… Updated class names in {len(class_updated_files)} files") - - # Step 3: Rename the files - print("\n๐Ÿ”„ Step 3: Renaming files...") - renamed_files = [] - - for old_name, new_name in renderer_files: - old_file_path = renderers_dir / old_name - new_file_path = renderers_dir / new_name - - if old_file_path.exists(): - try: - shutil.move(str(old_file_path), str(new_file_path)) - renamed_files.append((old_name, new_name)) - print(f"โœ… Renamed: {old_name} โ†’ {new_name}") - except Exception as e: - print(f"โŒ Error renaming {old_name}: {str(e)}") - else: - print(f"โš ๏ธ File not found: {old_name}") - - print(f"\n๐ŸŽ‰ Renaming process completed!") - print(f"๐Ÿ“Š Summary:") - print(f" - Files renamed: {len(renamed_files)}") - print(f" - Import statements updated: {len(updated_files)}") - print(f" - Class names updated: {len(class_updated_files)}") - - if renamed_files: - print(f"\n๐Ÿ“‹ Renamed files:") - for old_name, new_name in renamed_files: - print(f" โœ… {old_name} โ†’ {new_name}") - -if __name__ == "__main__": - main() diff --git a/test_document_processing.py b/test_document_processing.py index d21ae00b..6170a5c5 100644 --- a/test_document_processing.py +++ b/test_document_processing.py @@ -154,9 +154,11 @@ async def process_documents_and_generate_summary(): # userPrompt = "Analyze these documents and create a comprehensive DOCX summary document including: 1) Document types and purposes, 2) Key information and main points, 3) Important details and numbers, 4) Notable sections, 5) Overall assessment and recommendations." + userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted." + # userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted." - userPrompt = "Create a docx file containing the combined documents in french language." + # userPrompt = "Create a docx file containing the combined documents in french language." try: # Single AI call with DOCX generation @@ -164,7 +166,7 @@ async def process_documents_and_generate_summary(): prompt=userPrompt, documents=documents, options=ai_options, - outputFormat="docx", + outputFormat="xlsx", title="Document Analysis Summary" ) diff --git a/test_fallback_mechanism.py b/test_fallback_mechanism.py deleted file mode 100644 index 076ce22f..00000000 --- a/test_fallback_mechanism.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to verify the fallback mechanism in interfaceAiObjects.py -""" - -import asyncio -import sys -import os -import logging -from pathlib import Path - -# Add the gateway directory to the Python path -gateway_dir = Path(__file__).parent -sys.path.insert(0, str(gateway_dir)) - -# Set up logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -async def test_fallback_mechanism(): - """Test the fallback mechanism by simulating a failing primary model.""" - try: - from modules.interfaces.interfaceAiObjects import AiObjects - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType - - logger.info("๐Ÿงช Testing fallback mechanism...") - - # Create AiObjects instance - ai_objects = await AiObjects.create() - logger.info("โœ… AiObjects created successfully") - - # Test 1: Normal operation (should work with primary model) - logger.info("๐Ÿ“ Test 1: Normal operation") - request = AiCallRequest( - prompt="Hello, this is a test prompt. Please respond with 'Test successful'.", - context="", - options=AiCallOptions(operationType=OperationType.GENERAL) - ) - - try: - response = await ai_objects.call(request) - logger.info(f"โœ… Test 1 successful: {response.modelName} - {response.content[:50]}...") - except Exception as e: - logger.warning(f"โš ๏ธ Test 1 failed: {str(e)}") - - # Test 2: Image analysis fallback - logger.info("๐Ÿ–ผ๏ธ Test 2: Image analysis fallback") - try: - # Create a dummy image data (base64 encoded 1x1 pixel) - dummy_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" - result = await ai_objects.callImage( - prompt="Describe this image", - imageData=dummy_image, - mimeType="image/png", - options=AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS) - ) - logger.info(f"โœ… Test 2 successful: {result[:50]}...") - except Exception as e: - logger.warning(f"โš ๏ธ Test 2 failed: {str(e)}") - - # Test 3: Test fallback model selection - logger.info("๐Ÿ”„ Test 3: Fallback model selection") - fallback_models = ai_objects._getFallbackModels(OperationType.GENERAL) - logger.info(f"โœ… Fallback models for GENERAL: {fallback_models}") - - fallback_models_image = ai_objects._getFallbackModels(OperationType.IMAGE_ANALYSIS) - logger.info(f"โœ… Fallback models for IMAGE_ANALYSIS: {fallback_models_image}") - - logger.info("๐ŸŽ‰ Fallback mechanism test completed!") - - except Exception as e: - logger.error(f"โŒ Test failed: {str(e)}") - import traceback - traceback.print_exc() - -if __name__ == "__main__": - asyncio.run(test_fallback_mechanism()) diff --git a/test_json_to_docx.docx b/test_json_to_docx.docx deleted file mode 100644 index 15112f37..00000000 Binary files a/test_json_to_docx.docx and /dev/null differ diff --git a/test_json_to_docx.py b/test_json_to_docx.py deleted file mode 100644 index a03e0b28..00000000 --- a/test_json_to_docx.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for JSON-to-DOCX rendering pipeline. -""" - -import asyncio -import json -import sys -import os - -from modules.services.serviceGeneration.renderers.docx_renderer import DocxRenderer - -async def test_json_to_docx(): - """Test the JSON-to-DOCX rendering pipeline.""" - - # Create test JSON document - test_json = { - "metadata": { - "title": "Test Document", - "version": "1.0" - }, - "sections": [ - { - "id": "heading1", - "type": "heading", - "data": { - "level": 1, - "text": "Document Overview" - } - }, - { - "id": "paragraph1", - "type": "paragraph", - "data": { - "text": "This is a test paragraph to verify JSON-to-DOCX rendering works correctly." - } - }, - { - "id": "table1", - "type": "table", - "data": { - "headers": ["Name", "Quantity", "Status"], - "rows": [ - ["Item 1", "5", "Active"], - ["Item 2", "3", "Inactive"], - ["Item 3", "10", "Active"] - ] - } - }, - { - "id": "list1", - "type": "bullet_list", - "data": { - "items": [ - "First bullet point", - "Second bullet point", - "Third bullet point" - ] - } - }, - { - "id": "heading2", - "type": "heading", - "data": { - "level": 2, - "text": "Summary" - } - }, - { - "id": "paragraph2", - "type": "paragraph", - "data": { - "text": "This document demonstrates the new JSON-based rendering system." - } - } - ] - } - - print("๐Ÿงช Testing JSON-to-DOCX rendering...") - print(f"๐Ÿ“„ Test document has {len(test_json['sections'])} sections") - - try: - # Create renderer - renderer = DocxRenderer() - - # Test rendering - docx_content, mime_type = await renderer.render( - extracted_content=test_json, - title="Test Document", - user_prompt="Create a test document" - ) - - print(f"โœ… Rendering successful!") - print(f"๐Ÿ“Š MIME type: {mime_type}") - print(f"๐Ÿ“ Content length: {len(docx_content)} characters") - print(f"๐Ÿ” Content preview: {docx_content[:100]}...") - - # Save test file - import base64 - docx_bytes = base64.b64decode(docx_content) - with open("test_json_to_docx.docx", "wb") as f: - f.write(docx_bytes) - - print(f"๐Ÿ’พ Test DOCX saved as: test_json_to_docx.docx") - - return True - - except Exception as e: - print(f"โŒ Rendering failed: {str(e)}") - import traceback - traceback.print_exc() - return False - -if __name__ == "__main__": - success = asyncio.run(test_json_to_docx()) - if success: - print("\n๐ŸŽ‰ JSON-to-DOCX rendering test PASSED!") - else: - print("\n๐Ÿ’ฅ JSON-to-DOCX rendering test FAILED!") - sys.exit(1)