"""
Base renderer class for all format renderers.

Defines the abstract ``BaseRenderer`` interface together with shared helpers
for reading section data and for requesting AI-generated style definitions.
"""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Dict, Any, Tuple, List
|
|
import logging
|
|
import json
|
|
|
|
# Module-level logger; BaseRenderer.__init__ exposes it on instances as ``self.logger``.
logger = logging.getLogger(__name__)
|
|
|
|
class BaseRenderer(ABC):
    """Abstract base class for all format renderers.

    Subclasses must implement :meth:`render`. They may override the
    classmethod hooks (:meth:`get_supported_formats`,
    :meth:`get_format_aliases`, :meth:`get_priority`) and
    :meth:`_convert_colors_format`. The remaining private helpers read
    values out of the section-based report dictionaries and provide a
    best-effort way to obtain style definitions from an AI service.
    """

    def __init__(self):
        # Expose the module-level logger on the instance so helpers and
        # subclasses can log through ``self.logger``.
        self.logger = logger

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return the list of format names supported by this renderer.

        The base implementation returns an empty list; override it in
        subclasses to declare the supported formats.
        """
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return the list of format aliases for this renderer.

        The base implementation returns an empty list; override it in
        subclasses to declare aliases.
        """
        return []

    @classmethod
    def get_priority(cls) -> int:
        """Return the priority of this renderer (higher number = higher priority).

        Used when multiple renderers support the same format. The base
        implementation returns ``0``.
        """
        return 0

    @abstractmethod
    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to the target format.

        Args:
            extracted_content: Structured JSON content with sections and metadata.
            title: Report title.
            user_prompt: Original user prompt for context.
            ai_service: AI service instance for additional processing.

        Returns:
            tuple: ``(rendered_content, mime_type)``.
        """

    def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return ``report_data['sections']``, or an empty list when the key is absent."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``report_data['metadata']``, or an empty dict when the key is absent."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Return the title stored in the report metadata, or ``fallback_title``.

        The fallback is used when the metadata is missing or ``None``, or
        when its ``title`` entry is missing, ``None`` or empty, so callers
        always receive a usable title.
        """
        # ``or {}`` guards against an explicit ``"metadata": None`` entry.
        metadata = report_data.get('metadata') or {}
        return metadata.get('title') or fallback_title

    def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
        """Check that ``json_content`` has the expected section structure.

        Returns:
            ``True`` only when ``json_content`` is a dict whose ``"sections"``
            entry is a list and every section is a dict containing both a
            ``"type"`` and a ``"data"`` key; ``False`` otherwise.
        """
        if not isinstance(json_content, dict):
            return False

        sections = json_content.get("sections")
        # A missing key yields None, which is rejected here as well.
        if not isinstance(sections, list):
            return False

        return all(
            isinstance(section, dict) and "type" in section and "data" in section
            for section in sections
        )

    def _get_section_type(self, section: Dict[str, Any]) -> str:
        """Return the section's ``"type"``, defaulting to ``"paragraph"``."""
        return section.get("type", "paragraph")

    def _get_section_data(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Return the section's ``"data"``, defaulting to an empty dict."""
        return section.get("data", {})

    def _get_section_id(self, section: Dict[str, Any]) -> str:
        """Return the section's ``"id"``, defaulting to ``"unknown"``."""
        return section.get("id", "unknown")

    def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
        """Return ``(headers, rows)`` from table section data.

        Each value defaults to an empty list when its key is absent.
        """
        return section_data.get("headers", []), section_data.get("rows", [])

    def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
        """Return the bullet texts found under ``section_data['items']``.

        Plain strings are taken as-is and dicts contribute their ``"text"``
        value. Items of any other shape are skipped.
        """
        texts = []
        for item in section_data.get("items", []):
            if isinstance(item, str):
                texts.append(item)
            elif isinstance(item, dict) and "text" in item:
                texts.append(item["text"])
        return texts

    def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
        """Return ``(level, text)`` for a heading; defaults are ``1`` and ``""``."""
        return section_data.get("level", 1), section_data.get("text", "")

    def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
        """Return the paragraph ``"text"``, defaulting to an empty string."""
        return section_data.get("text", "")

    def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Return ``(code, language)`` for a code block; both default to ``""``."""
        return section_data.get("code", ""), section_data.get("language", "")

    def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Return ``(base64_data, alt_text)`` for an image section.

        ``base64Data`` defaults to ``""`` and ``altText`` to ``"Image"``.
        """
        return section_data.get("base64Data", ""), section_data.get("altText", "Image")

    def _get_supported_section_types(self) -> List[str]:
        """Return the list of section type names this base class knows about."""
        return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]

    def _is_valid_section_type(self, section_type: str) -> bool:
        """Return whether ``section_type`` is one of the supported section types."""
        return section_type in self._get_supported_section_types()

    def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten a section into a dict keyed by its type-specific fields.

        The result always contains ``"type"`` plus the fields extracted by
        the matching ``_extract_*`` helper. Unknown section types are treated
        as paragraphs.
        """
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)

        if section_type == "table":
            headers, rows = self._extract_table_data(section_data)
            return {"type": "table", "headers": headers, "rows": rows}

        if section_type == "bullet_list":
            items = self._extract_bullet_list_items(section_data)
            return {"type": "bullet_list", "items": items}

        if section_type == "heading":
            level, text = self._extract_heading_data(section_data)
            return {"type": "heading", "level": level, "text": text}

        if section_type == "code_block":
            code, language = self._extract_code_block_data(section_data)
            return {"type": "code_block", "code": code, "language": language}

        if section_type == "image":
            base64_data, alt_text = self._extract_image_data(section_data)
            return {"type": "image", "base64Data": base64_data, "altText": alt_text}

        # "paragraph" and every unrecognised type share the paragraph shape.
        text = self._extract_paragraph_text(section_data)
        return {"type": "paragraph", "text": text}

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Return ``timestamp`` unchanged, or the current UTC time when it is empty.

        The generated value has the form ``YYYY-MM-DD HH:MM:SS UTC``.
        """
        if timestamp:
            return timestamp

        # timezone.utc is equivalent to datetime.UTC but also exists on
        # Python versions older than 3.11.
        from datetime import datetime, timezone

        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    # ===== GENERIC AI STYLING HELPERS =====

    async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """Ask the AI service for style definitions, falling back to defaults.

        This is a best-effort helper: any failure (no service, import error,
        failing call, empty or malformed response) results in
        ``default_styles`` being returned instead of an exception.

        Args:
            ai_service: AI service instance; when falsy no call is made.
            style_template: Format-specific style template used as the prompt.
            default_styles: Default styles to fall back to.

        Returns:
            Dict with styling definitions.
        """
        if not ai_service:
            return default_styles

        try:
            # Imported inside the try block so that an unavailable AI data
            # model module degrades to the defaults instead of breaking the
            # renderer.
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=style_template, context="", options=request_options)
            response = await ai_service.aiObjects.call(request)

            raw_content = response.content if response and response.content else ""
            self.logger.debug(
                "AI styling response type: %s, length: %d",
                type(response).__name__,
                len(raw_content),
            )

            return self._parse_ai_styles(raw_content, default_styles)

        except Exception as e:
            # Deliberate catch-all: styling is optional, so rendering must
            # continue with the defaults whatever went wrong.
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return default_styles

    def _parse_ai_styles(self, raw_content: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """Turn the raw AI response text into a styles dict.

        Returns ``default_styles`` when the text is empty, is not valid JSON,
        or decodes to something other than a JSON object. Otherwise the
        decoded dict is passed through :meth:`_convert_colors_format`.
        """
        result = raw_content.strip()
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles

        result = self._strip_markdown_fence(result)

        try:
            styles = json.loads(result)
        except json.JSONDecodeError as json_error:
            self.logger.debug("AI styling raw result: %s...", result[:200])
            self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
            return default_styles

        # Valid JSON can still be a list or scalar; callers expect a dict.
        if not isinstance(styles, dict):
            self.logger.warning(
                "AI styling returned %s instead of a JSON object, using defaults",
                type(styles).__name__,
            )
            return default_styles

        self.logger.debug("AI styling parsed keys: %s", list(styles.keys()))

        # Convert colors to the renderer-specific format.
        return self._convert_colors_format(styles)

    @staticmethod
    def _strip_markdown_fence(text: str) -> str:
        """Remove a Markdown code fence wrapped around a JSON payload.

        A ```` ```json ```` fenced block found anywhere in ``text`` wins;
        otherwise a fence that opens at the very start of ``text`` is
        stripped. Text without a fence is returned unchanged.
        """
        import re

        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
        if fenced:
            return fenced.group(1).strip()

        if text.startswith('```'):
            # Drop the opening fence (with optional "json" tag) and the
            # closing fence at the end of the text.
            text = re.sub(r'^```(?:json)?\s*', '', text)
            return re.sub(r'\s*```$', '', text)

        return text

    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Convert colors to the format required by the renderer.

        The base implementation returns ``styles`` unchanged; override it in
        subclasses for format-specific color handling.
        """
        return styles

    def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
        """Create a standardized AI style prompt for any format.

        Args:
            format_name: Name of the format (e.g., "docx", "xlsx", "pptx").
                Currently not included in the generated prompt.
            user_prompt: User's original prompt. Currently not included in
                the generated prompt.
            style_schema: Format-specific style schema, embedded as
                4-space-indented JSON.

        Returns:
            Formatted prompt string.
        """
        schema_json = json.dumps(style_schema, indent=4)

        return f"""Return this exact JSON structure with your styling customizations:

{schema_json}

NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""