258 lines
9.7 KiB
Python
258 lines
9.7 KiB
Python
"""
|
|
CSV renderer for report generation.
|
|
"""
|
|
|
|
from .rendererBaseTemplate import BaseRenderer
|
|
from typing import Dict, Any, Tuple, List
|
|
|
|
class RendererCsv(BaseRenderer):
|
|
"""Renders content to CSV format with format-specific extraction."""
|
|
|
|
@classmethod
def get_supported_formats(cls) -> List[str]:
    """List the output format identifiers handled by this renderer.

    Returns:
        A one-element list containing ``'csv'``.
    """
    formats: List[str] = ['csv']
    return formats
|
|
|
|
@classmethod
def get_format_aliases(cls) -> List[str]:
    """List the alternative names under which this renderer can be requested.

    Returns:
        The aliases ``'spreadsheet'`` and ``'table'``, in that order.
    """
    aliases: List[str] = ['spreadsheet', 'table']
    return aliases
|
|
|
|
@classmethod
def get_priority(cls) -> int:
    """Report the priority value of the CSV renderer.

    Returns:
        The constant ``70``.
    """
    priority: int = 70
    return priority
|
|
|
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to CSV text.

    Args:
        extracted_content: Structured document; passed unchanged to
            ``_generate_csv_from_json``.
        title: Document title handed to the generator and used in the
            error fallback.
        user_prompt: Not used by this method.
        ai_service: Not used by this method (CSV needs no styling pass).

    Returns:
        A ``(csv_text, "text/csv")`` tuple. When generation raises, the
        error is logged and a two-row fallback CSV (``Title,Content``
        header plus one row carrying the title and the error message)
        is returned instead of propagating the exception.
    """
    import csv
    import io

    try:
        # Generate CSV directly from JSON (no styling needed for CSV)
        csv_content = await self._generate_csv_from_json(extracted_content, title)
        return csv_content, "text/csv"
    except Exception as e:
        self.logger.error(f"Error rendering CSV: {str(e)}")

        # Build the fallback with csv.writer rather than string
        # interpolation: a comma, quote or newline inside the title or the
        # error text would otherwise spill into extra columns/rows.
        fallback = io.StringIO()
        writer = csv.writer(fallback, lineterminator="\n")
        writer.writerow(["Title", "Content"])
        writer.writerow([title, f"Error rendering report: {str(e)}"])

        # Drop the final line terminator so plain values yield the same
        # text as the previous hand-built fallback.
        return fallback.getvalue().rstrip("\n"), "text/csv"
|
|
|
|
async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
|
"""Generate CSV content from structured JSON document."""
|
|
try:
|
|
# Validate JSON structure
|
|
if not isinstance(json_content, dict):
|
|
raise ValueError("JSON content must be a dictionary")
|
|
|
|
if "sections" not in json_content:
|
|
raise ValueError("JSON content must contain 'sections' field")
|
|
|
|
# Use title from JSON metadata if available, otherwise use provided title
|
|
document_title = json_content.get("metadata", {}).get("title", title)
|
|
|
|
# Generate CSV content
|
|
csv_rows = []
|
|
|
|
# Add title row
|
|
if document_title:
|
|
csv_rows.append([document_title])
|
|
csv_rows.append([]) # Empty row
|
|
|
|
# Process each section in order
|
|
sections = json_content.get("sections", [])
|
|
for section in sections:
|
|
section_csv = self._render_json_section_to_csv(section)
|
|
if section_csv:
|
|
csv_rows.extend(section_csv)
|
|
csv_rows.append([]) # Empty row between sections
|
|
|
|
# Convert to CSV string
|
|
csv_content = self._convert_rows_to_csv(csv_rows)
|
|
|
|
return csv_content
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
|
|
raise Exception(f"CSV generation failed: {str(e)}")
|
|
|
|
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a single JSON section to CSV rows."""
|
|
try:
|
|
section_type = section.get("content_type", "paragraph")
|
|
elements = section.get("elements", [])
|
|
|
|
csv_rows = []
|
|
|
|
# Add section title if available
|
|
section_title = section.get("title")
|
|
if section_title:
|
|
csv_rows.append([f"# {section_title}"])
|
|
|
|
# Process each element in the section
|
|
for element in elements:
|
|
if section_type == "table":
|
|
csv_rows.extend(self._render_json_table_to_csv(element))
|
|
elif section_type == "list":
|
|
csv_rows.extend(self._render_json_list_to_csv(element))
|
|
elif section_type == "heading":
|
|
csv_rows.extend(self._render_json_heading_to_csv(element))
|
|
elif section_type == "paragraph":
|
|
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
|
elif section_type == "code":
|
|
csv_rows.extend(self._render_json_code_to_csv(element))
|
|
else:
|
|
# Fallback to paragraph for unknown types
|
|
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
|
|
|
return csv_rows
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
|
return [["[Error rendering section]"]]
|
|
|
|
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a JSON table to CSV rows."""
|
|
try:
|
|
headers = table_data.get("headers", [])
|
|
rows = table_data.get("rows", [])
|
|
|
|
csv_rows = []
|
|
|
|
if headers:
|
|
csv_rows.append(headers)
|
|
|
|
if rows:
|
|
csv_rows.extend(rows)
|
|
|
|
return csv_rows
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
|
return [["[Error rendering table]"]]
|
|
|
|
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a JSON list to CSV rows."""
|
|
try:
|
|
items = list_data.get("items", [])
|
|
csv_rows = []
|
|
|
|
for item in items:
|
|
if isinstance(item, dict):
|
|
text = item.get("text", "")
|
|
subitems = item.get("subitems", [])
|
|
csv_rows.append([text])
|
|
|
|
# Add subitems as indented rows
|
|
for subitem in subitems:
|
|
if isinstance(subitem, dict):
|
|
csv_rows.append([f" - {subitem.get('text', '')}"])
|
|
else:
|
|
csv_rows.append([f" - {subitem}"])
|
|
else:
|
|
csv_rows.append([str(item)])
|
|
|
|
return csv_rows
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering list: {str(e)}")
|
|
return [["[Error rendering list]"]]
|
|
|
|
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a JSON heading to CSV rows."""
|
|
try:
|
|
text = heading_data.get("text", "")
|
|
level = heading_data.get("level", 1)
|
|
|
|
if text:
|
|
# Use # symbols for heading levels
|
|
heading_text = f"{'#' * level} {text}"
|
|
return [[heading_text]]
|
|
|
|
return []
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
|
return [["[Error rendering heading]"]]
|
|
|
|
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a JSON paragraph to CSV rows."""
|
|
try:
|
|
text = paragraph_data.get("text", "")
|
|
|
|
if text:
|
|
# Split long paragraphs into multiple rows if needed
|
|
if len(text) > 100:
|
|
words = text.split()
|
|
rows = []
|
|
current_row = []
|
|
current_length = 0
|
|
|
|
for word in words:
|
|
if current_length + len(word) > 100 and current_row:
|
|
rows.append([" ".join(current_row)])
|
|
current_row = [word]
|
|
current_length = len(word)
|
|
else:
|
|
current_row.append(word)
|
|
current_length += len(word) + 1
|
|
|
|
if current_row:
|
|
rows.append([" ".join(current_row)])
|
|
|
|
return rows
|
|
else:
|
|
return [[text]]
|
|
|
|
return []
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
|
return [["[Error rendering paragraph]"]]
|
|
|
|
def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
|
|
"""Render a JSON code block to CSV rows."""
|
|
try:
|
|
code = code_data.get("code", "")
|
|
language = code_data.get("language", "")
|
|
|
|
csv_rows = []
|
|
|
|
if language:
|
|
csv_rows.append([f"Code ({language}):"])
|
|
|
|
if code:
|
|
# Split code into lines
|
|
code_lines = code.split('\n')
|
|
for line in code_lines:
|
|
csv_rows.append([f" {line}"])
|
|
|
|
return csv_rows
|
|
|
|
except Exception as e:
|
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
|
return [["[Error rendering code block]"]]
|
|
|
|
def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
|
|
"""Convert rows to CSV string."""
|
|
import csv
|
|
import io
|
|
|
|
output = io.StringIO()
|
|
writer = csv.writer(output)
|
|
|
|
for row in rows:
|
|
if row: # Only write non-empty rows
|
|
writer.writerow(row)
|
|
|
|
return output.getvalue()
|
|
|
|
def _clean_csv_content(self, content: str, title: str) -> str:
|
|
"""Clean and validate CSV content from AI."""
|
|
content = content.strip()
|
|
|
|
# Remove markdown code blocks if present
|
|
if content.startswith("```") and content.endswith("```"):
|
|
lines = content.split('\n')
|
|
if len(lines) > 2:
|
|
content = '\n'.join(lines[1:-1]).strip()
|
|
|
|
return content
|