all renderers active and using json objects
This commit is contained in:
parent
a26553c34c
commit
99215e27fe
25 changed files with 2006 additions and 1416 deletions
|
|
@ -746,8 +746,13 @@ Return only the JSON structure with actual content from the image. Do not includ
|
|||
# Process any document container as text content
|
||||
request_options = options if options is not None else AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
print(f"🔍 Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
|
||||
print(f"🔍 EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
|
||||
logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
|
||||
|
||||
# Log extraction prompt and context
|
||||
print(f"🔍 EXTRACTION PROMPT: {prompt}")
|
||||
print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=prompt,
|
||||
context=part.data,
|
||||
|
|
@ -756,6 +761,23 @@ Return only the JSON structure with actual content from the image. Do not includ
|
|||
response = await self.aiObjects.call(request)
|
||||
ai_result = response.content
|
||||
|
||||
# Log extraction response
|
||||
print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
|
||||
|
||||
# Save full extraction prompt and response to debug file
|
||||
try:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
debug_root = "./test-chat/ai"
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
|
||||
f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
|
||||
f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If generating JSON, validate the response
|
||||
if generate_json:
|
||||
try:
|
||||
|
|
@ -798,8 +820,13 @@ Return only the JSON structure with actual content from the image. Do not includ
|
|||
request_options = options if options is not None else AiCallOptions()
|
||||
# FIXED: Set operation type to general for text processing
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
print(f"🔍 Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
|
||||
print(f"🔍 EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
|
||||
logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
|
||||
|
||||
# Log extraction prompt and context
|
||||
print(f"🔍 EXTRACTION PROMPT: {prompt}")
|
||||
print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=prompt,
|
||||
context=part.data,
|
||||
|
|
@ -808,6 +835,23 @@ Return only the JSON structure with actual content from the image. Do not includ
|
|||
response = await self.aiObjects.call(request)
|
||||
ai_result = response.content
|
||||
|
||||
# Log extraction response
|
||||
print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
|
||||
|
||||
# Save full extraction prompt and response to debug file
|
||||
try:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
debug_root = "./test-chat/ai"
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
|
||||
f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
|
||||
f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If generating JSON, validate the response
|
||||
if generate_json:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -318,18 +318,17 @@ class GenerationService:
|
|||
if "sections" not in extractedContent:
|
||||
raise ValueError("extractedContent must contain 'sections' field")
|
||||
|
||||
# DEBUG: dump renderer input to diagnose JSON structure TODO REMOVE
|
||||
# DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE
|
||||
try:
|
||||
import os
|
||||
import json
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
debug_root = "./test-chat/ai"
|
||||
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
|
||||
os.makedirs(debug_dir, exist_ok=True)
|
||||
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
|
||||
with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as f:
|
||||
json.dump(extractedContent, f, indent=2, ensure_ascii=False)
|
||||
f.write(f"content_size: {len(str(extractedContent))} characters\n")
|
||||
f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
"""
|
||||
Base renderer class for all format renderers.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BaseRenderer(ABC):
    """Abstract parent shared by every output-format renderer.

    Subclasses advertise what they can handle through the class-level hooks
    (formats, aliases, priority) and must implement both abstract methods.
    """

    def __init__(self):
        # Every renderer instance shares the module-level logger.
        self.logger = logger

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Format names handled by this renderer.

        The base class handles none; subclasses override this hook.
        """
        return list()

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """
        Alternative names for the supported formats.

        The base class declares none; subclasses override this hook.
        """
        return list()

    @classmethod
    def get_priority(cls) -> int:
        """
        Renderer priority (higher number = higher priority).

        Used when multiple renderers support the same format.
        """
        return 0

    @abstractmethod
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """
        Build the format-specific prompt used for AI content extraction.

        Args:
            user_prompt: User's original prompt for report generation
            title: Report title

        Returns:
            str: Format-specific prompt for AI extraction
        """
        ...

    @abstractmethod
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Turn extracted content into the target format.

        Args:
            extracted_content: Raw content extracted by AI using format-specific prompt
            title: Report title

        Returns:
            tuple: (rendered_content, mime_type)
        """
        ...

    def _extract_sections(self, report_data: Dict[str, Any]) -> list:
        """Return the 'sections' entry of the report data, or an empty list."""
        sections = report_data.get('sections', [])
        return sections

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return the 'metadata' entry of the report data, or an empty dict."""
        metadata = report_data.get('metadata', {})
        return metadata

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Return the report's own 'title' if present, else the fallback."""
        chosen = report_data.get('title', fallback_title)
        return chosen

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Return the given timestamp unchanged, or the current UTC time as text."""
        if not timestamp:
            from datetime import datetime, UTC
            return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
        return timestamp
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
"""
|
||||
HTML renderer for report generation.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class HtmlRenderer(BaseRenderer):
    """Renders AI-produced HTML, repairing it into a complete document.

    The AI is expected to return HTML already; this renderer only strips an
    accidental Markdown code fence and makes sure the result is a full
    document with a DOCTYPE.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to HTML format.

        Args:
            extracted_content: HTML text produced by the AI
            title: Report title, used when a document wrapper must be added

        Returns:
            tuple: (html_document, "text/html"); on any failure a minimal
            error page is returned instead of raising.
        """
        try:
            # The extracted content should already be HTML from the AI;
            # just clean it up and ensure it is a full document.
            html_content = self._clean_html_content(extracted_content, title)
            return html_content, "text/html"
        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Title and error text are not markup: escape them so they cannot
            # inject tags into the fallback page.
            from html import escape
            safe_title = escape(str(title))
            safe_error = escape(str(e))
            return (
                f"<html><head><title>{safe_title}</title></head><body><h1>{safe_title}</h1>"
                f"<p>Error rendering report: {safe_error}</p></body></html>",
                "text/html",
            )

    def _clean_html_content(self, content: str, title: str) -> str:
        """
        Clean AI-produced HTML and make sure it is a complete document.

        Strips a surrounding Markdown code fence, prepends a DOCTYPE to a bare
        <html> document, and wraps HTML fragments in a minimal document whose
        <title> is the (escaped) report title.
        """
        from html import escape

        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # DOCTYPE and tag names are case-insensitive in HTML, so compare on a
        # lowercased prefix; otherwise "<!doctype html>" would be wrapped in a
        # second document.
        prefix = content[:16].lower()
        if not prefix.startswith('<!doctype'):
            if prefix.startswith('<html'):
                content = '<!DOCTYPE html>\n' + content
            else:
                safe_title = escape(str(title))
                content = (
                    f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8">'
                    f'<title>{safe_title}</title></head>\n<body>\n{content}\n</body>\n</html>'
                )

        return content
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
"""
|
||||
JSON renderer for report generation.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import json
|
||||
|
||||
class JsonRenderer(BaseRenderer):
    """Renderer that validates and pretty-prints AI-produced JSON."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        formats = ['json']
        return formats

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        aliases = ['data']
        return aliases

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        guidelines = (
            "JSON FORMAT GUIDELINES:\n"
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n"
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n"
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n"
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n"
            "OUTPUT: Return ONLY valid, parseable JSON."
        )
        return guidelines

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to JSON format.

        Returns (json_text, "application/json"). If cleaning raises, a small
        JSON error document carrying the title and error text is returned.
        """
        mime_type = "application/json"
        try:
            # The AI output should already be JSON; only validate/format it.
            return self._clean_json_content(extracted_content, title), mime_type
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Minimal JSON fallback
            error_sections = [{"type": "text", "content": f"Error rendering report: {str(e)}"}]
            fallback_data = {
                "title": title,
                "sections": error_sections,
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), mime_type

    def _clean_json_content(self, content: str, title: str) -> str:
        """
        Strip an optional Markdown fence and re-indent the JSON.

        Text that does not parse as JSON is returned as-is (after stripping).
        """
        text = content.strip()

        # Drop the first and last line when the whole text is fenced.
        is_fenced = text.startswith("```") and text.endswith("```")
        if is_fenced:
            rows = text.split('\n')
            if len(rows) > 2:
                text = '\n'.join(rows[1:-1]).strip()

        try:
            # Round-trip through the parser to validate and normalise indentation.
            return json.dumps(json.loads(text), indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            # Not valid JSON: hand back the cleaned text unchanged.
            return text
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
"""
|
||||
Markdown renderer for report generation.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class MarkdownRenderer(BaseRenderer):
    """Renderer that passes AI-produced Markdown through with light cleanup."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        formats = ['md', 'markdown']
        return formats

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        aliases = ['mdown', 'mkd']
        return aliases

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        guidelines = (
            "MARKDOWN FORMAT GUIDELINES:\n"
            "- Use proper Markdown syntax only (no HTML wrappers).\n"
            "- # for main title, ## for sections, ### for subsections.\n"
            "- Tables with | separators and a header row.\n"
            "- Bullet lists with - or *.\n"
            "- Emphasis with **bold** and *italic*.\n"
            "- Code blocks with ```language.\n"
            "- Horizontal rules (---) to separate major sections when helpful.\n"
            "- Include links [text](url) and images  when referenced by sources.\n"
            "OUTPUT: Return ONLY raw Markdown content without code fences."
        )
        return guidelines

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to Markdown format.

        Returns (markdown_text, "text/markdown"). If cleaning raises, a short
        Markdown error note headed by the title is returned instead.
        """
        mime_type = "text/markdown"
        try:
            # The AI output should already be Markdown; only tidy it.
            return self._clean_markdown_content(extracted_content, title), mime_type
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", mime_type

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Trim whitespace and remove a code fence wrapping the whole text."""
        text = content.strip()

        # Drop the first and last line when the text both starts and ends
        # with a fence marker.
        is_fenced = text.startswith("```") and text.endswith("```")
        if is_fenced:
            rows = text.split('\n')
            if len(rows) > 2:
                text = '\n'.join(rows[1:-1]).strip()

        return text
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
"""
|
||||
PDF renderer for report generation using reportlab.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import io
|
||||
import base64
|
||||
from datetime import datetime, UTC
|
||||
|
||||
try:
|
||||
from reportlab.lib.pagesizes import letter, A4
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import inch
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
||||
REPORTLAB_AVAILABLE = True
|
||||
except ImportError:
|
||||
REPORTLAB_AVAILABLE = False
|
||||
|
||||
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The extracted text is read line by line: '# ', '## ' and '### ' prefixes
    become headings, '- ' / '* ' prefixes become bullets, runs of lines
    containing '|' become tables, and everything else becomes paragraphs.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to PDF format.

        Args:
            extracted_content: Text content produced by the AI
            title: Report title

        Returns:
            tuple: (base64-encoded PDF, "application/pdf"); an HTML document
            with "text/html" when reportlab is not installed; or an error
            message with "text/plain" when generation fails.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """
        Generate the PDF with reportlab and return it base64-encoded.

        Raises:
            Exception: any reportlab/processing error is logged and re-raised.
        """
        # reportlab's Paragraph parses its text as XML-like markup, so plain
        # text must be escaped or characters such as '<' and '&' break parsing.
        from xml.sax.saxutils import escape

        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # Heading prefix -> (paragraph style, space after the heading).
            # The prefixes are mutually exclusive, so lookup order is irrelevant.
            heading_rules = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Title page
            story = [
                Paragraph(escape(str(title)), title_style),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
                PageBreak(),
            ]

            # Body lines collected since the last heading
            current_section = []

            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                for prefix, style, gap in heading_rules:
                    if line.startswith(prefix):
                        # A heading closes the section gathered so far.
                        if current_section:
                            story.extend(self._process_section(current_section, styles))
                            current_section = []
                        story.append(Paragraph(escape(line[len(prefix):]), style))
                        story.append(Spacer(1, gap))
                        break
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Return PDF content as base64 text
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """
        Convert the body lines of one section into reportlab flowables.

        Consecutive lines containing '|' are grouped and rendered as one
        table (or as plain paragraphs when they do not form a table with at
        least two rows). Processing continues after a table, so text that
        follows it is kept.
        """
        from xml.sax.saxutils import escape

        elements = []
        table_lines = []

        def flush_table():
            """Emit the pending '|' lines as a table, or as paragraphs."""
            if not table_lines:
                return
            table_data = self._extract_table_data(table_lines)
            if table_data:
                table = Table(table_data)
                table.setStyle(TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('FONTSIZE', (0, 0), (-1, 0), 14),
                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
                ]))
                elements.append(table)
                elements.append(Spacer(1, 12))
            else:
                # Not really a table (e.g. a single sentence containing '|').
                for text in table_lines:
                    elements.append(Paragraph(escape(text), styles['Normal']))
                    elements.append(Spacer(1, 6))
            table_lines.clear()

        for line in lines:
            if not line.strip():
                flush_table()
                continue

            if '|' in line:
                table_lines.append(line)
                continue

            flush_table()

            if line.startswith(('- ', '* ')):
                # List item
                elements.append(Paragraph(f"• {escape(line[2:])}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(escape(line), styles['Normal']))
            elements.append(Spacer(1, 6))

        flush_table()
        return elements

    def _extract_table_data(self, lines: list) -> list:
        """
        Extract table rows from '|'-separated lines.

        Leading/trailing pipes are removed, empty cells are kept so columns
        stay aligned, a Markdown delimiter row (e.g. |---|:--:|) directly
        after the header is skipped, and short rows are padded because
        reportlab requires rows of equal length.

        Returns:
            list: rows of cell strings, or [] when fewer than two rows exist.
        """
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                in_table = True
                row = line.strip()
                if row.startswith('|'):
                    row = row[1:]
                if row.endswith('|'):
                    row = row[:-1]
                cells = [cell.strip() for cell in row.split('|')]
                if not any(cells):
                    continue
                # Only the row right after the header can be the delimiter row.
                is_delimiter = all(
                    cell and '-' in cell and not cell.strip('-:')
                    for cell in cells
                )
                if is_delimiter and len(table_data) == 1:
                    continue
                table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        if len(table_data) < 2:
            return []

        width = max(len(row) for row in table_data)
        return [row + [''] * (width - len(row)) for row in table_data]
|
||||
|
|
@ -6,7 +6,7 @@ import logging
|
|||
import importlib
|
||||
import pkgutil
|
||||
from typing import Dict, Type, List, Optional
|
||||
from .base_renderer import BaseRenderer
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ class RendererRegistry:
|
|||
|
||||
# Scan all Python files in the renderers directory
|
||||
for file_path in renderers_dir.glob("*.py"):
|
||||
if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
|
||||
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
|
||||
continue
|
||||
|
||||
# Extract module name from filename
|
||||
|
|
|
|||
|
|
@ -0,0 +1,285 @@
|
|||
"""
|
||||
Base renderer class for all format renderers.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import logging
|
||||
import json
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BaseRenderer(ABC):
|
||||
"""Base class for all format renderers."""
|
||||
|
||||
def __init__(self):
    """Attach the shared module-level logger to this renderer instance."""
    self.logger = logger
|
||||
|
||||
@classmethod
|
||||
def get_supported_formats(cls) -> List[str]:
|
||||
"""
|
||||
Return list of supported format names for this renderer.
|
||||
Override this method in subclasses to specify supported formats.
|
||||
"""
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
def get_format_aliases(cls) -> List[str]:
|
||||
"""
|
||||
Return list of format aliases for this renderer.
|
||||
Override this method in subclasses to specify format aliases.
|
||||
"""
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
def get_priority(cls) -> int:
|
||||
"""
|
||||
Return priority for this renderer (higher number = higher priority).
|
||||
Used when multiple renderers support the same format.
|
||||
"""
|
||||
return 0
|
||||
|
||||
@abstractmethod
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """
    Produce the final document for this renderer's target format.

    Abstract hook: the base implementation does nothing and every concrete
    renderer must override it.

    Args:
        extracted_content: Structured JSON content with sections and metadata
        title: Report title
        user_prompt: Original user prompt for context
        ai_service: AI service instance for additional processing

    Returns:
        tuple: (rendered_content, mime_type)
    """
    ...
|
||||
|
||||
def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Extract sections from report data."""
|
||||
return report_data.get('sections', [])
|
||||
|
||||
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Extract metadata from report data."""
|
||||
return report_data.get('metadata', {})
|
||||
|
||||
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
|
||||
"""Get title from report data or use fallback."""
|
||||
metadata = report_data.get('metadata', {})
|
||||
return metadata.get('title', fallback_title)
|
||||
|
||||
def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
|
||||
"""Validate that JSON content has the expected structure."""
|
||||
if not isinstance(json_content, dict):
|
||||
return False
|
||||
|
||||
if "sections" not in json_content:
|
||||
return False
|
||||
|
||||
sections = json_content.get("sections", [])
|
||||
if not isinstance(sections, list):
|
||||
return False
|
||||
|
||||
# Validate each section has type and data
|
||||
for section in sections:
|
||||
if not isinstance(section, dict):
|
||||
return False
|
||||
if "type" not in section or "data" not in section:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _get_section_type(self, section: Dict[str, Any]) -> str:
|
||||
"""Get the type of a section."""
|
||||
return section.get("type", "paragraph")
|
||||
|
||||
def _get_section_data(self, section: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get the data of a section."""
|
||||
return section.get("data", {})
|
||||
|
||||
def _get_section_id(self, section: Dict[str, Any]) -> str:
|
||||
"""Get the ID of a section (if available)."""
|
||||
return section.get("id", "unknown")
|
||||
|
||||
def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
|
||||
"""Extract table headers and rows from section data."""
|
||||
headers = section_data.get("headers", [])
|
||||
rows = section_data.get("rows", [])
|
||||
return headers, rows
|
||||
|
||||
def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
|
||||
"""Extract bullet list items from section data."""
|
||||
items = section_data.get("items", [])
|
||||
result = []
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
result.append(item)
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
result.append(item["text"])
|
||||
return result
|
||||
|
||||
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
|
||||
"""Extract heading level and text from section data."""
|
||||
level = section_data.get("level", 1)
|
||||
text = section_data.get("text", "")
|
||||
return level, text
|
||||
|
||||
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
|
||||
"""Extract paragraph text from section data."""
|
||||
return section_data.get("text", "")
|
||||
|
||||
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract code and language from section data."""
|
||||
code = section_data.get("code", "")
|
||||
language = section_data.get("language", "")
|
||||
return code, language
|
||||
|
||||
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract base64 data and alt text from section data."""
|
||||
base64_data = section_data.get("base64Data", "")
|
||||
alt_text = section_data.get("altText", "Image")
|
||||
return base64_data, alt_text
|
||||
|
||||
def _get_supported_section_types(self) -> List[str]:
|
||||
"""Return list of supported section types."""
|
||||
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
|
||||
|
||||
def _is_valid_section_type(self, section_type: str) -> bool:
|
||||
"""Check if a section type is valid."""
|
||||
return section_type in self._get_supported_section_types()
|
||||
|
||||
def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Process a section and return structured data based on its type."""
|
||||
section_type = self._get_section_type(section)
|
||||
section_data = self._get_section_data(section)
|
||||
|
||||
if section_type == "table":
|
||||
headers, rows = self._extract_table_data(section_data)
|
||||
return {"type": "table", "headers": headers, "rows": rows}
|
||||
elif section_type == "bullet_list":
|
||||
items = self._extract_bullet_list_items(section_data)
|
||||
return {"type": "bullet_list", "items": items}
|
||||
elif section_type == "heading":
|
||||
level, text = self._extract_heading_data(section_data)
|
||||
return {"type": "heading", "level": level, "text": text}
|
||||
elif section_type == "paragraph":
|
||||
text = self._extract_paragraph_text(section_data)
|
||||
return {"type": "paragraph", "text": text}
|
||||
elif section_type == "code_block":
|
||||
code, language = self._extract_code_block_data(section_data)
|
||||
return {"type": "code_block", "code": code, "language": language}
|
||||
elif section_type == "image":
|
||||
base64_data, alt_text = self._extract_image_data(section_data)
|
||||
return {"type": "image", "base64Data": base64_data, "altText": alt_text}
|
||||
else:
|
||||
# Fallback to paragraph
|
||||
text = self._extract_paragraph_text(section_data)
|
||||
return {"type": "paragraph", "text": text}
|
||||
|
||||
def _format_timestamp(self, timestamp: str = None) -> str:
|
||||
"""Format timestamp for display."""
|
||||
if timestamp:
|
||||
return timestamp
|
||||
from datetime import datetime, UTC
|
||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
|
||||
# ===== GENERIC AI STYLING HELPERS =====
|
||||
|
||||
async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Generic AI styling method that can be used by all renderers.
|
||||
|
||||
Args:
|
||||
ai_service: AI service instance
|
||||
style_template: Format-specific style template
|
||||
default_styles: Default styles to fall back to
|
||||
|
||||
Returns:
|
||||
Dict with styling definitions
|
||||
"""
|
||||
if not ai_service:
|
||||
return default_styles
|
||||
|
||||
try:
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
# Debug output
|
||||
print(f"🔍 AI STYLING RESPONSE TYPE: {type(response)}")
|
||||
print(f"🔍 AI STYLING RESPONSE LENGTH: {len(response.content) if response and hasattr(response, 'content') and response.content else 0}")
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip() if response and response.content else ""
|
||||
|
||||
# Check if result is empty
|
||||
if not result:
|
||||
self.logger.warning("AI styling returned empty response, using defaults")
|
||||
return default_styles
|
||||
|
||||
# Extract JSON from markdown if present
|
||||
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||
if json_match:
|
||||
result = json_match.group(1).strip()
|
||||
print(f"🔍 EXTRACTED JSON FROM MARKDOWN: {result[:100]}...")
|
||||
elif result.startswith('```json'):
|
||||
result = re.sub(r'^```json\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
print(f"🔍 CLEANED JSON FROM MARKDOWN: {result[:100]}...")
|
||||
elif result.startswith('```'):
|
||||
result = re.sub(r'^```\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
print(f"🔍 CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...")
|
||||
|
||||
# Try to parse JSON
|
||||
try:
|
||||
styles = json.loads(result)
|
||||
print(f"🔍 AI STYLING PARSED KEYS: {list(styles.keys()) if isinstance(styles, dict) else 'Not a dict'}")
|
||||
except json.JSONDecodeError as json_error:
|
||||
print(f"🔍 AI STYLING JSON ERROR: {json_error}")
|
||||
print(f"🔍 AI STYLING RAW RESULT: {result[:200]}...")
|
||||
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
|
||||
return default_styles
|
||||
|
||||
# Convert colors to appropriate format
|
||||
styles = self._convert_colors_format(styles)
|
||||
|
||||
return styles
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return default_styles
|
||||
|
||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Convert colors to appropriate format based on renderer type.
|
||||
Override this method in subclasses for format-specific color handling.
|
||||
"""
|
||||
return styles
|
||||
|
||||
def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Create a standardized AI style template for any format.
|
||||
|
||||
Args:
|
||||
format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
|
||||
user_prompt: User's original prompt
|
||||
style_schema: Format-specific style schema
|
||||
|
||||
Returns:
|
||||
Formatted prompt string
|
||||
"""
|
||||
schema_json = json.dumps(style_schema, indent=4)
|
||||
|
||||
return f"""Return this exact JSON structure with your styling customizations:
|
||||
|
||||
{schema_json}
|
||||
|
||||
NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""
|
||||
|
|
@ -2,12 +2,12 @@
|
|||
CSV renderer for report generation.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import csv
|
||||
import io
|
||||
|
||||
class CsvRenderer(BaseRenderer):
|
||||
class RendererCsv(BaseRenderer):
|
||||
"""Renders content to CSV format with format-specific extraction."""
|
||||
|
||||
@classmethod
|
||||
|
|
@ -25,20 +25,6 @@ class CsvRenderer(BaseRenderer):
|
|||
"""Return priority for CSV renderer."""
|
||||
return 70
|
||||
|
||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||
"""Return only CSV-specific guidelines; global prompt is built centrally."""
|
||||
return (
|
||||
"CSV FORMAT GUIDELINES:\n"
|
||||
"- Extract structured data from source documents into JSON format\n"
|
||||
"- Focus on tabular data, lists, and structured information\n"
|
||||
"- For tables: Extract headers and rows as separate arrays\n"
|
||||
"- For lists: Extract items with optional sub-items\n"
|
||||
"- Structure content into sections with clear content types\n"
|
||||
"- Use proper JSON structure with metadata, sections, and elements\n"
|
||||
"- Ensure data is clean and ready for CSV conversion\n"
|
||||
"OUTPUT: Return structured JSON that can be converted to CSV format."
|
||||
)
|
||||
|
||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to CSV format."""
|
||||
try:
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
DOCX renderer for report generation using python-docx.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import io
|
||||
import base64
|
||||
|
|
@ -22,7 +22,7 @@ try:
|
|||
except ImportError:
|
||||
DOCX_AVAILABLE = False
|
||||
|
||||
class DocxRenderer(BaseRenderer):
|
||||
class RendererDocx(BaseRenderer):
|
||||
"""Renders content to DOCX format using python-docx."""
|
||||
|
||||
@classmethod
|
||||
|
|
@ -40,30 +40,14 @@ class DocxRenderer(BaseRenderer):
|
|||
"""Return priority for DOCX renderer."""
|
||||
return 115
|
||||
|
||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||
"""Return only DOCX-specific guidelines; global prompt is built centrally."""
|
||||
return (
|
||||
"DOCX FORMAT GUIDELINES:\n"
|
||||
"- Extract the ACTUAL table data, lists, and content from the source documents\n"
|
||||
"- For tables: Extract all rows and columns in pipe-separated format (Column1 | Column2 | Column3)\n"
|
||||
"- For lists: Extract the actual list items, not summaries\n"
|
||||
"- Structure your response with clear headings using numbered format: 1) Heading, 2) Heading, etc.\n"
|
||||
"- Use bullet points (-) for lists and sub-items\n"
|
||||
"- Use **bold** for emphasis on key terms\n"
|
||||
"- Provide clean, structured content that can be directly converted to Word formatting\n"
|
||||
"- Do NOT include debug information, separators (---), metadata, or FILENAME headers\n"
|
||||
"- Start directly with your content - no introductory text or separators\n"
|
||||
"- Extract raw data, not analysis or summaries\n"
|
||||
"OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
|
||||
)
|
||||
|
||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||
print(f"🔍 DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...")
|
||||
try:
|
||||
if not DOCX_AVAILABLE:
|
||||
# Fallback to HTML if python-docx not available
|
||||
from .html_renderer import HtmlRenderer
|
||||
html_renderer = HtmlRenderer()
|
||||
from .rendererHtml import RendererHtml
|
||||
html_renderer = RendererHtml()
|
||||
html_content, _ = await html_renderer.render(extracted_content, title)
|
||||
return html_content, "text/html"
|
||||
|
||||
|
|
@ -84,7 +68,10 @@ class DocxRenderer(BaseRenderer):
|
|||
doc = Document()
|
||||
|
||||
# Get AI-generated styling definitions
|
||||
print(f"🔍 ABOUT TO CALL AI STYLING: user_prompt={user_prompt[:50] if user_prompt else 'None'}...")
|
||||
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
|
||||
styles = await self._get_docx_styles(user_prompt, ai_service)
|
||||
print(f"🔍 AI STYLING RESULT: {type(styles)}")
|
||||
|
||||
# Apply basic document setup
|
||||
self._setup_basic_document_styles(doc)
|
||||
|
|
@ -125,61 +112,24 @@ class DocxRenderer(BaseRenderer):
|
|||
raise Exception(f"DOCX generation failed: {str(e)}")
|
||||
|
||||
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Simple AI call to get DOCX styling definitions."""
|
||||
if not ai_service:
|
||||
return self._get_default_styles()
|
||||
"""Get DOCX styling definitions using base template AI styling."""
|
||||
style_schema = {
|
||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
|
||||
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
|
||||
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
|
||||
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
|
||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||
}
|
||||
|
||||
try:
|
||||
prompt = f"""
|
||||
For this DOCX document request: "{user_prompt}"
|
||||
|
||||
Provide styling definitions for DOCX elements. IMPORTANT: Ensure proper contrast - never use white text on white background or dark text on dark background. Respond with ONLY JSON:
|
||||
|
||||
{{
|
||||
"title": {{"font_size": 24, "color": "#1F4E79", "bold": true, "align": "center"}},
|
||||
"heading1": {{"font_size": 18, "color": "#2F2F2F", "bold": true, "align": "left"}},
|
||||
"heading2": {{"font_size": 14, "color": "#4F4F4F", "bold": true, "align": "left"}},
|
||||
"paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
|
||||
"table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
|
||||
"table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
|
||||
"table_border": {{"style": "horizontal_only", "color": "#000000", "thickness": "thin"}},
|
||||
"bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 20}},
|
||||
"code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
|
||||
}}
|
||||
|
||||
CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
|
||||
"""
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=prompt, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip()
|
||||
if result.startswith('```json'):
|
||||
result = re.sub(r'^```json\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
elif result.startswith('```'):
|
||||
result = re.sub(r'^```\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
|
||||
styles = json.loads(result)
|
||||
|
||||
# Validate and fix contrast issues
|
||||
styles = self._validate_styles_contrast(styles)
|
||||
|
||||
return styles
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return self._get_default_styles()
|
||||
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
|
||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
|
||||
|
||||
# Validate and fix contrast issues
|
||||
return self._validate_styles_contrast(styles)
|
||||
|
||||
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate and fix contrast issues in AI-generated styles."""
|
||||
|
|
@ -1005,145 +955,4 @@ CRITICAL: Table headers must have dark background with light text, table cells m
|
|||
# Bold text
|
||||
if part:
|
||||
run = para.add_run(part)
|
||||
run.bold = True
|
||||
|
||||
def _add_bullet_point(self, doc, text: str):
|
||||
"""Add a bullet point to the document."""
|
||||
if not text.strip():
|
||||
return
|
||||
|
||||
# Create paragraph with bullet style
|
||||
para = doc.add_paragraph(text, style='List Bullet')
|
||||
|
||||
# Check for Markdown formatting in bullet point
|
||||
if '**' in text or '*' in text:
|
||||
# Clear the paragraph and rebuild with formatting
|
||||
para.clear()
|
||||
self._add_paragraph_to_doc(doc, text)
|
||||
|
||||
def _style_table(self, table):
|
||||
"""Apply styling to the table."""
|
||||
try:
|
||||
# Style header row
|
||||
if len(table.rows) > 0:
|
||||
header_cells = table.rows[0].cells
|
||||
for cell in header_cells:
|
||||
for paragraph in cell.paragraphs:
|
||||
for run in paragraph.runs:
|
||||
run.bold = True
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not style table: {str(e)}")
|
||||
|
||||
def _format_timestamp(self) -> str:
|
||||
"""Format current timestamp for document generation."""
|
||||
from datetime import datetime, UTC
|
||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
"""Process a table row and add it to the document."""
|
||||
if not line.strip():
|
||||
return
|
||||
|
||||
# Clean the line - remove bullet point markers and bold markers
|
||||
clean_line = line.strip()
|
||||
if clean_line.startswith('•'):
|
||||
clean_line = clean_line[1:] # Remove "•"
|
||||
elif clean_line.startswith('- **'):
|
||||
clean_line = clean_line[4:] # Remove "- **"
|
||||
elif clean_line.startswith('- '):
|
||||
clean_line = clean_line[2:] # Remove "- "
|
||||
elif clean_line.startswith('**'):
|
||||
clean_line = clean_line[2:] # Remove "**"
|
||||
|
||||
# Remove trailing ** if present
|
||||
if clean_line.endswith('**'):
|
||||
clean_line = clean_line[:-2]
|
||||
|
||||
# Split by pipe separator
|
||||
parts = [part.strip() for part in clean_line.split('|')]
|
||||
|
||||
if len(parts) >= 2:
|
||||
# This is a table row - create a table if it doesn't exist
|
||||
if not hasattr(self, '_current_table') or self._current_table is None:
|
||||
# Create new table
|
||||
self._current_table = doc.add_table(rows=1, cols=len(parts))
|
||||
self._current_table.style = 'Table Grid'
|
||||
|
||||
# Check if this looks like a header row (contains common header words)
|
||||
is_header = any(word.lower() in clean_line.lower() for word in ['name', 'quantity', 'part', 'number', 'description', 'tag', 'item', 'status'])
|
||||
|
||||
# Add header row
|
||||
for i, part in enumerate(parts):
|
||||
if i < len(self._current_table.rows[0].cells):
|
||||
cell = self._current_table.rows[0].cells[i]
|
||||
cell.text = part
|
||||
# Make header bold if it looks like a header
|
||||
if is_header:
|
||||
for paragraph in cell.paragraphs:
|
||||
for run in paragraph.runs:
|
||||
run.bold = True
|
||||
else:
|
||||
# Add data row to existing table
|
||||
row = self._current_table.add_row()
|
||||
for i, part in enumerate(parts):
|
||||
if i < len(row.cells):
|
||||
row.cells[i].text = part
|
||||
else:
|
||||
# Not a table row, treat as regular text
|
||||
doc.add_paragraph(line)
|
||||
|
||||
def _add_bullet_point(self, doc, text: str):
|
||||
"""Add a bullet point to the document."""
|
||||
if not text.strip():
|
||||
return
|
||||
|
||||
# Create paragraph with bullet style
|
||||
para = doc.add_paragraph(text, style='List Bullet')
|
||||
|
||||
# Check for bold text in bullet point
|
||||
if '**' in text:
|
||||
# Clear the paragraph and rebuild with formatting
|
||||
para.clear()
|
||||
parts = text.split('**')
|
||||
for i, part in enumerate(parts):
|
||||
if i % 2 == 0:
|
||||
# Regular text
|
||||
if part:
|
||||
para.add_run(part)
|
||||
else:
|
||||
# Bold text
|
||||
if part:
|
||||
run = para.add_run(part)
|
||||
run.bold = True
|
||||
|
||||
def _process_table_row(self, doc, line: str):
|
||||
"""Process a table row and add it to the document."""
|
||||
if not line.strip():
|
||||
return
|
||||
|
||||
# Split by pipe separator
|
||||
parts = [part.strip() for part in line.split('|')]
|
||||
|
||||
if len(parts) >= 2:
|
||||
# This is a table row - create a table if it doesn't exist
|
||||
if not hasattr(self, '_current_table') or self._current_table is None:
|
||||
# Create new table
|
||||
self._current_table = doc.add_table(rows=1, cols=len(parts))
|
||||
self._current_table.style = 'Table Grid'
|
||||
|
||||
# Add header row
|
||||
for i, part in enumerate(parts):
|
||||
if i < len(self._current_table.rows[0].cells):
|
||||
cell = self._current_table.rows[0].cells[i]
|
||||
cell.text = part
|
||||
# Make header bold
|
||||
for paragraph in cell.paragraphs:
|
||||
for run in paragraph.runs:
|
||||
run.bold = True
|
||||
else:
|
||||
# Add data row to existing table
|
||||
row = self._current_table.add_row()
|
||||
for i, part in enumerate(parts):
|
||||
if i < len(row.cells):
|
||||
row.cells[i].text = part
|
||||
else:
|
||||
# Not a table row, treat as regular text
|
||||
doc.add_paragraph(line)
|
||||
run.bold = True
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
Excel renderer for report generation using openpyxl.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import io
|
||||
import base64
|
||||
|
|
@ -17,7 +17,7 @@ try:
|
|||
except ImportError:
|
||||
OPENPYXL_AVAILABLE = False
|
||||
|
||||
class ExcelRenderer(BaseRenderer):
|
||||
class RendererExcel(BaseRenderer):
|
||||
"""Renders content to Excel format using openpyxl."""
|
||||
|
||||
@classmethod
|
||||
|
|
@ -35,27 +35,13 @@ class ExcelRenderer(BaseRenderer):
|
|||
"""Return priority for Excel renderer."""
|
||||
return 110
|
||||
|
||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||
"""Return only Excel-specific guidelines; global prompt is built centrally."""
|
||||
return (
|
||||
"EXCEL FORMAT GUIDELINES:\n"
|
||||
"- Extract structured data from source documents into JSON format\n"
|
||||
"- Focus on tabular data, lists, and structured information suitable for spreadsheets\n"
|
||||
"- For tables: Extract headers and rows as separate arrays with clear column names\n"
|
||||
"- For lists: Extract items with optional sub-items and metadata\n"
|
||||
"- Structure content into sections with clear content types (table, list, paragraph)\n"
|
||||
"- Use proper JSON structure with metadata, sections, and elements\n"
|
||||
"- Ensure data is clean and ready for Excel conversion with proper formatting\n"
|
||||
"OUTPUT: Return structured JSON that can be converted to Excel format."
|
||||
)
|
||||
|
||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||
try:
|
||||
if not OPENPYXL_AVAILABLE:
|
||||
# Fallback to CSV if openpyxl not available
|
||||
from .csv_renderer import CsvRenderer
|
||||
csv_renderer = CsvRenderer()
|
||||
from .rendererCsv import RendererCsv
|
||||
csv_renderer = RendererCsv()
|
||||
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
|
||||
return csv_content, "text/csv"
|
||||
|
||||
|
|
@ -215,6 +201,10 @@ class ExcelRenderer(BaseRenderer):
|
|||
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
||||
"""Generate Excel content from structured JSON document using AI-generated styling."""
|
||||
try:
|
||||
# Debug output
|
||||
print(f"🔍 EXCEL JSON CONTENT TYPE: {type(json_content)}")
|
||||
print(f"🔍 EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}")
|
||||
|
||||
# Get AI-generated styling definitions
|
||||
styles = await self._get_excel_styles(user_prompt, ai_service)
|
||||
|
||||
|
|
@ -231,11 +221,9 @@ class ExcelRenderer(BaseRenderer):
|
|||
# Create workbook
|
||||
wb = Workbook()
|
||||
|
||||
# Remove default sheet
|
||||
wb.remove(wb.active)
|
||||
|
||||
# Create sheets based on content
|
||||
sheets = self._create_excel_sheets(wb, json_content, styles)
|
||||
print(f"🔍 EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}")
|
||||
|
||||
# Populate sheets with content
|
||||
self._populate_excel_sheets(sheets, json_content, styles)
|
||||
|
|
@ -247,7 +235,13 @@ class ExcelRenderer(BaseRenderer):
|
|||
|
||||
# Convert to base64
|
||||
excel_bytes = buffer.getvalue()
|
||||
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
||||
print(f"🔍 EXCEL BYTES LENGTH: {len(excel_bytes)}")
|
||||
try:
|
||||
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
||||
print(f"🔍 EXCEL BASE64 LENGTH: {len(excel_base64)}")
|
||||
except Exception as b64_error:
|
||||
print(f"🔍 BASE64 ENCODING ERROR: {b64_error}")
|
||||
raise
|
||||
|
||||
return excel_base64
|
||||
|
||||
|
|
@ -256,59 +250,38 @@ class ExcelRenderer(BaseRenderer):
|
|||
raise Exception(f"Excel generation failed: {str(e)}")
|
||||
|
||||
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Simple AI call to get Excel styling definitions."""
|
||||
if not ai_service:
|
||||
return self._get_default_excel_styles()
|
||||
"""Get Excel styling definitions using base template AI styling."""
|
||||
style_schema = {
|
||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
|
||||
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||
}
|
||||
|
||||
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
|
||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_excel_styles())
|
||||
|
||||
# Convert colors to aRGB format and validate
|
||||
styles = self._convert_colors_format(styles)
|
||||
return self._validate_excel_styles_contrast(styles)
|
||||
|
||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert hex colors to aRGB format for Excel compatibility."""
|
||||
try:
|
||||
prompt = f"""
|
||||
For this Excel document request: "{user_prompt}"
|
||||
|
||||
Provide styling definitions for Excel elements. Respond with ONLY JSON:
|
||||
|
||||
{{
|
||||
"title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}},
|
||||
"heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}},
|
||||
"table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
|
||||
"table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
|
||||
"bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}},
|
||||
"paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
|
||||
"code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
|
||||
}}
|
||||
|
||||
CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
|
||||
"""
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=prompt, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip()
|
||||
if result.startswith('```json'):
|
||||
result = re.sub(r'^```json\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
elif result.startswith('```'):
|
||||
result = re.sub(r'^```\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
|
||||
styles = json.loads(result)
|
||||
|
||||
# Validate and fix contrast issues
|
||||
styles = self._validate_excel_styles_contrast(styles)
|
||||
|
||||
for style_name, style_config in styles.items():
|
||||
if isinstance(style_config, dict):
|
||||
for prop, value in style_config.items():
|
||||
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
||||
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
||||
styles[style_name][prop] = f"FF{value[1:]}"
|
||||
print(f"🔍 CONVERTED COLOR: {value} → {styles[style_name][prop]}")
|
||||
return styles
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return self._get_default_excel_styles()
|
||||
print(f"🔍 COLOR CONVERSION ERROR: {e}")
|
||||
return styles
|
||||
|
||||
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate and fix contrast issues in AI-generated styles."""
|
||||
|
|
@ -348,15 +321,15 @@ CRITICAL: Table headers must have dark background with light text, table cells m
|
|||
return self._get_default_excel_styles()
|
||||
|
||||
def _get_default_excel_styles(self) -> Dict[str, Any]:
|
||||
"""Default Excel styles."""
|
||||
"""Default Excel styles with aRGB color format."""
|
||||
return {
|
||||
"title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||
"heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
|
||||
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2},
|
||||
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
|
||||
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||
}
|
||||
|
||||
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
|
@ -365,13 +338,16 @@ CRITICAL: Table headers must have dark background with light text, table cells m
|
|||
|
||||
# Get sheet names from AI styles or generate based on content
|
||||
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
|
||||
print(f"🔍 EXCEL SHEET NAMES: {sheet_names}")
|
||||
|
||||
# Create sheets
|
||||
for i, sheet_name in enumerate(sheet_names):
|
||||
if i == 0:
|
||||
# Use the default sheet for the first sheet
|
||||
sheet = wb.active
|
||||
sheet.title = sheet_name
|
||||
else:
|
||||
# Create additional sheets
|
||||
sheet = wb.create_sheet(sheet_name, i)
|
||||
sheets[sheet_name.lower()] = sheet
|
||||
|
||||
|
|
@ -437,7 +413,9 @@ CRITICAL: Table headers must have dark background with light text, table cells m
|
|||
document_title = json_content.get("metadata", {}).get("title", "Generated Report")
|
||||
sheet['A1'] = document_title
|
||||
|
||||
title_style = styles["title"]
|
||||
# Safety check for title style
|
||||
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
|
||||
print(f"🔍 EXCEL TITLE STYLE: {title_style}")
|
||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=title_style["color"])
|
||||
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
|
||||
|
||||
|
|
@ -560,6 +538,107 @@ CRITICAL: Table headers must have dark background with light text, table cells m
|
|||
self.logger.warning(f"Could not add section to sheet: {str(e)}")
|
||||
return start_row + 1
|
||||
|
||||
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
||||
"""Add a table element to Excel sheet."""
|
||||
try:
|
||||
table_data = element.get("data", {})
|
||||
headers = table_data.get("headers", [])
|
||||
rows = table_data.get("rows", [])
|
||||
|
||||
if not headers and not rows:
|
||||
return start_row
|
||||
|
||||
# Add headers
|
||||
header_style = styles.get("table_header", {})
|
||||
for col, header in enumerate(headers, 1):
|
||||
cell = sheet.cell(row=start_row, column=col, value=header)
|
||||
if header_style.get("bold"):
|
||||
cell.font = Font(bold=True, color=header_style.get("text_color", "#FF000000"))
|
||||
if header_style.get("background"):
|
||||
cell.fill = PatternFill(start_color=header_style["background"], end_color=header_style["background"], fill_type="solid")
|
||||
|
||||
start_row += 1
|
||||
|
||||
# Add rows
|
||||
cell_style = styles.get("table_cell", {})
|
||||
for row_data in rows:
|
||||
for col, cell_value in enumerate(row_data, 1):
|
||||
cell = sheet.cell(row=start_row, column=col, value=cell_value)
|
||||
if cell_style.get("text_color"):
|
||||
cell.font = Font(color=cell_style["text_color"])
|
||||
start_row += 1
|
||||
|
||||
return start_row
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not add table to Excel: {str(e)}")
|
||||
return start_row + 1
|
||||
|
||||
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
||||
"""Add a list element to Excel sheet."""
|
||||
try:
|
||||
list_items = element.get("items", [])
|
||||
|
||||
list_style = styles.get("bullet_list", {})
|
||||
for item in list_items:
|
||||
sheet.cell(row=start_row, column=1, value=f"• {item}")
|
||||
if list_style.get("color"):
|
||||
sheet.cell(row=start_row, column=1).font = Font(color=list_style["color"])
|
||||
start_row += 1
|
||||
|
||||
return start_row
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not add list to Excel: {str(e)}")
|
||||
return start_row + 1
|
||||
|
||||
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
||||
"""Add a paragraph element to Excel sheet."""
|
||||
try:
|
||||
text = element.get("text", "")
|
||||
if text:
|
||||
sheet.cell(row=start_row, column=1, value=text)
|
||||
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
if paragraph_style.get("color"):
|
||||
sheet.cell(row=start_row, column=1).font = Font(color=paragraph_style["color"])
|
||||
|
||||
start_row += 1
|
||||
|
||||
return start_row
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
|
||||
return start_row + 1
|
||||
|
||||
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
||||
"""Add a heading element to Excel sheet."""
|
||||
try:
|
||||
text = element.get("text", "")
|
||||
level = element.get("level", 1)
|
||||
|
||||
if text:
|
||||
sheet.cell(row=start_row, column=1, value=text)
|
||||
|
||||
heading_style = styles.get("heading", {})
|
||||
font_size = heading_style.get("font_size", 14)
|
||||
if level > 1:
|
||||
font_size = max(10, font_size - (level - 1) * 2)
|
||||
|
||||
sheet.cell(row=start_row, column=1).font = Font(
|
||||
size=font_size,
|
||||
bold=True,
|
||||
color=heading_style.get("color", "#FF000000")
|
||||
)
|
||||
|
||||
start_row += 1
|
||||
|
||||
return start_row
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
|
||||
return start_row + 1
|
||||
|
||||
def _format_timestamp(self) -> str:
|
||||
"""Format current timestamp for document generation."""
|
||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
463
modules/services/serviceGeneration/renderers/rendererHtml.py
Normal file
463
modules/services/serviceGeneration/renderers/rendererHtml.py
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
"""
|
||||
HTML renderer for report generation.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class RendererHtml(BaseRenderer):
    """Render structured JSON report content as a standalone HTML document.

    The input document must be a dict with a ``"sections"`` list; an optional
    ``"metadata"`` dict may carry the ``"title"``. Styling comes from an AI call
    when an ``ai_service`` is supplied and from built-in defaults otherwise.
    All document-derived text is HTML-escaped before being placed in markup.

    Relies on ``self.logger``, ``_format_timestamp``, ``_get_section_type``,
    ``_get_section_data`` and ``_get_section_id``, which are not defined in this
    class (presumably provided by ``BaseRenderer`` — confirm).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    @staticmethod
    def _escape_html(value: Any, quote: bool = False) -> str:
        """Return ``value`` as a string with HTML special characters escaped.

        Pass ``quote=True`` when the result is placed inside an attribute value.
        """
        import html  # local import, matching the function-scope imports used in this file
        return html.escape(str(value), quote=quote)

    @staticmethod
    def _css_rule(selector: str, properties: Any) -> List[str]:
        """Return the lines of one CSS rule; ``snake_case`` keys become ``kebab-case``.

        A non-dict ``properties`` value yields an empty rule instead of raising.
        """
        lines = [f"{selector} {{"]
        if isinstance(properties, dict):
            for property_name, value in properties.items():
                css_property = str(property_name).replace("_", "-")
                lines.append(f" {css_property}: {value};")
        lines.append("}")
        return lines

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to HTML.

        Returns:
            ``(html, "text/html")``. If generation fails, the error is logged
            and a minimal HTML page describing the error is returned instead.
        """
        try:
            html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
            return html_content, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Minimal fallback page; title and error text are escaped because
            # both may contain document-derived content.
            safe_title = self._escape_html(title)
            safe_error = self._escape_html(e)
            return f"<html><head><title>{safe_title}</title></head><body><h1>{safe_title}</h1><p>Error rendering report: {safe_error}</p></body></html>", "text/html"

    async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Build the complete HTML document from the structured JSON content.

        Raises:
            Exception: If the content is not a dict, lacks ``"sections"``, or
                any step of document assembly fails. The original error is
                chained as the cause.
        """
        try:
            # Validate before requesting styles so invalid input never costs an AI call.
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            styles = await self._get_html_styles(user_prompt, ai_service)

            # Title from JSON metadata wins over the provided title.
            document_title = json_content.get("metadata", {}).get("title", title)
            safe_title = self._escape_html(document_title)

            html_parts = [
                '<!DOCTYPE html>',
                '<html lang="en">',
                '<head>',
                '<meta charset="UTF-8">',
                '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
                f'<title>{safe_title}</title>',
                '<style>',
                self._generate_css_styles(styles),
                '</style>',
                '</head>',
                '<body>',
                f'<header><h1 class="document-title">{safe_title}</h1></header>',
                '<main>',
            ]

            # Sections that render to an empty string are omitted.
            for section in json_content.get("sections") or []:
                section_html = self._render_json_section(section, styles)
                if section_html:
                    html_parts.append(section_html)

            html_parts.append('</main>')

            html_parts.append('<footer>')
            html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
            html_parts.append('</footer>')

            html_parts.append('</body>')
            html_parts.append('</html>')

            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.error(f"Error generating HTML from JSON: {str(e)}")
            raise Exception(f"HTML generation failed: {str(e)}") from e

    async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Ask the AI service for styling definitions, falling back to defaults.

        Defaults are returned when no ``ai_service`` is given, when the reply is
        empty, not valid JSON, or not a JSON object, and when the call fails.
        """
        if not ai_service:
            return self._get_default_html_styles()

        try:
            # NOTE(review): user_prompt is accepted but never included in the
            # prompt below, so styling cannot reflect the user's request —
            # confirm whether that is intended.
            prompt = f"""Return this exact JSON structure with your styling customizations:

{{
"title": {{"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}},
"heading1": {{"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}},
"heading2": {{"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}},
"paragraph": {{"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"}},
"table": {{"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"}},
"table_header": {{"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"}},
"table_cell": {{"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"}},
"bullet_list": {{"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"}},
"code_block": {{"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"}},
"image": {{"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"}},
"body": {{"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}}
}}

NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""

            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=prompt, context="", options=request_options)
            response = await ai_service.aiObjects.call(request)

            import json
            import re

            result = response.content.strip() if response and response.content else ""

            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return self._get_default_html_styles()

            # The model may wrap the JSON in a markdown code fence despite the
            # instructions; unwrap it before parsing.
            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
            if json_match:
                result = json_match.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            try:
                styles = json.loads(result)
            except json.JSONDecodeError as json_error:
                self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
                return self._get_default_html_styles()

            # Valid JSON that is not an object (list, string, number) cannot be
            # used as a style table and would crash CSS generation.
            if not isinstance(styles, dict):
                self.logger.warning("AI styling returned a non-object JSON value, using defaults")
                return self._get_default_html_styles()

            return self._validate_html_styles_contrast(styles)

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return self._get_default_html_styles()

    def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Replace unreadable table colours in AI-generated styles.

        If a ``table_header`` or ``table_cell`` entry has identical text and
        background colours (compared case-insensitively, with defaults of
        ``#000000`` text on ``#FFFFFF`` background), both are reset to a
        readable pair. ``styles`` is modified in place and returned. If
        validation itself fails, the default styles are returned.
        """
        # Readable (background, text) replacements per style entry.
        readable_pairs = {
            "table_header": ("#4F4F4F", "#FFFFFF"),
            "table_cell": ("#FFFFFF", "#2F2F2F"),
        }
        try:
            for key, (safe_background, safe_color) in readable_pairs.items():
                if key not in styles:
                    continue
                entry = styles[key]
                bg_color = entry.get("background", "#FFFFFF")
                text_color = entry.get("color", "#000000")

                if bg_color.strip().upper() == text_color.strip().upper():
                    entry["background"] = safe_background
                    entry["color"] = safe_color

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_html_styles()

    def _get_default_html_styles(self) -> Dict[str, Any]:
        """Default HTML styles."""
        return {
            "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
            "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
            "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
            "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
            "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
            "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
            "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
            "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
            "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
            "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
            "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
        }

    def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
        """Generate the stylesheet text from the style definitions.

        One rule is emitted per known style entry, in a fixed order, followed
        by a hard-coded rule for the ``.generated-info`` footer line. Missing
        entries produce empty rules.
        """
        # (CSS selector, key in ``styles``), in output order.
        selector_to_style = [
            ("body", "body"),
            (".document-title", "title"),
            ("h1", "heading1"),
            ("h2", "heading2"),
            ("p", "paragraph"),
            ("table", "table"),
            ("th", "table_header"),
            ("td", "table_cell"),
            ("ul", "bullet_list"),
            ("pre", "code_block"),
            ("img", "image"),
        ]

        css_parts: List[str] = []
        for selector, style_key in selector_to_style:
            css_parts.extend(self._css_rule(selector, styles.get(style_key, {})))

        css_parts.extend([
            ".generated-info {",
            " font-size: 0.9em;",
            " color: #666;",
            " text-align: center;",
            " margin-top: 2em;",
            " padding-top: 1em;",
            " border-top: 1px solid #ddd;",
            "}",
        ])

        return '\n'.join(css_parts)

    def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render one section to HTML, dispatching on its type.

        Unknown section types are rendered as paragraphs. If rendering raises,
        a warning is logged and an error ``<div>`` is returned in place of the
        section.
        """
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            renderers = {
                "table": self._render_json_table,
                "bullet_list": self._render_json_bullet_list,
                "heading": self._render_json_heading,
                "paragraph": self._render_json_paragraph,
                "code_block": self._render_json_code_block,
                "image": self._render_json_image,
            }
            renderer = renderers.get(section_type, self._render_json_paragraph)
            return renderer(section_data, styles)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f'<div class="error">[Error rendering section: {self._escape_html(e)}]</div>'

    def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a table to HTML; returns ``""`` if headers or rows are missing or on error."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return ""

            html_parts = ['<table>']

            html_parts.append('<thead><tr>')
            for header in headers:
                html_parts.append(f'<th>{self._escape_html(header)}</th>')
            html_parts.append('</tr></thead>')

            html_parts.append('<tbody>')
            for row in rows:
                html_parts.append('<tr>')
                for cell_data in row:
                    html_parts.append(f'<td>{self._escape_html(cell_data)}</td>')
                html_parts.append('</tr>')
            html_parts.append('</tbody>')

            html_parts.append('</table>')
            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a bullet list to HTML; returns ``""`` if there are no items or on error.

        Items may be plain strings or dicts with a ``"text"`` key; items of any
        other shape are skipped.
        """
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            html_parts = ['<ul>']
            for item in items:
                if isinstance(item, str):
                    html_parts.append(f'<li>{self._escape_html(item)}</li>')
                elif isinstance(item, dict) and "text" in item:
                    html_parts.append(f'<li>{self._escape_html(item["text"])}</li>')
            html_parts.append('</ul>')

            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a heading as ``<h1>``..``<h6>``; returns ``""`` if there is no text or on error."""
        try:
            text = heading_data.get("text", "")

            if text:
                # The level must be an integer to form a valid tag name; values
                # such as "2" or 2.0 are coerced, anything unusable becomes 1.
                try:
                    level = int(heading_data.get("level", 1))
                except (TypeError, ValueError):
                    level = 1
                level = max(1, min(6, level))
                return f'<h{level}>{self._escape_html(text)}</h{level}>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a paragraph as ``<p>``; returns ``""`` if there is no text or on error."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                return f'<p>{self._escape_html(text)}</p>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a code block as ``<pre><code>``; returns ``""`` if there is no code or on error.

        The code is escaped so characters such as ``<`` and ``&`` display
        literally instead of being parsed as markup.
        """
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                safe_code = self._escape_html(code)
                if language:
                    safe_language = self._escape_html(language, quote=True)
                    return f'<pre><code class="language-{safe_language}">{safe_code}</code></pre>'
                else:
                    return f'<pre><code>{safe_code}</code></pre>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render an image as an inline PNG data URI; returns ``""`` if no data is present.

        On error a warning is logged and a placeholder ``<div>`` naming the
        image's alt text is returned.
        """
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")

            if base64_data:
                # Both values sit inside quoted attributes, so quotes are escaped too.
                safe_data = self._escape_html(base64_data, quote=True)
                safe_alt = self._escape_html(alt_text, quote=True)
                return f'<img src="data:image/png;base64,{safe_data}" alt="{safe_alt}">'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            # image_data may be the very thing that is malformed, so guard the lookup.
            fallback_alt = image_data.get("altText", "Image") if isinstance(image_data, dict) else "Image"
            return f'<div class="error">[Image: {self._escape_html(fallback_alt)}]</div>'
|
||||
79
modules/services/serviceGeneration/renderers/rendererJson.py
Normal file
79
modules/services/serviceGeneration/renderers/rendererJson.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""
|
||||
JSON renderer for report generation.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import json
|
||||
|
||||
class RendererJson(BaseRenderer):
    """Render structured report content as pretty-printed JSON text.

    Relies on ``self.logger``, which is not defined in this class (presumably
    provided by ``BaseRenderer`` — confirm).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to JSON text.

        ``user_prompt`` and ``ai_service`` are accepted for interface
        compatibility and are not used here.

        Returns:
            ``(json_text, "application/json")``. If rendering fails, the error
            is logged and a minimal JSON document describing it is returned.
        """
        try:
            json_content = self._clean_json_content(extracted_content, title)
            return json_content, "application/json"

        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            fallback_data = {
                "title": title,
                "sections": [{"type": "paragraph", "data": {"text": f"Error rendering report: {str(e)}"}}],
                "metadata": {"error": str(e)}
            }
            # ensure_ascii=False keeps non-ASCII text readable, like the other dumps here.
            return json.dumps(fallback_data, indent=2, ensure_ascii=False), "application/json"

    def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
        """Normalise ``content`` into the sections/metadata shape and serialise it.

        - Content without a ``"sections"`` key is wrapped as a single paragraph
          holding its string form.
        - ``"metadata"`` is guaranteed to be a dict with a ``"title"``; a
          missing or non-dict value is replaced, and an existing title is kept.
        - The caller's dict is not modified.

        Returns:
            Indented JSON text. If normalisation or serialisation fails (for
            example non-dict or non-serialisable content), a warning is logged
            and a minimal document holding the string form of ``content`` and
            the error message is returned.
        """
        try:
            if not isinstance(content, dict):
                raise ValueError("Content must be a dictionary")

            if "sections" not in content:
                # Convert old format to new format.
                document = {
                    "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
                    "metadata": {"title": title}
                }
            else:
                # Shallow copy so the caller's dict is not mutated; the same
                # content may be passed to other renderers.
                document = dict(content)

            existing_metadata = document.get("metadata")
            # None or any non-dict metadata cannot carry a title; start fresh
            # rather than discarding the whole document in the error fallback.
            metadata = dict(existing_metadata) if isinstance(existing_metadata, dict) else {}
            metadata.setdefault("title", title)
            document["metadata"] = metadata

            return json.dumps(document, indent=2, ensure_ascii=False)

        except Exception as e:
            self.logger.warning(f"Error cleaning JSON content: {str(e)}")
            fallback_data = {
                "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
                "metadata": {"title": title, "error": str(e)}
            }
            return json.dumps(fallback_data, indent=2, ensure_ascii=False)
|
||||
213
modules/services/serviceGeneration/renderers/rendererMarkdown.py
Normal file
213
modules/services/serviceGeneration/renderers/rendererMarkdown.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
"""
|
||||
Markdown renderer for report generation.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class RendererMarkdown(BaseRenderer):
    """Render structured JSON report content as Markdown text.

    The input document must be a dict with a ``"sections"`` list; an optional
    ``"metadata"`` dict may carry the ``"title"``.

    Relies on ``self.logger``, ``_format_timestamp``, ``_get_section_type``,
    ``_get_section_data`` and ``_get_section_id``, which are not defined in this
    class (presumably provided by ``BaseRenderer`` — confirm).
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    @staticmethod
    def _table_cell(value: Any) -> str:
        """Return ``value`` as text that is safe inside one Markdown table cell.

        Line breaks are collapsed to spaces (a table row must be one line) and
        ``|`` is backslash-escaped so it is not read as a column separator.
        """
        single_line = " ".join(str(value).splitlines())
        return single_line.replace("|", "\\|")

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Markdown.

        ``user_prompt`` and ``ai_service`` are accepted for interface
        compatibility and are not used here.

        Returns:
            ``(markdown, "text/markdown")``. If generation fails, the error is
            logged and a minimal Markdown document describing it is returned.
        """
        try:
            markdown_content = self._generate_markdown_from_json(extracted_content, title)
            return markdown_content, "text/markdown"

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Build the complete Markdown document from the structured JSON content.

        Raises:
            Exception: If the content is not a dict, lacks ``"sections"``, or
                assembly fails. The original error is chained as the cause.
        """
        try:
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Title from JSON metadata wins over the provided title.
            document_title = json_content.get("metadata", {}).get("title", title)

            markdown_parts = [f"# {document_title}", ""]

            # Sections that render to an empty string are omitted.
            for section in json_content.get("sections") or []:
                section_markdown = self._render_json_section(section)
                if section_markdown:
                    markdown_parts.append(section_markdown)
                    markdown_parts.append("")  # blank line between sections

            markdown_parts.append("---")
            markdown_parts.append(f"*Generated: {self._format_timestamp()}*")

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {str(e)}")
            raise Exception(f"Markdown generation failed: {str(e)}") from e

    def _render_json_section(self, section: Dict[str, Any]) -> str:
        """Render one section to Markdown, dispatching on its type.

        Unknown section types are rendered as paragraphs. If rendering raises,
        a warning is logged and an italic error note is returned in place of
        the section.
        """
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            renderers = {
                "table": self._render_json_table,
                "bullet_list": self._render_json_bullet_list,
                "heading": self._render_json_heading,
                "paragraph": self._render_json_paragraph,
                "code_block": self._render_json_code_block,
                "image": self._render_json_image,
            }
            renderer = renderers.get(section_type, self._render_json_paragraph)
            return renderer(section_data)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f"*[Error rendering section: {str(e)}]*"

    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
        """Render a table as a pipe table; returns ``""`` if headers or rows are missing or on error."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return ""

            def as_row(cells) -> str:
                # Leading and trailing pipes are required for a single-column
                # table: without them "Name" over "---" parses as a heading.
                return "| " + " | ".join(cells) + " |"

            markdown_parts = [
                as_row(self._table_cell(header) for header in headers),
                as_row("---" for _ in headers),
            ]
            for row in rows:
                markdown_parts.append(as_row(self._table_cell(cell_data) for cell_data in row))

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
        """Render a bullet list; returns ``""`` if there are no items or on error.

        Items may be plain strings or dicts with a ``"text"`` key; items of any
        other shape are skipped.
        """
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            markdown_parts = []
            for item in items:
                if isinstance(item, str):
                    markdown_parts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    markdown_parts.append(f"- {item['text']}")

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
        """Render a heading with one to six ``#``; returns ``""`` if there is no text or on error."""
        try:
            text = heading_data.get("text", "")

            if text:
                # '#' * level needs an integer; values such as "2" or 2.0 are
                # coerced, anything unusable becomes level 1.
                try:
                    level = int(heading_data.get("level", 1))
                except (TypeError, ValueError):
                    level = 1
                level = max(1, min(6, level))
                return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
        """Return the paragraph text unchanged, or ``""`` if it is empty or on error."""
        try:
            text = paragraph_data.get("text", "")
            return text if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
        """Render a fenced code block; returns ``""`` if there is no code or on error."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                if language:
                    return f"```{language}\n{code}\n```"
                else:
                    return f"```\n{code}\n```"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
        """Render an image as a text placeholder carrying its alt text.

        Base64 image data is not embedded in the Markdown output; a placeholder
        is emitted whether or not data is present, so the image's position and
        description are not silently lost.
        """
        try:
            alt_text = image_data.get("altText", "Image")
            return f"*[Image: {alt_text}]*"

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            # image_data itself may be malformed, so use a fixed placeholder.
            return "*[Image]*"
|
||||
416
modules/services/serviceGeneration/renderers/rendererPdf.py
Normal file
416
modules/services/serviceGeneration/renderers/rendererPdf.py
Normal file
|
|
@ -0,0 +1,416 @@
|
|||
"""
|
||||
PDF renderer for report generation using reportlab.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
import io
|
||||
import base64
|
||||
from datetime import datetime, UTC
|
||||
|
||||
try:
|
||||
from reportlab.lib.pagesizes import letter, A4
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
from reportlab.lib.units import inch
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
||||
REPORTLAB_AVAILABLE = True
|
||||
except ImportError:
|
||||
REPORTLAB_AVAILABLE = False
|
||||
|
||||
class RendererPdf(BaseRenderer):
    """Render structured JSON report content to a PDF document with reportlab.

    When reportlab could not be imported (``REPORTLAB_AVAILABLE`` is False),
    :meth:`render` delegates to the HTML renderer instead. Annotations that
    name reportlab types are written as strings so that this class can still
    be defined in that case.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to PDF format using AI-analyzed styling.

        Returns:
            A ``(content, mime_type)`` tuple: the base64-encoded PDF with
            ``"application/pdf"`` on success, the HTML renderer's output with
            ``"text/html"`` when reportlab is unavailable, or an error message
            with ``"text/plain"`` when rendering fails.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .rendererHtml import RendererHtml
                html_renderer = RendererHtml()
                html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
                return html_content, "text/html"

            # Generate PDF using AI-analyzed styling
            pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Build the PDF from a structured JSON document.

        Args:
            json_content: Dict that must contain a ``"sections"`` entry; an
                optional ``"metadata"["title"]`` overrides ``title``.
            title: Fallback document title.
            user_prompt: Passed on to the styling request.
            ai_service: Passed on to the styling request.

        Returns:
            The generated PDF as a base64-encoded string.

        Raises:
            Exception: ``"PDF generation failed: ..."`` wrapping (and chained
                to) any validation or rendering error.
        """
        try:
            # Validate the input first so invalid content does not cost a
            # styling request.
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Get AI-generated styling definitions
            styles = await self._get_pdf_styles(user_prompt, ai_service)

            # Use title from JSON metadata if available, otherwise use provided title
            metadata = json_content.get("metadata") or {}
            document_title = metadata.get("title") or title or ""

            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Title page
            story = [
                Paragraph(self._escape_markup(document_title), self._create_title_style(styles)),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)),
                PageBreak(),
            ]

            # Process each section
            for section in json_content.get("sections") or []:
                story.extend(self._render_json_section(section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

        except Exception as e:
            self.logger.error(f"Error generating PDF from JSON: {str(e)}")
            raise Exception(f"PDF generation failed: {str(e)}") from e

    async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Get PDF styling definitions using base template AI styling.

        The default style set doubles as the schema handed to the styling
        template; the result is passed through the contrast validation.
        """
        # The schema sent to the AI is exactly the default style set.
        style_schema = self._get_default_pdf_styles()

        style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
        styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())

        # Validate and fix contrast issues
        return self._validate_pdf_styles_contrast(styles)

    @staticmethod
    def _reset_identical_colors(style_def: Dict[str, Any], background: str, text_color: str) -> None:
        """Overwrite background/text colours in place when they are the same colour."""
        current_bg = str(style_def.get("background", "#FFFFFF")).strip().upper()
        current_fg = str(style_def.get("text_color", "#000000")).strip().upper()
        if current_bg == current_fg:
            style_def["background"] = background
            style_def["text_color"] = text_color

    def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles.

        Table header and table cell definitions whose background and text
        colour are identical (text would be invisible) are reset to the
        default colours. ``styles`` is modified in place and returned; if
        validation itself fails the default style set is returned instead.
        """
        try:
            # Fix table header contrast
            if "table_header" in styles:
                self._reset_identical_colors(styles["table_header"], "#4F4F4F", "#FFFFFF")

            # Fix table cell contrast
            if "table_cell" in styles:
                self._reset_identical_colors(styles["table_cell"], "#FFFFFF", "#2F2F2F")

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_pdf_styles()

    def _get_default_pdf_styles(self) -> Dict[str, Any]:
        """Default PDF styles."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
            "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
            "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
        }

    def _create_title_style(self, styles: Dict[str, Any]) -> "ParagraphStyle":
        """Create title style from style definitions."""
        title_style_def = styles.get("title", {})

        return ParagraphStyle(
            'CustomTitle',
            fontSize=title_style_def.get("font_size", 24),
            spaceAfter=title_style_def.get("space_after", 30),
            alignment=self._get_alignment(title_style_def.get("align", "center")),
            textColor=self._hex_to_color(title_style_def.get("color", "#1F4E79"))
        )

    def _create_heading_style(self, styles: Dict[str, Any], level: int) -> "ParagraphStyle":
        """Create heading style from style definitions.

        Levels without their own ``heading<level>`` entry use ``heading1``.
        """
        heading_key = f"heading{level}"
        heading_style_def = styles.get(heading_key, styles.get("heading1", {}))

        return ParagraphStyle(
            f'CustomHeading{level}',
            fontSize=heading_style_def.get("font_size", 18 - level * 2),
            spaceAfter=heading_style_def.get("space_after", 12),
            spaceBefore=heading_style_def.get("space_before", 12),
            alignment=self._get_alignment(heading_style_def.get("align", "left")),
            textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
        )

    def _create_normal_style(self, styles: Dict[str, Any]) -> "ParagraphStyle":
        """Create normal paragraph style from style definitions."""
        paragraph_style_def = styles.get("paragraph", {})
        font_size = paragraph_style_def.get("font_size", 11)

        return ParagraphStyle(
            'CustomNormal',
            fontSize=font_size,
            spaceAfter=paragraph_style_def.get("space_after", 6),
            alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
            textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
            # line_height is a multiplier of the font size
            leading=paragraph_style_def.get("line_height", 1.2) * font_size
        )

    def _get_alignment(self, align: str) -> int:
        """Convert alignment string to reportlab paragraph alignment constant.

        Unknown or non-string values map to left alignment.
        """
        from reportlab.lib.enums import TA_RIGHT

        align_map = {
            "center": TA_CENTER,
            "left": TA_LEFT,
            "right": TA_RIGHT,
            "justify": TA_JUSTIFY
        }
        return align_map.get(str(align).strip().lower(), TA_LEFT)

    @staticmethod
    def _get_table_alignment(align: Any) -> str:
        """Convert an alignment string to the keyword used by table ALIGN commands.

        Table styles take 'LEFT' / 'CENTER' / 'RIGHT' rather than the paragraph
        alignment integers; anything else (including "justify") maps to 'LEFT'.
        """
        keyword = str(align).strip().upper()
        return keyword if keyword in ("LEFT", "CENTER", "RIGHT") else "LEFT"

    @staticmethod
    def _escape_markup(text: Any) -> str:
        """Return ``text`` as a string with ``&``, ``<`` and ``>`` escaped.

        Paragraph text is parsed as XML-like markup, so document content must
        be escaped before it is handed to ``Paragraph``.
        """
        from xml.sax.saxutils import escape

        return escape(str(text))

    def _format_code_markup(self, code: Any) -> str:
        """Turn source code into Paragraph markup that keeps lines and indentation."""
        formatted_lines = []
        for line in str(code).expandtabs(4).splitlines():
            body = line.lstrip(' ')
            indent = len(line) - len(body)
            # Leading spaces would be collapsed by the paragraph parser.
            formatted_lines.append('&nbsp;' * indent + self._escape_markup(body))
        return '<br/>'.join(formatted_lines)

    @staticmethod
    def _normalize_heading_level(level: Any) -> int:
        """Coerce a heading level to an int clamped to 1..6 (1 if unusable)."""
        try:
            level = int(level)
        except (TypeError, ValueError, OverflowError):
            level = 1
        return max(1, min(6, level))

    def _hex_to_color(self, hex_color: str) -> "colors.Color":
        """Convert hex color to reportlab color.

        Accepts ``#RRGGBB`` and the ``#RGB`` shorthand; any value that cannot
        be parsed yields black.
        """
        try:
            digits = hex_color.strip().lstrip('#')
            if len(digits) == 3:
                # Expand shorthand: "1AF" -> "11AAFF"
                digits = ''.join(ch * 2 for ch in digits)
            r, g, b = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
            return colors.Color(r, g, b)
        except (AttributeError, TypeError, ValueError):
            return colors.black

    def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a single JSON section to PDF elements using AI-generated styles.

        Unknown section types are rendered as paragraphs. If rendering raises,
        a single paragraph describing the error is returned instead.
        """
        try:
            renderers = {
                "table": self._render_json_table,
                "bullet_list": self._render_json_bullet_list,
                "heading": self._render_json_heading,
                "paragraph": self._render_json_paragraph,
                "code_block": self._render_json_code_block,
                "image": self._render_json_image,
            }
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            # Fallback to paragraph for unknown types
            renderer = renderers.get(section_type, self._render_json_paragraph)
            return renderer(section_data, styles)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            message = self._escape_markup(f"[Error rendering section: {str(e)}]")
            return [Paragraph(message, self._create_normal_style(styles))]

    def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON table to PDF elements using AI-generated styles.

        Returns ``[table, spacer]``, or an empty list when headers or rows are
        missing or rendering fails. Rows are padded to a common width so that
        ragged input does not discard the whole table.
        """
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return []

            # Prepare table data: every row as a list of strings of equal length
            raw_rows = [list(headers)] + [
                list(row) if isinstance(row, (list, tuple)) else [row] for row in rows
            ]
            width = max(len(row) for row in raw_rows)
            table_data_list = [
                ["" if cell is None else str(cell) for cell in row] + [""] * (width - len(row))
                for row in raw_rows
            ]

            # Create table
            table = Table(table_data_list)

            # Apply styling
            table_header_style = styles.get("table_header", {})
            table_cell_style = styles.get("table_cell", {})

            table_style = [
                ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
                ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
                # Table ALIGN takes a keyword string, not a paragraph TA_* int
                ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
                ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))),
                # Apply the (contrast-validated) body text colour as well
                ('TEXTCOLOR', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("text_color", "#2F2F2F"))),
                ('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]

            table.setStyle(TableStyle(table_style))

            return [table, Spacer(1, 12)]

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return []

    def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON bullet list to PDF elements using AI-generated styles.

        String items and dict items with a ``"text"`` key become one bullet
        paragraph each; other items are ignored. A trailing spacer is added
        when at least one bullet was produced.
        """
        try:
            items = list_data.get("items", [])
            bullet_style_def = styles.get("bullet_list", {})
            item_style = self._create_normal_style(styles)

            elements = []
            for item in items:
                if isinstance(item, str):
                    elements.append(Paragraph(f"• {self._escape_markup(item)}", item_style))
                elif isinstance(item, dict) and "text" in item:
                    elements.append(Paragraph(f"• {self._escape_markup(item['text'])}", item_style))

            if elements:
                elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))

            return elements

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return []

    def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON heading to PDF elements using AI-generated styles.

        Returns a single heading paragraph, or an empty list when the text is
        missing or rendering fails. The level is coerced to an int in 1..6.
        """
        try:
            text = heading_data.get("text", "")

            if text:
                level = self._normalize_heading_level(heading_data.get("level", 1))
                heading_style = self._create_heading_style(styles, level)
                return [Paragraph(self._escape_markup(text), heading_style)]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return []

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON paragraph to PDF elements using AI-generated styles."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                return [Paragraph(self._escape_markup(text), self._create_normal_style(styles))]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return []

    def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON code block to PDF elements using AI-generated styles.

        Emits an optional ``Code (<language>):`` label followed by the code,
        escaped and with its line breaks and indentation preserved.
        """
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            code_style_def = styles.get("code_block", {})

            if code:
                elements = []
                font_size = code_style_def.get("font_size", 9)
                text_color = self._hex_to_color(code_style_def.get("color", "#2F2F2F"))

                if language:
                    lang_style = ParagraphStyle(
                        'CodeLanguage',
                        fontSize=font_size,
                        textColor=text_color,
                        fontName='Helvetica-Bold'
                    )
                    elements.append(Paragraph(f"Code ({self._escape_markup(language)}):", lang_style))

                code_style = ParagraphStyle(
                    'CodeBlock',
                    fontSize=font_size,
                    textColor=text_color,
                    fontName=code_style_def.get("font", "Courier"),
                    backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
                    spaceAfter=code_style_def.get("space_after", 6)
                )
                elements.append(Paragraph(self._format_code_markup(code), code_style))

                return elements

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return []

    def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON image to PDF elements.

        The image itself is not drawn: when ``"base64Data"`` is present a
        ``[Image: <alt text>]`` placeholder paragraph is returned, otherwise
        an empty list.
        """
        alt_text = "Image"
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")

            if base64_data:
                # For now, just add a placeholder since reportlab image handling is complex
                return [Paragraph(f"[Image: {self._escape_markup(alt_text)}]", self._create_normal_style(styles))]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            # alt_text keeps its default if image_data could not be read
            return [Paragraph(f"[Image: {self._escape_markup(alt_text)}]", self._create_normal_style(styles))]
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
import logging
|
||||
import base64
|
||||
import io
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PptxRenderer(BaseRenderer):
|
||||
class RendererPptx(BaseRenderer):
|
||||
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
|
||||
|
||||
def __init__(self):
|
||||
|
|
@ -258,76 +258,25 @@ class PptxRenderer(BaseRenderer):
|
|||
"""Get MIME type for rendered output."""
|
||||
return self.output_mime_type
|
||||
|
||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Return the PowerPoint-specific extraction guidelines.

    Neither ``user_prompt`` nor ``title`` is used here; the method returns
    a fixed block of guideline text only.
    """
    guideline_lines = (
        "POWERPOINT FORMAT GUIDELINES:\n",
        "- Extract structured data from source documents into JSON format\n",
        "- Focus on presentation-ready content with clear sections and visual elements\n",
        "- For tables: Extract headers and rows as separate arrays suitable for slides\n",
        "- For lists: Extract items with optional sub-items for bullet points\n",
        "- Structure content into sections with clear content types (heading, paragraph, table, list)\n",
        "- Use proper JSON structure with metadata, sections, and elements\n",
        "- Ensure content is concise and suitable for slide presentation\n",
        "OUTPUT: Return structured JSON that can be converted to PowerPoint slides.",
    )
    return "".join(guideline_lines)
|
||||
|
||||
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Simple AI call to get PowerPoint styling definitions."""
|
||||
if not ai_service:
|
||||
return self._get_default_pptx_styles()
|
||||
"""Get PowerPoint styling definitions using base template AI styling."""
|
||||
style_schema = {
|
||||
"title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||
"heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||
"subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
|
||||
"paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||
"bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
|
||||
"table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
|
||||
"table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
|
||||
"slide_size": "16:9",
|
||||
"content_per_slide": "concise"
|
||||
}
|
||||
|
||||
try:
|
||||
prompt = f"""
|
||||
For this PowerPoint presentation request: "{user_prompt}"
|
||||
|
||||
Provide styling definitions for PowerPoint elements. Respond with ONLY JSON:
|
||||
|
||||
{{
|
||||
"title": {{"font_size": 44, "color": "#1F4E79", "bold": true, "align": "center"}},
|
||||
"heading": {{"font_size": 32, "color": "#2F2F2F", "bold": true, "align": "left"}},
|
||||
"subheading": {{"font_size": 24, "color": "#4F4F4F", "bold": true, "align": "left"}},
|
||||
"paragraph": {{"font_size": 18, "color": "#2F2F2F", "bold": false, "align": "left"}},
|
||||
"bullet_list": {{"font_size": 18, "color": "#2F2F2F", "indent": 20}},
|
||||
"table_header": {{"font_size": 16, "color": "#FFFFFF", "bold": true, "background": "#4F4F4F"}},
|
||||
"table_cell": {{"font_size": 14, "color": "#2F2F2F", "bold": false, "background": "#FFFFFF"}},
|
||||
"slide_size": "16:9",
|
||||
"content_per_slide": "concise"
|
||||
}}
|
||||
|
||||
CRITICAL: PowerPoint text must be large enough to read from a distance. Minimum font size should be 14pt for body text.
|
||||
"""
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=prompt, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
# Clean and parse JSON
|
||||
result = response.content.strip()
|
||||
if result.startswith('```json'):
|
||||
result = re.sub(r'^```json\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
elif result.startswith('```'):
|
||||
result = re.sub(r'^```\s*', '', result)
|
||||
result = re.sub(r'\s*```$', '', result)
|
||||
|
||||
styles = json.loads(result)
|
||||
|
||||
# Validate font sizes for PowerPoint readability
|
||||
styles = self._validate_pptx_styles_readability(styles)
|
||||
|
||||
return styles
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return self._get_default_pptx_styles()
|
||||
style_template = self._create_ai_style_template("pptx", user_prompt, style_schema)
|
||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pptx_styles())
|
||||
|
||||
# Validate PowerPoint-specific requirements
|
||||
return self._validate_pptx_styles_readability(styles)
|
||||
|
||||
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate and fix readability issues in AI-generated styles."""
|
||||
234
modules/services/serviceGeneration/renderers/rendererText.py
Normal file
234
modules/services/serviceGeneration/renderers/rendererText.py
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
"""
|
||||
Text renderer for report generation.
|
||||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class RendererText(BaseRenderer):
    """Render structured JSON report content as plain text.

    Registered for plain-text and source/config style formats that have no
    dedicated renderer.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to plain text format.

        ``user_prompt`` and ``ai_service`` are accepted but not used by this
        renderer.

        Returns:
            ``(text, "text/plain")``; if generation fails the text is the
            title followed by an error message.
        """
        try:
            # Generate text from JSON structure
            text_content = self._generate_text_from_json(extracted_content, title)
            return text_content, "text/plain"

        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Generate text content from structured JSON document.

        Output layout: the title underlined with ``=``, each non-empty
        section followed by a blank line, and a final ``Generated: ...`` line.

        Raises:
            Exception: ``"Text generation failed: ..."`` wrapping (and chained
                to) any validation or rendering error.
        """
        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title.
            # str() keeps the underline computation safe for a None/non-string title.
            metadata = json_content.get("metadata") or {}
            document_title = str(metadata.get("title") or title or "")

            # Document title
            text_parts = [document_title, "=" * len(document_title), ""]

            # Process each section
            for section in json_content.get("sections") or []:
                section_text = self._render_json_section(section)
                if section_text:
                    text_parts.append(section_text)
                    text_parts.append("")  # Add spacing between sections

            # Add generation info
            text_parts.append("")
            text_parts.append(f"Generated: {self._format_timestamp()}")

            return '\n'.join(text_parts)

        except Exception as e:
            self.logger.error(f"Error generating text from JSON: {str(e)}")
            raise Exception(f"Text generation failed: {str(e)}") from e

    def _render_json_section(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to text.

        Unknown section types are rendered as paragraphs. If rendering raises,
        a bracketed error message is returned instead.
        """
        try:
            renderers = {
                "table": self._render_json_table,
                "bullet_list": self._render_json_bullet_list,
                "heading": self._render_json_heading,
                "paragraph": self._render_json_paragraph,
                "code_block": self._render_json_code_block,
                "image": self._render_json_image,
            }
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            # Fallback to paragraph for unknown types
            renderer = renderers.get(section_type, self._render_json_paragraph)
            return renderer(section_data)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f"[Error rendering section: {str(e)}]"

    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
        """Render a JSON table to text.

        Produces a ``" | "``-separated header line, a dashed separator sized
        to each header, and one line per row. Returns an empty string when
        headers or rows are missing or rendering fails.
        """
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return ""

            header_cells = [str(header) for header in headers]

            # Header, separator, then data rows
            text_parts = [
                " | ".join(header_cells),
                " | ".join("-" * len(cell) for cell in header_cells),
            ]
            text_parts.extend(" | ".join(str(cell_data) for cell_data in row) for row in rows)

            return '\n'.join(text_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
        """Render a JSON bullet list to text.

        String items and dict items with a ``"text"`` key become ``- item``
        lines; other items are ignored.
        """
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            text_parts = []
            for item in items:
                if isinstance(item, str):
                    text_parts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    text_parts.append(f"- {item['text']}")

            return '\n'.join(text_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    @staticmethod
    def _normalize_heading_level(level: Any) -> int:
        """Coerce a heading level to an int clamped to 1..6 (1 if unusable)."""
        try:
            level = int(level)
        except (TypeError, ValueError, OverflowError):
            level = 1
        return max(1, min(6, level))

    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
        """Render a JSON heading to text.

        Level 1 is underlined with ``=``, level 2 with ``-``; deeper levels
        are prefixed with ``#`` repeated ``level`` times. Returns an empty
        string when the text is missing or rendering fails.
        """
        try:
            text = heading_data.get("text", "")

            if text:
                # Levels may arrive as "2" or 2.0; coerce rather than drop the heading.
                level = self._normalize_heading_level(heading_data.get("level", 1))
                text = str(text)
                if level == 1:
                    return f"{text}\n{'=' * len(text)}"
                elif level == 2:
                    return f"{text}\n{'-' * len(text)}"
                else:
                    return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
        """Render a JSON paragraph to text.

        The text is converted to ``str`` so that a non-string value cannot
        break the final ``'\\n'.join`` of the document.
        """
        try:
            text = paragraph_data.get("text", "")
            return str(text) if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
        """Render a JSON code block to text.

        The code is prefixed with a ``Code (<language>):`` line when a
        language is given.
        """
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                if language:
                    return f"Code ({language}):\n{code}"
                else:
                    return str(code)

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
        """Render a JSON image to text as an ``[Image: <alt text>]`` placeholder."""
        alt_text = "Image"
        try:
            alt_text = image_data.get("altText", "Image")
        except Exception as e:
            # Keep the default alt text; re-reading image_data here could raise again.
            self.logger.warning(f"Error rendering image: {str(e)}")

        return f"[Image: {alt_text}]"
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
"""
|
||||
Text renderer for report generation.
|
||||
"""
|
||||
|
||||
from .base_renderer import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
|
||||
class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally.

        Neither ``user_prompt`` nor ``title`` is used; the guidelines are a
        fixed string.
        """
        return (
            "TEXT FORMAT GUIDELINES:\n"
            "- Output ONLY plain text (no markdown or HTML).\n"
            "- Use clear headings (you may underline with === or --- when helpful).\n"
            "- Use simple bullet lists with '-' and tables with '|' when needed.\n"
            "- Preserve indentation for code-like content if present.\n"
            "OUTPUT: Return ONLY the raw text content."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Args:
            extracted_content: Text produced by the extraction step.
            title: Document title; only used in the error fallback.

        Returns:
            A ``(text, "text/plain")`` tuple. On failure the text is a short
            error report headed by ``title``.
        """
        try:
            # The extracted content should already be formatted text from the AI;
            # it only needs cleaning.
            text_content = self._clean_text_content(extracted_content, title)
            return text_content, "text/plain"

        except Exception as e:
            # NOTE(review): self.logger is presumably provided by BaseRenderer - confirm.
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI.

        Strips a wrapping code fence, removes ``**bold**`` / ``*italic*``
        emphasis markers, removes tag-like HTML spans and normalizes line
        endings to ``\\n``.

        Args:
            content: Raw text to clean.
            title: Unused; kept for interface compatibility.

        Returns:
            The cleaned text.
        """
        import re

        # Normalize line endings up front so the fence handling sees clean lines.
        content = content.replace('\r\n', '\n').replace('\r', '\n').strip()

        # Remove a markdown code fence wrapping the whole content.
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Remove emphasis markers only where they actually wrap text. Blindly
        # deleting every '*' and '_' corrupts snake_case identifiers,
        # '**kwargs', 'x**2' and '*' bullets in the code formats served here.
        content = re.sub(
            r'(?<![\w*])\*\*(?=\S)(.+?)(?<=\S)\*\*(?![\w*])', r'\1', content
        )
        content = re.sub(
            r'(?<![\w*])\*(?=[^\s*])(.+?)(?<=[^\s*])\*(?![\w*])', r'\1', content
        )
        # Underscore emphasis is deliberately left alone: '__bold__' cannot be
        # told apart from identifiers such as '__init__' or 'my_var'.

        # Heuristic: strip only spans that look like tags (name starts with a
        # letter), so expressions like 'a < b and c > d' survive.
        # NOTE(review): this still strips real markup from 'xml' output and
        # generics such as List<String> - confirm whether tags should be
        # stripped at all for those formats.
        content = re.sub(r'</?[A-Za-z][^<>]*>', '', content)

        return content
|
||||
|
|
@ -103,8 +103,20 @@ Return only the JSON structure with actual data from the documents. Do not inclu
|
|||
finalPrompt = genericIntro
|
||||
|
||||
# Debug output
|
||||
print(f"🔍 DEBUG: Extraction Prompt: {finalPrompt}")
|
||||
print(f"🔍 DEBUG: Extraction Intent: {extractionIntent}")
|
||||
print(f"🔍 EXTRACTION INTENT: {extractionIntent}")
|
||||
|
||||
# Save full extraction prompt to debug file
|
||||
try:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
debug_root = "./test-chat/ai"
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n")
|
||||
f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return finalPrompt
|
||||
|
||||
|
|
@ -127,6 +139,9 @@ async def buildGenerationPrompt(
|
|||
# Protect userPrompt from injection
|
||||
safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')
|
||||
|
||||
# Debug output
|
||||
print(f"🔍 GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'")
|
||||
|
||||
# AI call to generate the appropriate generation prompt
|
||||
generationPromptRequest = f"""
|
||||
Based on this user request, create a detailed generation prompt for creating a {outputFormat} document.
|
||||
|
|
@ -144,17 +159,23 @@ Create a generation prompt that:
|
|||
|
||||
IMPORTANT: Always generate content in STANDARDIZED JSON FORMAT. In your response, include the exact text "PLACEHOLDER_FOR_FORMAT_RULES" where specific format rules will be inserted afterwards automatically.
|
||||
|
||||
CRITICAL: You MUST start your response with exactly "Generate a {outputFormat} document that:" - do NOT use "docx" or any other format. Use the exact format specified: {outputFormat}
|
||||
|
||||
Return only the generation prompt, starting with "Generate a {outputFormat} document that..."
|
||||
"""
|
||||
|
||||
# Call AI service to generate the prompt
|
||||
print(f"🔍 DEBUG: Calling AI for generation prompt...")
|
||||
result = await aiService.callAi(
|
||||
prompt=generationPromptRequest,
|
||||
documents=None,
|
||||
options=None
|
||||
)
|
||||
print(f"🔍 DEBUG: AI generation prompt result: '{result}'")
|
||||
print(f"🔍 GENERATION PROMPT REQUEST: Calling AI for generation prompt...")
|
||||
|
||||
# Import and set proper options for AI call
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=generationPromptRequest, context="", options=request_options)
|
||||
response = await aiService.aiObjects.call(request)
|
||||
result = response.content if response else ""
|
||||
print(f"🔍 GENERATION PROMPT AI RESPONSE: '{result}'")
|
||||
|
||||
# Replace the placeholder that the AI created with actual format rules
|
||||
if result:
|
||||
|
|
@ -162,7 +183,21 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu
|
|||
result = result.replace("PLACEHOLDER_FOR_FORMAT_RULES", formatRules)
|
||||
|
||||
# Debug output
|
||||
print(f"🔍 DEBUG: Generation Prompt: {result if result else 'None'}")
|
||||
print(f"🔍 GENERATION PROMPT FINAL: {result if result else 'None'}")
|
||||
|
||||
# Save full generation prompt and AI response to debug file
|
||||
try:
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||
debug_root = "./test-chat/ai"
|
||||
os.makedirs(debug_root, exist_ok=True)
|
||||
with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f:
|
||||
f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n")
|
||||
f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n")
|
||||
f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"
|
||||
|
||||
|
|
@ -216,11 +251,15 @@ Do not include formatting instructions, file types, or output methods.
|
|||
|
||||
# Call AI service to extract intention
|
||||
print(f"🔍 DEBUG: Calling AI for extraction intent...")
|
||||
result = await aiService.callAi(
|
||||
prompt=extractionPrompt,
|
||||
documents=None,
|
||||
options=None
|
||||
)
|
||||
|
||||
# Import and set proper options for AI call
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=extractionPrompt, context="", options=request_options)
|
||||
response = await aiService.aiObjects.call(request)
|
||||
result = response.content if response else ""
|
||||
print(f"🔍 DEBUG: AI extraction intent result: '{result}'")
|
||||
|
||||
return result if result else f"Extract all relevant content from the document according to the user's requirements: {userPrompt}"
|
||||
|
|
|
|||
|
|
@ -1,197 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to rename renderer files from <name>_renderer.py to renderer<Name>.py
|
||||
and update all references in the codebase.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
def get_renderer_files(renderers_dir: Path) -> List[Tuple[str, str]]:
    """Collect (old_name, new_name) pairs for renderer modules to rename.

    Every ``<name>_renderer.py`` directly inside ``renderers_dir`` is mapped
    to ``renderer<Name>.py``; ``base_renderer.py`` and ``registry.py`` are
    left out.

    Args:
        renderers_dir: Directory containing the renderer modules.

    Returns:
        File-name pairs in the order the directory glob yields them.
    """
    skipped = ['base_renderer.py', 'registry.py']
    pairs = []

    for candidate in renderers_dir.glob("*_renderer.py"):
        current = candidate.name
        if current in skipped:
            continue
        # e.g. "csv_renderer.py" -> "csv" -> "rendererCsv.py"
        stem = current.replace('_renderer.py', '')
        pairs.append((current, f"renderer{stem.capitalize()}.py"))

    return pairs
|
||||
|
||||
def update_file_imports(file_path: Path, old_to_new: Dict[str, str]) -> bool:
    """Rewrite renderer import statements in one file.

    Handles both the relative form (``from .<module> import``) and the
    absolute form
    (``from modules.services.serviceGeneration.renderers.<module> import``).

    Args:
        file_path: Python file to rewrite in place.
        old_to_new: Mapping of old renderer file names to new file names.

    Returns:
        True when the file was changed and written back; False when nothing
        matched or an error occurred (the error is printed, not raised).
    """
    package_prefix = 'modules.services.serviceGeneration.renderers.'
    try:
        with open(file_path, 'r', encoding='utf-8') as src:
            text = src.read()

        touched = False
        for old_file, new_file in old_to_new.items():
            before = old_file.replace('.py', '')
            after = new_file.replace('.py', '')

            # Relative imports first, then fully qualified ones.
            for prefix in ('.', package_prefix):
                text, hits = re.subn(
                    rf'from {re.escape(prefix + before)} import',
                    f'from {prefix}{after} import',
                    text,
                )
                if hits:
                    touched = True

        if not touched:
            print(f"ℹ️ No imports to update in: {file_path}")
            return False

        with open(file_path, 'w', encoding='utf-8') as dst:
            dst.write(text)
        print(f"✅ Updated imports in: {file_path}")
        return True

    except Exception as e:
        print(f"❌ Error updating {file_path}: {str(e)}")
        return False
|
||||
|
||||
def update_class_names_in_file(file_path: Path, old_to_new: Dict[str, str]) -> bool:
    """Rename renderer classes from ``<Name>Renderer`` to ``Renderer<Name>``.

    Both class definitions (``class CsvRenderer(``) and call sites
    (``CsvRenderer(``) are rewritten. Only whole identifiers are matched, so
    a class such as ``RichTextRenderer`` is not mangled when
    ``TextRenderer`` is renamed.

    Args:
        file_path: Python file to rewrite in place.
        old_to_new: Mapping of old renderer file names to new file names;
            class names are derived from the old file names.

    Returns:
        True when the file was changed and written back; False when nothing
        matched or an error occurred (the error is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        changes_made = False

        for old_name in old_to_new:
            # "csv_renderer.py" -> "csv" -> CsvRenderer / RendererCsv
            name_part = old_name.replace('.py', '').replace('_renderer', '')
            old_class = f"{name_part.capitalize()}Renderer"
            new_class = f"Renderer{name_part.capitalize()}"

            # \b keeps the match to the whole identifier; one pattern covers
            # both "class Old(" definitions and "Old(" instantiations.
            content, hits = re.subn(
                rf'\b{re.escape(old_class)}\(', f'{new_class}(', content
            )
            if hits:
                changes_made = True

        if changes_made:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            print(f"✅ Updated class names in: {file_path}")
            return True

        print(f"ℹ️ No class names to update in: {file_path}")
        return False

    except Exception as e:
        print(f"❌ Error updating class names in {file_path}: {str(e)}")
        return False
|
||||
|
||||
def main():
    """Rename renderer modules next to this script and update references.

    Runs three steps: rewrite import statements in every ``*.py`` file under
    the script's directory, rewrite class names inside the renderer files,
    then move each renderer file to its new name. Progress and a summary are
    printed to stdout.
    """
    print("🔄 Starting renderer file renaming process...")

    # The script's own directory is treated as the gateway root.
    gateway_dir = Path(__file__).parent
    renderers_dir = gateway_dir / "modules" / "services" / "serviceGeneration" / "renderers"

    if not renderers_dir.exists():
        print(f"❌ Renderers directory not found: {renderers_dir}")
        return

    print(f"📁 Working in directory: {renderers_dir}")

    renderer_files = get_renderer_files(renderers_dir)
    if not renderer_files:
        print("ℹ️ No renderer files found to rename.")
        return

    print(f"📋 Found {len(renderer_files)} renderer files to rename:")
    for source_name, target_name in renderer_files:
        print(f" {source_name} → {target_name}")

    old_to_new = dict(renderer_files)

    # Step 1: imports in every Python file except this script itself.
    print("\n🔄 Step 1: Updating import statements...")
    updated_files = [
        candidate
        for candidate in gateway_dir.rglob("*.py")
        if candidate.name != "rename_renderers.py"
        and update_file_imports(candidate, old_to_new)
    ]
    print(f"✅ Updated imports in {len(updated_files)} files")

    # Step 2: class names, applied to the renderer files before they move.
    print("\n🔄 Step 2: Updating class names in renderer files...")
    class_updated_files = []
    for source_name, _ in renderer_files:
        source_path = renderers_dir / source_name
        if source_path.exists() and update_class_names_in_file(source_path, old_to_new):
            class_updated_files.append(source_path)
    print(f"✅ Updated class names in {len(class_updated_files)} files")

    # Step 3: move each file to its new name; a failure on one file does not
    # stop the others.
    print("\n🔄 Step 3: Renaming files...")
    renamed_files = []
    for source_name, target_name in renderer_files:
        source_path = renderers_dir / source_name
        target_path = renderers_dir / target_name

        if not source_path.exists():
            print(f"⚠️ File not found: {source_name}")
            continue

        try:
            shutil.move(str(source_path), str(target_path))
            renamed_files.append((source_name, target_name))
            print(f"✅ Renamed: {source_name} → {target_name}")
        except Exception as e:
            print(f"❌ Error renaming {source_name}: {str(e)}")

    print("\n🎉 Renaming process completed!")
    print("📊 Summary:")
    print(f" - Files renamed: {len(renamed_files)}")
    print(f" - Import statements updated: {len(updated_files)}")
    print(f" - Class names updated: {len(class_updated_files)}")

    if renamed_files:
        print("\n📋 Renamed files:")
        for source_name, target_name in renamed_files:
            print(f" ✅ {source_name} → {target_name}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -154,9 +154,11 @@ async def process_documents_and_generate_summary():
|
|||
|
||||
# userPrompt = "Analyze these documents and create a comprehensive DOCX summary document including: 1) Document types and purposes, 2) Key information and main points, 3) Important details and numbers, 4) Notable sections, 5) Overall assessment and recommendations."
|
||||
|
||||
userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
|
||||
|
||||
# userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted."
|
||||
|
||||
userPrompt = "Create a docx file containing the combined documents in french language."
|
||||
# userPrompt = "Create a docx file containing the combined documents in french language."
|
||||
|
||||
try:
|
||||
# Single AI call with DOCX generation
|
||||
|
|
@ -164,7 +166,7 @@ async def process_documents_and_generate_summary():
|
|||
prompt=userPrompt,
|
||||
documents=documents,
|
||||
options=ai_options,
|
||||
outputFormat="docx",
|
||||
outputFormat="xlsx",
|
||||
title="Document Analysis Summary"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify the fallback mechanism in interfaceAiObjects.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# Add the gateway directory to the Python path
|
||||
gateway_dir = Path(__file__).parent
|
||||
sys.path.insert(0, str(gateway_dir))
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_fallback_mechanism():
    """Smoke-test the AiObjects fallback path.

    Runs three checks and logs the outcome of each: a plain text call, an
    image-analysis call with a 1x1 PNG, and a listing of the fallback models
    for the GENERAL and IMAGE_ANALYSIS operation types. Failures of the first
    two checks are logged as warnings; any other failure is logged as an
    error with a traceback.
    """
    try:
        from modules.interfaces.interfaceAiObjects import AiObjects
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

        logger.info("🧪 Testing fallback mechanism...")

        ai_objects = await AiObjects.create()
        logger.info("✅ AiObjects created successfully")

        # Test 1: normal operation (should work with primary model)
        logger.info("📝 Test 1: Normal operation")
        text_request = AiCallRequest(
            prompt="Hello, this is a test prompt. Please respond with 'Test successful'.",
            context="",
            options=AiCallOptions(operationType=OperationType.GENERAL)
        )
        try:
            reply = await ai_objects.call(text_request)
        except Exception as e:
            logger.warning(f"⚠️ Test 1 failed: {str(e)}")
        else:
            try:
                logger.info(f"✅ Test 1 successful: {reply.modelName} - {reply.content[:50]}...")
            except Exception as e:
                logger.warning(f"⚠️ Test 1 failed: {str(e)}")

        # Test 2: image analysis fallback
        logger.info("🖼️ Test 2: Image analysis fallback")
        try:
            # Base64-encoded 1x1 pixel PNG used as dummy image data.
            pixel_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
            description = await ai_objects.callImage(
                prompt="Describe this image",
                imageData=pixel_png,
                mimeType="image/png",
                options=AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
            )
            logger.info(f"✅ Test 2 successful: {description[:50]}...")
        except Exception as e:
            logger.warning(f"⚠️ Test 2 failed: {str(e)}")

        # Test 3: fallback model selection
        logger.info("🔄 Test 3: Fallback model selection")
        general_models = ai_objects._getFallbackModels(OperationType.GENERAL)
        logger.info(f"✅ Fallback models for GENERAL: {general_models}")

        image_models = ai_objects._getFallbackModels(OperationType.IMAGE_ANALYSIS)
        logger.info(f"✅ Fallback models for IMAGE_ANALYSIS: {image_models}")

        logger.info("🎉 Fallback mechanism test completed!")

    except Exception as e:
        logger.error(f"❌ Test failed: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_fallback_mechanism())
|
||||
Binary file not shown.
|
|
@ -1,120 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for JSON-to-DOCX rendering pipeline.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
from modules.services.serviceGeneration.renderers.docx_renderer import DocxRenderer
|
||||
|
||||
async def test_json_to_docx():
    """Exercise the JSON-to-DOCX rendering pipeline end to end.

    Builds a small JSON document (headings, paragraphs, a table and a bullet
    list), renders it with DocxRenderer, decodes the base64 result and writes
    it to ``test_json_to_docx.docx`` in the working directory.

    Returns:
        True when rendering and saving succeed, False otherwise (the error
        and traceback are printed).
    """

    def section(section_id, section_type, **payload):
        # Every section shares the same id/type/data envelope.
        return {"id": section_id, "type": section_type, "data": payload}

    test_json = {
        "metadata": {
            "title": "Test Document",
            "version": "1.0"
        },
        "sections": [
            section("heading1", "heading", level=1, text="Document Overview"),
            section(
                "paragraph1",
                "paragraph",
                text="This is a test paragraph to verify JSON-to-DOCX rendering works correctly.",
            ),
            section(
                "table1",
                "table",
                headers=["Name", "Quantity", "Status"],
                rows=[
                    ["Item 1", "5", "Active"],
                    ["Item 2", "3", "Inactive"],
                    ["Item 3", "10", "Active"],
                ],
            ),
            section(
                "list1",
                "bullet_list",
                items=[
                    "First bullet point",
                    "Second bullet point",
                    "Third bullet point",
                ],
            ),
            section("heading2", "heading", level=2, text="Summary"),
            section(
                "paragraph2",
                "paragraph",
                text="This document demonstrates the new JSON-based rendering system.",
            ),
        ]
    }

    print("🧪 Testing JSON-to-DOCX rendering...")
    print(f"📄 Test document has {len(test_json['sections'])} sections")

    try:
        renderer = DocxRenderer()

        docx_content, mime_type = await renderer.render(
            extracted_content=test_json,
            title="Test Document",
            user_prompt="Create a test document"
        )

        print("✅ Rendering successful!")
        print(f"📊 MIME type: {mime_type}")
        print(f"📏 Content length: {len(docx_content)} characters")
        print(f"🔍 Content preview: {docx_content[:100]}...")

        # The renderer output is decoded from base64 before being written out.
        import base64
        output_name = "test_json_to_docx.docx"
        with open(output_name, "wb") as out:
            out.write(base64.b64decode(docx_content))

        print(f"💾 Test DOCX saved as: {output_name}")
        return True

    except Exception as e:
        print(f"❌ Rendering failed: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = asyncio.run(test_json_to_docx())
|
||||
if success:
|
||||
print("\n🎉 JSON-to-DOCX rendering test PASSED!")
|
||||
else:
|
||||
print("\n💥 JSON-to-DOCX rendering test FAILED!")
|
||||
sys.exit(1)
|
||||
Loading…
Reference in a new issue