all renderers active and using json objects

This commit is contained in:
ValueOn AG 2025-10-12 00:51:23 +02:00
parent a26553c34c
commit 99215e27fe
25 changed files with 2006 additions and 1416 deletions

View file

@ -746,8 +746,13 @@ Return only the JSON structure with actual content from the image. Do not includ
# Process any document container as text content
request_options = options if options is not None else AiCallOptions()
request_options.operationType = OperationType.GENERAL
print(f"🔍 Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
print(f"🔍 EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
# Log extraction prompt and context
print(f"🔍 EXTRACTION PROMPT: {prompt}")
print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
request = AiCallRequest(
prompt=prompt,
context=part.data,
@ -756,6 +761,23 @@ Return only the JSON structure with actual content from the image. Do not includ
response = await self.aiObjects.call(request)
ai_result = response.content
# Log extraction response
print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
# Save full extraction prompt and response to debug file
try:
import os
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
os.makedirs(debug_root, exist_ok=True)
with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
except Exception:
pass
# If generating JSON, validate the response
if generate_json:
try:
@ -798,8 +820,13 @@ Return only the JSON structure with actual content from the image. Do not includ
request_options = options if options is not None else AiCallOptions()
# FIXED: Set operation type to general for text processing
request_options.operationType = OperationType.GENERAL
print(f"🔍 Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
print(f"🔍 EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
# Log extraction prompt and context
print(f"🔍 EXTRACTION PROMPT: {prompt}")
print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
request = AiCallRequest(
prompt=prompt,
context=part.data,
@ -808,6 +835,23 @@ Return only the JSON structure with actual content from the image. Do not includ
response = await self.aiObjects.call(request)
ai_result = response.content
# Log extraction response
print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
# Save full extraction prompt and response to debug file
try:
import os
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
os.makedirs(debug_root, exist_ok=True)
with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
except Exception:
pass
# If generating JSON, validate the response
if generate_json:
try:

View file

@ -318,18 +318,17 @@ class GenerationService:
if "sections" not in extractedContent:
raise ValueError("extractedContent must contain 'sections' field")
# DEBUG: dump renderer input to diagnose JSON structure TODO REMOVE
# DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE
try:
import os
import json
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
os.makedirs(debug_dir, exist_ok=True)
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as f:
json.dump(extractedContent, f, indent=2, ensure_ascii=False)
f.write(f"content_size: {len(str(extractedContent))} characters\n")
f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
except Exception:
pass

View file

@ -1,86 +0,0 @@
"""
Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
logger = logging.getLogger(__name__)
class BaseRenderer(ABC):
"""Base class for all format renderers."""
def __init__(self):
self.logger = logger
@classmethod
def get_supported_formats(cls) -> List[str]:
"""
Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats.
"""
return []
@classmethod
def get_format_aliases(cls) -> List[str]:
"""
Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases.
"""
return []
@classmethod
def get_priority(cls) -> int:
"""
Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format.
"""
return 0
@abstractmethod
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
"""
Get the format-specific extraction prompt for AI content extraction.
Args:
user_prompt: User's original prompt for report generation
title: Report title
Returns:
str: Format-specific prompt for AI extraction
"""
pass
@abstractmethod
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
"""
Render extracted content to the target format.
Args:
extracted_content: Raw content extracted by AI using format-specific prompt
title: Report title
Returns:
tuple: (rendered_content, mime_type)
"""
pass
def _extract_sections(self, report_data: Dict[str, Any]) -> list:
"""Extract sections from report data."""
return report_data.get('sections', [])
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
return report_data.get('metadata', {})
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
"""Get title from report data or use fallback."""
return report_data.get('title', fallback_title)
def _format_timestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:
return timestamp
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -1,69 +0,0 @@
"""
HTML renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to HTML format.

        Args:
            extracted_content: HTML text produced by the AI (possibly fenced or partial)
            title: Report title, used when a document shell has to be generated

        Returns:
            tuple: (html_document, "text/html"); on failure a minimal error page
        """
        try:
            # The extracted content should already be HTML from the AI;
            # it only needs cleaning and a guaranteed document shell.
            html_content = self._clean_html_content(extracted_content, title)
            return html_content, "text/html"
        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Title and error text are not trusted markup: escape them so the
            # fallback page cannot be broken (or scripted) by their content.
            from html import escape
            safe_title = escape(str(title))
            safe_error = escape(str(e))
            fallback_page = (
                f"<html><head><title>{safe_title}</title></head>"
                f"<body><h1>{safe_title}</h1>"
                f"<p>Error rendering report: {safe_error}</p></body></html>"
            )
            return fallback_page, "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """
        Clean HTML content from the AI and make sure it is a full document.

        Strips a surrounding markdown code fence, prepends a DOCTYPE when the
        text starts at <html>, and wraps bare fragments in a complete shell.
        """
        from html import escape

        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # HTML keywords are case-insensitive ("<!doctype html>" is valid), so
        # compare a lower-cased prefix; otherwise a complete document would be
        # wrapped inside a second <html> shell.
        prefix = content[:9].lower()
        if not prefix.startswith('<!doctype'):
            if prefix.startswith('<html'):
                content = '<!DOCTYPE html>\n' + content
            else:
                # The title is plain text, not markup: escape it for <title>.
                safe_title = escape(str(title))
                content = (
                    f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8">'
                    f'<title>{safe_title}</title></head>\n<body>\n{content}\n</body>\n</html>'
                )

        return content

View file

@ -1,74 +0,0 @@
"""
JSON renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import json
class JsonRenderer(BaseRenderer):
    """Renderer that emits the AI-extracted report as JSON text."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """List the format names served by this renderer."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """List the alternative names accepted for this renderer."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Priority of the JSON renderer among competing renderers."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Provide the JSON-only guidelines; the global prompt is assembled elsewhere."""
        guideline_lines = (
            "JSON FORMAT GUIDELINES:",
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).",
            "- Choose a structure that best fits the user's intent; include a top-level title and data.",
            "- Prefer arrays/objects that map cleanly to the extracted facts.",
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).",
            "OUTPUT: Return ONLY valid, parseable JSON.",
        )
        return "\n".join(guideline_lines)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Produce the JSON output for the extracted content.

        Returns:
            tuple: (json_text, "application/json"); when cleaning fails, a small
            JSON document describing the error is returned instead.
        """
        mime_type = "application/json"
        try:
            # The AI is expected to have returned JSON already; tidy it up.
            return self._clean_json_content(extracted_content, title), mime_type
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            error_text = str(e)
            # Minimal stand-in document carrying the failure reason.
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {error_text}"}],
                "metadata": {"error": error_text},
            }
            return json.dumps(fallback_data, indent=2), mime_type

    def _clean_json_content(self, content: str, title: str) -> str:
        """Strip a surrounding code fence and pretty-print the JSON when it parses."""
        text = content.strip()

        # Drop the opening and closing fence lines of a markdown code block.
        is_fenced = text.startswith("```") and text.endswith("```")
        if is_fenced:
            fence_lines = text.split('\n')
            if len(fence_lines) > 2:
                text = '\n'.join(fence_lines[1:-1]).strip()

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Unparseable text is handed back untouched.
            return text
        # Parseable text is re-serialised with consistent indentation.
        return json.dumps(parsed, indent=2, ensure_ascii=False)

View file

@ -1,65 +0,0 @@
"""
Markdown renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class MarkdownRenderer(BaseRenderer):
    """Renderer that emits the AI-extracted report as Markdown text."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """List the format names served by this renderer."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """List the alternative names accepted for this renderer."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Priority of the Markdown renderer among competing renderers."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Provide the Markdown-only guidelines; the global prompt is assembled elsewhere."""
        guideline_lines = (
            "MARKDOWN FORMAT GUIDELINES:",
            "- Use proper Markdown syntax only (no HTML wrappers).",
            "- # for main title, ## for sections, ### for subsections.",
            "- Tables with | separators and a header row.",
            "- Bullet lists with - or *.",
            "- Emphasis with **bold** and *italic*.",
            "- Code blocks with ```language.",
            "- Horizontal rules (---) to separate major sections when helpful.",
            "- Include links [text](url) and images ![alt](url) when referenced by sources.",
            "OUTPUT: Return ONLY raw Markdown content without code fences.",
        )
        return "\n".join(guideline_lines)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Produce the Markdown output for the extracted content.

        Returns:
            tuple: (markdown_text, "text/markdown"); when cleaning fails, a short
            Markdown document describing the error is returned instead.
        """
        mime_type = "text/markdown"
        try:
            # The AI is expected to have returned Markdown already; tidy it up.
            cleaned = self._clean_markdown_content(extracted_content, title)
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Minimal stand-in document carrying the failure reason.
            return f"# {title}\n\nError rendering report: {str(e)}", mime_type
        return cleaned, mime_type

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Trim the text and strip a code fence that wraps the whole document."""
        text = content.strip()

        # Drop the opening and closing fence lines of a markdown code block.
        is_fenced = text.startswith("```") and text.endswith("```")
        if is_fenced:
            fence_lines = text.split('\n')
            if len(fence_lines) > 2:
                text = '\n'.join(fence_lines[1:-1]).strip()

        return text

View file

@ -1,225 +0,0 @@
"""
PDF renderer for report generation using reportlab.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to PDF format.

        Args:
            extracted_content: Plain/markdown-like text produced by the AI
            title: Report title placed on the title page

        Returns:
            tuple: (base64-encoded PDF, "application/pdf") on success;
            (html, "text/html") when reportlab is not installed;
            (error text, "text/plain") when PDF generation fails.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"
        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """
        Generate the PDF with reportlab and return it base64-encoded.

        Lines starting with '# ', '## ' or '### ' become headings; every other
        non-empty line is collected and laid out by _process_section.

        Raises:
            Exception: any reportlab/layout error is logged and re-raised.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # (line prefix, paragraph style, space after) for H1/H2/H3.
            heading_rules = (
                ('# ', title_style, 12),
                ('## ', heading_style, 8),
                ('### ', styles['Heading3'], 6),
            )

            # Title page
            story = [
                Paragraph(title, title_style),
                Spacer(1, 20),
                Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']),
                PageBreak(),
            ]

            # Process content: body lines accumulate until the next heading.
            current_section = []
            for raw_line in content.split('\n'):
                line = raw_line.strip()
                if not line:
                    continue

                for prefix, style, gap in heading_rules:
                    if line.startswith(prefix):
                        # A heading closes the section collected so far.
                        if current_section:
                            story.extend(self._process_section(current_section, styles))
                            current_section = []
                        story.append(Paragraph(line[len(prefix):], style))
                        story.append(Spacer(1, gap))
                        break
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            return base64.b64encode(buffer.getvalue()).decode('utf-8')
        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """
        Process a section of content into PDF elements.

        Each run of consecutive pipe-delimited lines becomes one table; all
        other lines become paragraphs. Processing continues after a table, so
        text that follows it is kept and separate tables stay separate.
        """
        elements = []
        index = 0
        total = len(lines)

        while index < total:
            line = lines[index]
            if not line.strip():
                index += 1
                continue

            if '|' in line:
                # Gather only the adjacent pipe lines belonging to this table.
                end = index
                while end < total and '|' in lines[end]:
                    end += 1

                table_data = self._extract_table_data(lines[index:end])
                if table_data:
                    table = Table(table_data)
                    table.setStyle(TableStyle([
                        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                        ('FONTSIZE', (0, 0), (-1, 0), 14),
                        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                        ('GRID', (0, 0), (-1, -1), 1, colors.black)
                    ]))
                    elements.append(table)
                    elements.append(Spacer(1, 12))
                    index = end
                    continue
                # Fewer than two usable rows: render the line as normal text.

            if line.startswith(('- ', '* ')):
                # List item: drop the marker, keep the text.
                elements.append(Paragraph(line[2:], styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))
            elements.append(Spacer(1, 6))
            index += 1

        return elements

    def _extract_table_data(self, lines: list) -> list:
        """
        Extract table rows from pipe-delimited lines.

        Returns a list of equally long rows (lists of cell strings), or an
        empty list when fewer than two data rows are found. Markdown separator
        rows such as |---|:---:| are skipped, empty interior cells are kept so
        columns stay aligned, and short rows are padded with empty cells.
        """
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                in_table = True
                row_text = line.strip()
                # Only the outer border pipes are decoration; interior empty
                # cells carry column position and must survive the split.
                if row_text.startswith('|'):
                    row_text = row_text[1:]
                if row_text.endswith('|'):
                    row_text = row_text[:-1]
                cells = [cell.strip() for cell in row_text.split('|')]

                if not any(cells):
                    continue
                if all(self._is_separator_cell(cell) for cell in cells):
                    # Header/body divider of a markdown table, not data.
                    continue
                table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        if len(table_data) < 2:
            return []

        # Pad short rows so every row has the same number of columns.
        width = max(len(row) for row in table_data)
        return [row + [''] * (width - len(row)) for row in table_data]

    def _is_separator_cell(self, cell: str) -> bool:
        """Tell whether a cell is a markdown divider such as '---' or ':---:'."""
        core = cell.strip(':')
        return len(core) >= 3 and set(core) == {'-'}

View file

@ -6,7 +6,7 @@ import logging
import importlib
import pkgutil
from typing import Dict, Type, List, Optional
from .base_renderer import BaseRenderer
from .rendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
@ -37,7 +37,7 @@ class RendererRegistry:
# Scan all Python files in the renderers directory
for file_path in renderers_dir.glob("*.py"):
if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
continue
# Extract module name from filename

View file

@ -0,0 +1,285 @@
"""
Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
import json
logger = logging.getLogger(__name__)
class BaseRenderer(ABC):
    """Base class for all format renderers."""

    def __init__(self):
        self.logger = logger

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
        """
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
        """
        return []

    @classmethod
    def get_priority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
        """
        return 0

    @abstractmethod
    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """
        Render extracted JSON content to the target format.

        Args:
            extracted_content: Structured JSON content with sections and metadata
            title: Report title
            user_prompt: Original user prompt for context
            ai_service: AI service instance for additional processing

        Returns:
            tuple: (rendered_content, mime_type)
        """
        pass

    def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract sections from report data."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metadata from report data."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Get title from the report's metadata or use fallback."""
        metadata = report_data.get('metadata', {})
        return metadata.get('title', fallback_title)

    def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
        """
        Validate that JSON content has the expected structure.

        The content must be a dict with a list under "sections", and every
        section must be a dict carrying both "type" and "data".
        """
        if not isinstance(json_content, dict):
            return False

        if "sections" not in json_content:
            return False

        sections = json_content.get("sections", [])
        if not isinstance(sections, list):
            return False

        # Validate each section has type and data
        for section in sections:
            if not isinstance(section, dict):
                return False
            if "type" not in section or "data" not in section:
                return False

        return True

    def _get_section_type(self, section: Dict[str, Any]) -> str:
        """Get the type of a section (defaults to "paragraph")."""
        return section.get("type", "paragraph")

    def _get_section_data(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Get the data of a section (defaults to an empty dict)."""
        return section.get("data", {})

    def _get_section_id(self, section: Dict[str, Any]) -> str:
        """Get the ID of a section (defaults to "unknown")."""
        return section.get("id", "unknown")

    def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
        """Extract table headers and rows from section data."""
        headers = section_data.get("headers", [])
        rows = section_data.get("rows", [])
        return headers, rows

    def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
        """
        Extract bullet list items from section data.

        Items may be plain strings or dicts with a "text" key; anything else
        is skipped.
        """
        items = section_data.get("items", [])
        result = []
        for item in items:
            if isinstance(item, str):
                result.append(item)
            elif isinstance(item, dict) and "text" in item:
                result.append(item["text"])
        return result

    def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
        """Extract heading level and text from section data."""
        level = section_data.get("level", 1)
        text = section_data.get("text", "")
        return level, text

    def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
        """Extract paragraph text from section data."""
        return section_data.get("text", "")

    def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Extract code and language from section data."""
        code = section_data.get("code", "")
        language = section_data.get("language", "")
        return code, language

    def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Extract base64 data and alt text from section data."""
        base64_data = section_data.get("base64Data", "")
        alt_text = section_data.get("altText", "Image")
        return base64_data, alt_text

    def _get_supported_section_types(self) -> List[str]:
        """Return list of supported section types."""
        return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]

    def _is_valid_section_type(self, section_type: str) -> bool:
        """Check if a section type is valid."""
        return section_type in self._get_supported_section_types()

    def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a section and return structured data based on its type.

        Unknown section types are treated as paragraphs.
        """
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)

        if section_type == "table":
            headers, rows = self._extract_table_data(section_data)
            return {"type": "table", "headers": headers, "rows": rows}
        elif section_type == "bullet_list":
            items = self._extract_bullet_list_items(section_data)
            return {"type": "bullet_list", "items": items}
        elif section_type == "heading":
            level, text = self._extract_heading_data(section_data)
            return {"type": "heading", "level": level, "text": text}
        elif section_type == "paragraph":
            text = self._extract_paragraph_text(section_data)
            return {"type": "paragraph", "text": text}
        elif section_type == "code_block":
            code, language = self._extract_code_block_data(section_data)
            return {"type": "code_block", "code": code, "language": language}
        elif section_type == "image":
            base64_data, alt_text = self._extract_image_data(section_data)
            return {"type": "image", "base64Data": base64_data, "altText": alt_text}
        else:
            # Fallback to paragraph
            text = self._extract_paragraph_text(section_data)
            return {"type": "paragraph", "text": text}

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Return the given timestamp unchanged, or the current UTC time as text."""
        if timestamp:
            return timestamp
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

    # ===== GENERIC AI STYLING HELPERS =====

    async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generic AI styling method that can be used by all renderers.

        Any failure (no service, empty response, invalid JSON, JSON that is not
        an object, or an exception during the call) yields default_styles.

        Args:
            ai_service: AI service instance
            style_template: Format-specific style template
            default_styles: Default styles to fall back to

        Returns:
            Dict with styling definitions
        """
        if not ai_service:
            return default_styles

        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=style_template, context="", options=request_options)
            response = await ai_service.aiObjects.call(request)

            # Clean and parse JSON
            result = response.content.strip() if response and response.content else ""
            self.logger.debug(
                "AI styling response type: %s, length: %d",
                type(response).__name__, len(result)
            )

            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return default_styles

            # Extract JSON from markdown if present
            result = self._strip_json_fences(result)

            # Try to parse JSON
            try:
                styles = json.loads(result)
            except json.JSONDecodeError as json_error:
                self.logger.debug("AI styling raw result: %s...", result[:200])
                self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
                return default_styles

            # Valid JSON can still be a list, string or number; only an object
            # can serve as a style definition.
            if not isinstance(styles, dict):
                self.logger.warning(
                    f"AI styling returned {type(styles).__name__} instead of a JSON object, using defaults"
                )
                return default_styles

            self.logger.debug("AI styling parsed keys: %s", list(styles.keys()))

            # Convert colors to appropriate format
            return self._convert_colors_format(styles)

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return default_styles

    def _strip_json_fences(self, text: str) -> str:
        """Remove a markdown code fence (```json or plain ```) around JSON text."""
        import re

        # A complete ```json ... ``` block anywhere in the text wins.
        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
        if fenced:
            return fenced.group(1).strip()

        # Otherwise peel off a leading fence marker and a trailing one, if any.
        if text.startswith('```json'):
            text = re.sub(r'^```json\s*', '', text)
            return re.sub(r'\s*```$', '', text)
        if text.startswith('```'):
            text = re.sub(r'^```\s*', '', text)
            return re.sub(r'\s*```$', '', text)
        return text

    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert colors to appropriate format based on renderer type.
        Override this method in subclasses for format-specific color handling.
        """
        return styles

    def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
        """
        Create a standardized AI style template for any format.

        NOTE(review): format_name and user_prompt are accepted but not used in
        the generated prompt text; confirm whether that is intended.

        Args:
            format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
            user_prompt: User's original prompt
            style_schema: Format-specific style schema

        Returns:
            Formatted prompt string
        """
        schema_json = json.dumps(style_schema, indent=4)

        return f"""Return this exact JSON structure with your styling customizations:
{schema_json}
NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""

View file

@ -2,12 +2,12 @@
CSV renderer for report generation.
"""
from .base_renderer import BaseRenderer
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io
class CsvRenderer(BaseRenderer):
class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@classmethod
@ -25,20 +25,6 @@ class CsvRenderer(BaseRenderer):
"""Return priority for CSV renderer."""
return 70
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
"""Return only CSV-specific guidelines; global prompt is built centrally."""
return (
"CSV FORMAT GUIDELINES:\n"
"- Extract structured data from source documents into JSON format\n"
"- Focus on tabular data, lists, and structured information\n"
"- For tables: Extract headers and rows as separate arrays\n"
"- For lists: Extract items with optional sub-items\n"
"- Structure content into sections with clear content types\n"
"- Use proper JSON structure with metadata, sections, and elements\n"
"- Ensure data is clean and ready for CSV conversion\n"
"OUTPUT: Return structured JSON that can be converted to CSV format."
)
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to CSV format."""
try:

View file

@ -2,7 +2,7 @@
DOCX renderer for report generation using python-docx.
"""
from .base_renderer import BaseRenderer
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
@ -22,7 +22,7 @@ try:
except ImportError:
DOCX_AVAILABLE = False
class DocxRenderer(BaseRenderer):
class RendererDocx(BaseRenderer):
"""Renders content to DOCX format using python-docx."""
@classmethod
@ -40,30 +40,14 @@ class DocxRenderer(BaseRenderer):
"""Return priority for DOCX renderer."""
return 115
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Build the DOCX-only part of the extraction prompt.

    The shared/global prompt is assembled elsewhere; this method contributes
    just the format guidelines. ``user_prompt`` and ``title`` are accepted
    for interface compatibility and are not used here.
    """
    guideline_parts = (
        "DOCX FORMAT GUIDELINES:\n",
        "- Extract the ACTUAL table data, lists, and content from the source documents\n",
        "- For tables: Extract all rows and columns in pipe-separated format (Column1 | Column2 | Column3)\n",
        "- For lists: Extract the actual list items, not summaries\n",
        "- Structure your response with clear headings using numbered format: 1) Heading, 2) Heading, etc.\n",
        "- Use bullet points (-) for lists and sub-items\n",
        "- Use **bold** for emphasis on key terms\n",
        "- Provide clean, structured content that can be directly converted to Word formatting\n",
        "- Do NOT include debug information, separators (---), metadata, or FILENAME headers\n",
        "- Start directly with your content - no introductory text or separators\n",
        "- Extract raw data, not analysis or summaries\n",
        "OUTPUT: Return ONLY the structured plain text to be converted into DOCX.",
    )
    return "".join(guideline_parts)
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
print(f"🔍 DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...")
try:
if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .html_renderer import HtmlRenderer
html_renderer = HtmlRenderer()
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title)
return html_content, "text/html"
@ -84,7 +68,10 @@ class DocxRenderer(BaseRenderer):
doc = Document()
# Get AI-generated styling definitions
print(f"🔍 ABOUT TO CALL AI STYLING: user_prompt={user_prompt[:50] if user_prompt else 'None'}...")
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
styles = await self._get_docx_styles(user_prompt, ai_service)
print(f"🔍 AI STYLING RESULT: {type(styles)}")
# Apply basic document setup
self._setup_basic_document_styles(doc)
@ -125,61 +112,24 @@ class DocxRenderer(BaseRenderer):
raise Exception(f"DOCX generation failed: {str(e)}")
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Simple AI call to get DOCX styling definitions."""
if not ai_service:
return self._get_default_styles()
"""Get DOCX styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
try:
prompt = f"""
For this DOCX document request: "{user_prompt}"
Provide styling definitions for DOCX elements. IMPORTANT: Ensure proper contrast - never use white text on white background or dark text on dark background. Respond with ONLY JSON:
{{
"title": {{"font_size": 24, "color": "#1F4E79", "bold": true, "align": "center"}},
"heading1": {{"font_size": 18, "color": "#2F2F2F", "bold": true, "align": "left"}},
"heading2": {{"font_size": 14, "color": "#4F4F4F", "bold": true, "align": "left"}},
"paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
"table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
"table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
"table_border": {{"style": "horizontal_only", "color": "#000000", "thickness": "thin"}},
"bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 20}},
"code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
}}
CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
result = response.content.strip()
if result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
styles = json.loads(result)
# Validate and fix contrast issues
styles = self._validate_styles_contrast(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return self._get_default_styles()
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
# Validate and fix contrast issues
return self._validate_styles_contrast(styles)
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
@ -1005,145 +955,4 @@ CRITICAL: Table headers must have dark background with light text, table cells m
# Bold text
if part:
run = para.add_run(part)
run.bold = True
def _add_bullet_point(self, doc, text: str):
"""Add a bullet point to the document."""
if not text.strip():
return
# Create paragraph with bullet style
para = doc.add_paragraph(text, style='List Bullet')
# Check for Markdown formatting in bullet point
if '**' in text or '*' in text:
# Clear the paragraph and rebuild with formatting
para.clear()
self._add_paragraph_to_doc(doc, text)
def _style_table(self, table):
"""Apply styling to the table."""
try:
# Style header row
if len(table.rows) > 0:
header_cells = table.rows[0].cells
for cell in header_cells:
for paragraph in cell.paragraphs:
for run in paragraph.runs:
run.bold = True
except Exception as e:
self.logger.warning(f"Could not style table: {str(e)}")
def _format_timestamp(self) -> str:
"""Format current timestamp for document generation."""
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
"""Process a table row and add it to the document."""
if not line.strip():
return
# Clean the line - remove bullet point markers and bold markers
clean_line = line.strip()
if clean_line.startswith(''):
clean_line = clean_line[1:] # Remove "•"
elif clean_line.startswith('- **'):
clean_line = clean_line[4:] # Remove "- **"
elif clean_line.startswith('- '):
clean_line = clean_line[2:] # Remove "- "
elif clean_line.startswith('**'):
clean_line = clean_line[2:] # Remove "**"
# Remove trailing ** if present
if clean_line.endswith('**'):
clean_line = clean_line[:-2]
# Split by pipe separator
parts = [part.strip() for part in clean_line.split('|')]
if len(parts) >= 2:
# This is a table row - create a table if it doesn't exist
if not hasattr(self, '_current_table') or self._current_table is None:
# Create new table
self._current_table = doc.add_table(rows=1, cols=len(parts))
self._current_table.style = 'Table Grid'
# Check if this looks like a header row (contains common header words)
is_header = any(word.lower() in clean_line.lower() for word in ['name', 'quantity', 'part', 'number', 'description', 'tag', 'item', 'status'])
# Add header row
for i, part in enumerate(parts):
if i < len(self._current_table.rows[0].cells):
cell = self._current_table.rows[0].cells[i]
cell.text = part
# Make header bold if it looks like a header
if is_header:
for paragraph in cell.paragraphs:
for run in paragraph.runs:
run.bold = True
else:
# Add data row to existing table
row = self._current_table.add_row()
for i, part in enumerate(parts):
if i < len(row.cells):
row.cells[i].text = part
else:
# Not a table row, treat as regular text
doc.add_paragraph(line)
def _add_bullet_point(self, doc, text: str):
"""Add a bullet point to the document."""
if not text.strip():
return
# Create paragraph with bullet style
para = doc.add_paragraph(text, style='List Bullet')
# Check for bold text in bullet point
if '**' in text:
# Clear the paragraph and rebuild with formatting
para.clear()
parts = text.split('**')
for i, part in enumerate(parts):
if i % 2 == 0:
# Regular text
if part:
para.add_run(part)
else:
# Bold text
if part:
run = para.add_run(part)
run.bold = True
def _process_table_row(self, doc, line: str):
"""Process a table row and add it to the document."""
if not line.strip():
return
# Split by pipe separator
parts = [part.strip() for part in line.split('|')]
if len(parts) >= 2:
# This is a table row - create a table if it doesn't exist
if not hasattr(self, '_current_table') or self._current_table is None:
# Create new table
self._current_table = doc.add_table(rows=1, cols=len(parts))
self._current_table.style = 'Table Grid'
# Add header row
for i, part in enumerate(parts):
if i < len(self._current_table.rows[0].cells):
cell = self._current_table.rows[0].cells[i]
cell.text = part
# Make header bold
for paragraph in cell.paragraphs:
for run in paragraph.runs:
run.bold = True
else:
# Add data row to existing table
row = self._current_table.add_row()
for i, part in enumerate(parts):
if i < len(row.cells):
row.cells[i].text = part
else:
# Not a table row, treat as regular text
doc.add_paragraph(line)
run.bold = True

View file

@ -2,7 +2,7 @@
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
@ -17,7 +17,7 @@ try:
except ImportError:
OPENPYXL_AVAILABLE = False
class ExcelRenderer(BaseRenderer):
class RendererExcel(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
@ -35,27 +35,13 @@ class ExcelRenderer(BaseRenderer):
"""Return priority for Excel renderer."""
return 110
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Build the Excel-only part of the extraction prompt.

    The shared/global prompt is assembled elsewhere; this method contributes
    just the format guidelines. ``user_prompt`` and ``title`` are accepted
    for interface compatibility and are not used here.
    """
    guideline_parts = (
        "EXCEL FORMAT GUIDELINES:\n",
        "- Extract structured data from source documents into JSON format\n",
        "- Focus on tabular data, lists, and structured information suitable for spreadsheets\n",
        "- For tables: Extract headers and rows as separate arrays with clear column names\n",
        "- For lists: Extract items with optional sub-items and metadata\n",
        "- Structure content into sections with clear content types (table, list, paragraph)\n",
        "- Use proper JSON structure with metadata, sections, and elements\n",
        "- Ensure data is clean and ready for Excel conversion with proper formatting\n",
        "OUTPUT: Return structured JSON that can be converted to Excel format.",
    )
    return "".join(guideline_parts)
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available
from .csv_renderer import CsvRenderer
csv_renderer = CsvRenderer()
from .rendererCsv import RendererCsv
csv_renderer = RendererCsv()
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
return csv_content, "text/csv"
@ -215,6 +201,10 @@ class ExcelRenderer(BaseRenderer):
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Debug output
print(f"🔍 EXCEL JSON CONTENT TYPE: {type(json_content)}")
print(f"🔍 EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}")
# Get AI-generated styling definitions
styles = await self._get_excel_styles(user_prompt, ai_service)
@ -231,11 +221,9 @@ class ExcelRenderer(BaseRenderer):
# Create workbook
wb = Workbook()
# Remove default sheet
wb.remove(wb.active)
# Create sheets based on content
sheets = self._create_excel_sheets(wb, json_content, styles)
print(f"🔍 EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}")
# Populate sheets with content
self._populate_excel_sheets(sheets, json_content, styles)
@ -247,7 +235,13 @@ class ExcelRenderer(BaseRenderer):
# Convert to base64
excel_bytes = buffer.getvalue()
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
print(f"🔍 EXCEL BYTES LENGTH: {len(excel_bytes)}")
try:
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
print(f"🔍 EXCEL BASE64 LENGTH: {len(excel_base64)}")
except Exception as b64_error:
print(f"🔍 BASE64 ENCODING ERROR: {b64_error}")
raise
return excel_base64
@ -256,59 +250,38 @@ class ExcelRenderer(BaseRenderer):
raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Simple AI call to get Excel styling definitions."""
if not ai_service:
return self._get_default_excel_styles()
"""Get Excel styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_excel_styles())
# Convert colors to aRGB format and validate
styles = self._convert_colors_format(styles)
return self._validate_excel_styles_contrast(styles)
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
try:
prompt = f"""
For this Excel document request: "{user_prompt}"
Provide styling definitions for Excel elements. Respond with ONLY JSON:
{{
"title": {{"font_size": 16, "color": "#1F4E79", "bold": true, "align": "center"}},
"heading": {{"font_size": 14, "color": "#2F2F2F", "bold": true, "align": "left"}},
"table_header": {{"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": true, "align": "center"}},
"table_cell": {{"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": false, "align": "left"}},
"bullet_list": {{"font_size": 11, "color": "#2F2F2F", "indent": 2}},
"paragraph": {{"font_size": 11, "color": "#2F2F2F", "bold": false, "align": "left"}},
"code_block": {{"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}}
}}
CRITICAL: Table headers must have dark background with light text, table cells must have light background with dark text for readability.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
result = response.content.strip()
if result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
styles = json.loads(result)
# Validate and fix contrast issues
styles = self._validate_excel_styles_contrast(styles)
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[style_name][prop] = f"FF{value[1:]}"
print(f"🔍 CONVERTED COLOR: {value}{styles[style_name][prop]}")
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return self._get_default_excel_styles()
print(f"🔍 COLOR CONVERSION ERROR: {e}")
return styles
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
@ -348,15 +321,15 @@ CRITICAL: Table headers must have dark background with light text, table cells m
return self._get_default_excel_styles()
def _get_default_excel_styles(self) -> Dict[str, Any]:
"""Default Excel styles."""
"""Default Excel styles with aRGB color format."""
return {
"title": {"font_size": 16, "color": "#1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
@ -365,13 +338,16 @@ CRITICAL: Table headers must have dark background with light text, table cells m
# Get sheet names from AI styles or generate based on content
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
print(f"🔍 EXCEL SHEET NAMES: {sheet_names}")
# Create sheets
for i, sheet_name in enumerate(sheet_names):
if i == 0:
# Use the default sheet for the first sheet
sheet = wb.active
sheet.title = sheet_name
else:
# Create additional sheets
sheet = wb.create_sheet(sheet_name, i)
sheets[sheet_name.lower()] = sheet
@ -437,7 +413,9 @@ CRITICAL: Table headers must have dark background with light text, table cells m
document_title = json_content.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = document_title
title_style = styles["title"]
# Safety check for title style
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
print(f"🔍 EXCEL TITLE STYLE: {title_style}")
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=title_style["color"])
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
@ -560,6 +538,107 @@ CRITICAL: Table headers must have dark background with light text, table cells m
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return start_row + 1
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a table element to Excel sheet."""
try:
table_data = element.get("data", {})
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers and not rows:
return start_row
# Add headers
header_style = styles.get("table_header", {})
for col, header in enumerate(headers, 1):
cell = sheet.cell(row=start_row, column=col, value=header)
if header_style.get("bold"):
cell.font = Font(bold=True, color=header_style.get("text_color", "#FF000000"))
if header_style.get("background"):
cell.fill = PatternFill(start_color=header_style["background"], end_color=header_style["background"], fill_type="solid")
start_row += 1
# Add rows
cell_style = styles.get("table_cell", {})
for row_data in rows:
for col, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=start_row, column=col, value=cell_value)
if cell_style.get("text_color"):
cell.font = Font(color=cell_style["text_color"])
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add table to Excel: {str(e)}")
return start_row + 1
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a list element to Excel sheet."""
try:
list_items = element.get("items", [])
list_style = styles.get("bullet_list", {})
for item in list_items:
sheet.cell(row=start_row, column=1, value=f"{item}")
if list_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=list_style["color"])
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}")
return start_row + 1
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a paragraph element to Excel sheet."""
try:
text = element.get("text", "")
if text:
sheet.cell(row=start_row, column=1, value=text)
paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=paragraph_style["color"])
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
return start_row + 1
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a heading element to Excel sheet."""
try:
text = element.get("text", "")
level = element.get("level", 1)
if text:
sheet.cell(row=start_row, column=1, value=text)
heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14)
if level > 1:
font_size = max(10, font_size - (level - 1) * 2)
sheet.cell(row=start_row, column=1).font = Font(
size=font_size,
bold=True,
color=heading_style.get("color", "#FF000000")
)
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return start_row + 1
def _format_timestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -0,0 +1,463 @@
"""
HTML renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported HTML formats."""
return ['html', 'htm']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['web', 'webpage']
@classmethod
def get_priority(cls) -> int:
"""Return priority for HTML renderer."""
return 100
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to an HTML document.

    Args:
        extracted_content: Structured document passed to
            ``_generate_html_from_json``.
        title: Fallback document title.
        user_prompt: Optional user request forwarded to the styling step.
        ai_service: Optional AI service forwarded to the styling step.

    Returns:
        ``(html, "text/html")``. If generation fails, the error is logged
        and a minimal error page is returned instead of raising.
    """
    from html import escape

    try:
        # Generate HTML using AI-analyzed styling
        html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
        return html_content, "text/html"

    except Exception as e:
        self.logger.error(f"Error rendering HTML: {str(e)}")
        # Title and error text may contain markup-significant characters
        # (they originate from documents/AI output), so escape them before
        # embedding them in the fallback page.
        safe_title = escape(str(title))
        safe_error = escape(str(e))
        return f"<html><head><title>{safe_title}</title></head><body><h1>{safe_title}</h1><p>Error rendering report: {safe_error}</p></body></html>", "text/html"
async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_html_styles(user_prompt, ai_service)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Build HTML document
html_parts = []
# HTML document structure
html_parts.append('<!DOCTYPE html>')
html_parts.append('<html lang="en">')
html_parts.append('<head>')
html_parts.append('<meta charset="UTF-8">')
html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
html_parts.append(f'<title>{document_title}</title>')
html_parts.append('<style>')
html_parts.append(self._generate_css_styles(styles))
html_parts.append('</style>')
html_parts.append('</head>')
html_parts.append('<body>')
# Document header
html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')
# Main content
html_parts.append('<main>')
# Process each section
sections = json_content.get("sections", [])
for section in sections:
section_html = self._render_json_section(section, styles)
if section_html:
html_parts.append(section_html)
html_parts.append('</main>')
# Footer
html_parts.append('<footer>')
html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
html_parts.append('</footer>')
html_parts.append('</body>')
html_parts.append('</html>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}")
async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Simple AI call to get HTML styling definitions."""
if not ai_service:
return self._get_default_html_styles()
try:
prompt = f"""Return this exact JSON structure with your styling customizations:
{{
"title": {{"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}},
"heading1": {{"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}},
"heading2": {{"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}},
"paragraph": {{"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"}},
"table": {{"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"}},
"table_header": {{"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"}},
"table_cell": {{"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"}},
"bullet_list": {{"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"}},
"code_block": {{"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"}},
"image": {{"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"}},
"body": {{"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}}
}}
NO TEXT. NO EXPLANATIONS. NO MARKDOWN. NO WRAPPER OBJECTS. ONLY THE JSON ABOVE."""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return self._get_default_html_styles()
# Extract JSON from markdown code blocks
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to parse JSON
try:
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
return self._get_default_html_styles()
# Validate and fix contrast issues
styles = self._validate_html_styles_contrast(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return self._get_default_html_styles()
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_html_styles()
def _get_default_html_styles(self) -> Dict[str, Any]:
"""Default HTML styles."""
return {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
"paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
"table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
"table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
"table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
"bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
"code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
"image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
css_parts = []
# Body styles
body_style = styles.get("body", {})
css_parts.append("body {")
for property_name, value in body_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Document title
title_style = styles.get("title", {})
css_parts.append(".document-title {")
for property_name, value in title_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Headings
for heading_level in ["heading1", "heading2"]:
heading_style = styles.get(heading_level, {})
css_class = f"h{heading_level[-1]}"
css_parts.append(f"{css_class} {{")
for property_name, value in heading_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Paragraphs
paragraph_style = styles.get("paragraph", {})
css_parts.append("p {")
for property_name, value in paragraph_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Tables
table_style = styles.get("table", {})
css_parts.append("table {")
for property_name, value in table_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Table headers
table_header_style = styles.get("table_header", {})
css_parts.append("th {")
for property_name, value in table_header_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Table cells
table_cell_style = styles.get("table_cell", {})
css_parts.append("td {")
for property_name, value in table_cell_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Lists
bullet_list_style = styles.get("bullet_list", {})
css_parts.append("ul {")
for property_name, value in bullet_list_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Code blocks
code_block_style = styles.get("code_block", {})
css_parts.append("pre {")
for property_name, value in code_block_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Images
image_style = styles.get("image", {})
css_parts.append("img {")
for property_name, value in image_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Generated info
css_parts.append(".generated-info {")
css_parts.append(" font-size: 0.9em;")
css_parts.append(" color: #666;")
css_parts.append(" text-align: center;")
css_parts.append(" margin-top: 2em;")
css_parts.append(" padding-top: 1em;")
css_parts.append(" border-top: 1px solid #ddd;")
css_parts.append("}")
return '\n'.join(css_parts)
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data, styles)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data, styles)
elif section_type == "heading":
return self._render_json_heading(section_data, styles)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data, styles)
elif section_type == "code_block":
return self._render_json_code_block(section_data, styles)
elif section_type == "image":
return self._render_json_image(section_data, styles)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
return f'<div class="error">[Error rendering section: {str(e)}]</div>'
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return ""
html_parts = ['<table>']
# Table header
html_parts.append('<thead><tr>')
for header in headers:
html_parts.append(f'<th>{header}</th>')
html_parts.append('</tr></thead>')
# Table body
html_parts.append('<tbody>')
for row in rows:
html_parts.append('<tr>')
for cell_data in row:
html_parts.append(f'<td>{cell_data}</td>')
html_parts.append('</tr>')
html_parts.append('</tbody>')
html_parts.append('</table>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles."""
try:
items = list_data.get("items", [])
if not items:
return ""
html_parts = ['<ul>']
for item in items:
if isinstance(item, str):
html_parts.append(f'<li>{item}</li>')
elif isinstance(item, dict) and "text" in item:
html_parts.append(f'<li>{item["text"]}</li>')
html_parts.append('</ul>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
return f'<h{level}>{text}</h{level}>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
return f'<p>{text}</p>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
if language:
return f'<pre><code class="language-{language}">{code}</code></pre>'
else:
return f'<pre><code>{code}</code></pre>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
if base64_data:
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'
return ""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'

View file

@ -0,0 +1,79 @@
"""
JSON renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import json
class RendererJson(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to a JSON document.

        Args:
            extracted_content: The structured document (expected to be a dict
                with a "sections" list).
            title: Title used when the content carries none in its metadata.
            user_prompt: Unused by this renderer.
            ai_service: Unused by this renderer.

        Returns:
            ``(json_text, "application/json")``. On an unexpected error a
            minimal document describing the error is returned instead.
        """
        try:
            # The extracted content should already be JSON from the AI;
            # just validate and format it.
            json_content = self._clean_json_content(extracted_content, title)
            return json_content, "application/json"
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            fallback_data = {
                "title": title,
                "sections": [{"type": "paragraph", "data": {"text": f"Error rendering report: {str(e)}"}}],
                "metadata": {"error": str(e)}
            }
            # ensure_ascii=False keeps this fallback consistent with the
            # output of _clean_json_content.
            return json.dumps(fallback_data, indent=2, ensure_ascii=False), "application/json"

    def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
        """Validate the document structure and serialise it as indented JSON.

        Content without a "sections" key is wrapped into a single paragraph
        section holding ``str(content)``. Missing or null metadata is
        created, and the title is filled in when metadata has none. The
        caller's dict is never modified: the work happens on a shallow copy
        with its own metadata dict.

        Args:
            content: The structured document.
            title: Title to store in metadata when absent.

        Returns:
            The JSON text. If validation or serialisation fails, a minimal
            document containing ``str(content)`` and the error message.
        """
        try:
            if not isinstance(content, dict):
                raise ValueError("Content must be a dictionary")

            if "sections" not in content:
                # Convert old format to new format.
                document = {
                    "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
                    "metadata": {"title": title}
                }
            else:
                # Shallow copy so adding metadata below does not leak into
                # the dict owned by the caller.
                document = dict(content)

            metadata = document.get("metadata")
            if metadata is None:
                metadata = {}
            elif not isinstance(metadata, dict):
                raise ValueError("Metadata must be a dictionary")
            else:
                metadata = dict(metadata)
            if "title" not in metadata:
                metadata["title"] = title
            document["metadata"] = metadata

            return json.dumps(document, indent=2, ensure_ascii=False)
        except Exception as e:
            self.logger.warning(f"Error cleaning JSON content: {str(e)}")
            # Return minimal valid JSON.
            fallback_data = {
                "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
                "metadata": {"title": title, "error": str(e)}
            }
            return json.dumps(fallback_data, indent=2, ensure_ascii=False)

View file

@ -0,0 +1,213 @@
"""
Markdown renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported Markdown formats."""
return ['md', 'markdown']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['mdown', 'mkd']
@classmethod
def get_priority(cls) -> int:
"""Return priority for markdown renderer."""
return 95
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to Markdown format."""
try:
# Generate markdown from JSON structure
markdown_content = self._generate_markdown_from_json(extracted_content, title)
return markdown_content, "text/markdown"
except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Build markdown content
markdown_parts = []
# Document title
markdown_parts.append(f"# {document_title}")
markdown_parts.append("")
# Process each section
sections = json_content.get("sections", [])
for section in sections:
section_markdown = self._render_json_section(section)
if section_markdown:
markdown_parts.append(section_markdown)
markdown_parts.append("") # Add spacing between sections
# Add generation info
markdown_parts.append("---")
markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
return '\n'.join(markdown_parts)
except Exception as e:
self.logger.error(f"Error generating markdown from JSON: {str(e)}")
raise Exception(f"Markdown generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to markdown."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data)
elif section_type == "heading":
return self._render_json_heading(section_data)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data)
elif section_type == "code_block":
return self._render_json_code_block(section_data)
elif section_type == "image":
return self._render_json_image(section_data)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
return f"*[Error rendering section: {str(e)}]*"
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
"""Render a JSON table to markdown."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return ""
markdown_parts = []
# Create table header
header_line = " | ".join(str(header) for header in headers)
markdown_parts.append(header_line)
# Add separator line
separator_line = " | ".join("---" for _ in headers)
markdown_parts.append(separator_line)
# Add data rows
for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row)
markdown_parts.append(row_line)
return '\n'.join(markdown_parts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
"""Render a JSON bullet list to markdown."""
try:
items = list_data.get("items", [])
if not items:
return ""
markdown_parts = []
for item in items:
if isinstance(item, str):
markdown_parts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
markdown_parts.append(f"- {item['text']}")
return '\n'.join(markdown_parts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
"""Render a JSON heading to markdown."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
return f"{'#' * level} {text}"
return ""
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
"""Render a JSON paragraph to markdown."""
try:
text = paragraph_data.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
"""Render a JSON code block to markdown."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
if language:
return f"```{language}\n{code}\n```"
else:
return f"```\n{code}\n```"
return ""
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
"""Render a JSON image to markdown."""
try:
alt_text = image_data.get("altText", "Image")
base64_data = image_data.get("base64Data", "")
if base64_data:
# For base64 images, we can't embed them directly in markdown
# So we'll use a placeholder with the alt text
return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)"
else:
return f"![{alt_text}](image-placeholder)"
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f"![{image_data.get('altText', 'Image')}](image-error)"

View file

@ -0,0 +1,416 @@
"""
PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
def get_priority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
return html_content, "text/html"
# Generate PDF using AI-analyzed styling
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
return pdf_content, "application/pdf"
except Exception as e:
self.logger.error(f"Error rendering PDF: {str(e)}")
# Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain"
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_pdf_styles(user_prompt, ai_service)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Create a buffer to hold the PDF
buffer = io.BytesIO()
# Create PDF document
doc = SimpleDocTemplate(
buffer,
pagesize=A4,
rightMargin=72,
leftMargin=72,
topMargin=72,
bottomMargin=18
)
# Build PDF content
story = []
# Title page
title_style = self._create_title_style(styles)
story.append(Paragraph(document_title, title_style))
story.append(Spacer(1, 20))
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
story.append(PageBreak())
# Process each section
sections = json_content.get("sections", [])
for section in sections:
section_elements = self._render_json_section(section, styles)
story.extend(section_elements)
# Build PDF
doc.build(story)
# Get PDF content as base64
buffer.seek(0)
pdf_bytes = buffer.getvalue()
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
return pdf_base64
except Exception as e:
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}")
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get PDF styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
# Validate and fix contrast issues
return self._validate_pdf_styles_contrast(styles)
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pdf_styles()
def _get_default_pdf_styles(self) -> Dict[str, Any]:
"""Default PDF styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create title style from style definitions."""
title_style_def = styles.get("title", {})
return ParagraphStyle(
'CustomTitle',
fontSize=title_style_def.get("font_size", 24),
spaceAfter=title_style_def.get("space_after", 30),
alignment=self._get_alignment(title_style_def.get("align", "center")),
textColor=self._hex_to_color(title_style_def.get("color", "#1F4E79"))
)
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
"""Create heading style from style definitions."""
heading_key = f"heading{level}"
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
return ParagraphStyle(
f'CustomHeading{level}',
fontSize=heading_style_def.get("font_size", 18 - level * 2),
spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12),
alignment=self._get_alignment(heading_style_def.get("align", "left")),
textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
)
def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {})
return ParagraphStyle(
'CustomNormal',
fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6),
alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
)
def _get_alignment(self, align: str) -> int:
"""Convert alignment string to reportlab alignment constant."""
align_map = {
"center": TA_CENTER,
"left": TA_LEFT,
"justify": TA_JUSTIFY
}
return align_map.get(align.lower(), TA_LEFT)
def _hex_to_color(self, hex_color: str) -> colors.Color:
"""Convert hex color to reportlab color."""
try:
hex_color = hex_color.lstrip('#')
r = int(hex_color[0:2], 16) / 255.0
g = int(hex_color[2:4], 16) / 255.0
b = int(hex_color[4:6], 16) / 255.0
return colors.Color(r, g, b)
except:
return colors.black
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data, styles)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data, styles)
elif section_type == "heading":
return self._render_json_heading(section_data, styles)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data, styles)
elif section_type == "code_block":
return self._render_json_code_block(section_data, styles)
elif section_type == "image":
return self._render_json_image(section_data, styles)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return []
# Prepare table data
table_data_list = [headers] + rows
# Create table
table = Table(table_data_list)
# Apply styling
table_header_style = styles.get("table_header", {})
table_cell_style = styles.get("table_cell", {})
table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
('ALIGN', (0, 0), (-1, -1), self._get_alignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))),
('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
('GRID', (0, 0), (-1, -1), 1, colors.black)
]
table.setStyle(TableStyle(table_style))
return [table, Spacer(1, 12)]
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return []
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
items = list_data.get("items", [])
bullet_style_def = styles.get("bullet_list", {})
elements = []
for item in items:
if isinstance(item, str):
elements.append(Paragraph(f"{item}", self._create_normal_style(styles)))
elif isinstance(item, dict) and "text" in item:
elements.append(Paragraph(f"{item['text']}", self._create_normal_style(styles)))
if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
return elements
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return []
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
heading_style = self._create_heading_style(styles, level)
return [Paragraph(text, heading_style)]
return []
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return []
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
return [Paragraph(text, self._create_normal_style(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return []
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code_style_def = styles.get("code_block", {})
if code:
elements = []
if language:
lang_style = ParagraphStyle(
'CodeLanguage',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold'
)
elements.append(Paragraph(f"Code ({language}):", lang_style))
code_style = ParagraphStyle(
'CodeBlock',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"),
backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6)
)
elements.append(Paragraph(code, code_style))
return elements
return []
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return []
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
if base64_data:
# For now, just add a placeholder since reportlab image handling is complex
return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]

View file

@ -1,13 +1,13 @@
import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple
from .base_renderer import BaseRenderer
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
class PptxRenderer(BaseRenderer):
class RendererPptx(BaseRenderer):
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
def __init__(self):
@ -258,76 +258,25 @@ class PptxRenderer(BaseRenderer):
"""Get MIME type for rendered output."""
return self.output_mime_type
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Return the PowerPoint-specific extraction guidelines.

    Only the format-specific part is produced here; the global prompt is
    built centrally. Neither ``user_prompt`` nor ``title`` is used.
    """
    guideline_lines = [
        "POWERPOINT FORMAT GUIDELINES:",
        "- Extract structured data from source documents into JSON format",
        "- Focus on presentation-ready content with clear sections and visual elements",
        "- For tables: Extract headers and rows as separate arrays suitable for slides",
        "- For lists: Extract items with optional sub-items for bullet points",
        "- Structure content into sections with clear content types (heading, paragraph, table, list)",
        "- Use proper JSON structure with metadata, sections, and elements",
        "- Ensure content is concise and suitable for slide presentation",
        "OUTPUT: Return structured JSON that can be converted to PowerPoint slides.",
    ]
    # One guideline per line, no trailing newline after the last entry.
    return "\n".join(guideline_lines)
# NOTE(review): this span comes from a diff hunk and appears to interleave the
# removed implementation (inline prompt + JSON parsing) with its replacement
# (style_schema + base-template helpers). Confirm against the real file before
# relying on statement order.
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Simple AI call to get PowerPoint styling definitions."""
# Without an AI service the default styles are returned immediately.
if not ai_service:
return self._get_default_pptx_styles()
# Second docstring-like string; presumably the docstring of the replacement
# implementation — TODO confirm.
"""Get PowerPoint styling definitions using base template AI styling."""
# Style keys with example values; handed to self._create_ai_style_template below.
style_schema = {
"title": {"font_size": 44, "color": "#1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 32, "color": "#2F2F2F", "bold": True, "align": "left"},
"subheading": {"font_size": 24, "color": "#4F4F4F", "bold": True, "align": "left"},
"paragraph": {"font_size": 18, "color": "#2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 18, "color": "#2F2F2F", "indent": 20},
"table_header": {"font_size": 16, "color": "#FFFFFF", "bold": True, "background": "#4F4F4F"},
"table_cell": {"font_size": 14, "color": "#2F2F2F", "bold": False, "background": "#FFFFFF"},
"slide_size": "16:9",
"content_per_slide": "concise"
}
# Inline path: ask the AI service for a JSON style definition and parse it.
try:
prompt = f"""
For this PowerPoint presentation request: "{user_prompt}"
Provide styling definitions for PowerPoint elements. Respond with ONLY JSON:
{{
"title": {{"font_size": 44, "color": "#1F4E79", "bold": true, "align": "center"}},
"heading": {{"font_size": 32, "color": "#2F2F2F", "bold": true, "align": "left"}},
"subheading": {{"font_size": 24, "color": "#4F4F4F", "bold": true, "align": "left"}},
"paragraph": {{"font_size": 18, "color": "#2F2F2F", "bold": false, "align": "left"}},
"bullet_list": {{"font_size": 18, "color": "#2F2F2F", "indent": 20}},
"table_header": {{"font_size": 16, "color": "#FFFFFF", "bold": true, "background": "#4F4F4F"}},
"table_cell": {{"font_size": 14, "color": "#2F2F2F", "bold": false, "background": "#FFFFFF"}},
"slide_size": "16:9",
"content_per_slide": "concise"
}}
CRITICAL: PowerPoint text must be large enough to read from a distance. Minimum font size should be 14pt for body text.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
# A leading ```json or ``` fence and a trailing ``` fence are stripped first.
result = response.content.strip()
if result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
styles = json.loads(result)
# Validate font sizes for PowerPoint readability
styles = self._validate_pptx_styles_readability(styles)
return styles
except Exception as e:
# Any failure (service call, JSON parsing, validation) falls back to the defaults.
logger.warning(f"AI styling failed: {str(e)}, using defaults")
return self._get_default_pptx_styles()
# Template-based path: build a style template from style_schema and let
# self._get_ai_styles resolve it; the default styles are passed as its third
# argument (presumably the fallback — verify against the base template).
style_template = self._create_ai_style_template("pptx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pptx_styles())
# Validate PowerPoint-specific requirements
return self._validate_pptx_styles_readability(styles)
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""

View file

@ -0,0 +1,234 @@
"""
Text renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported text formats (excluding formats with dedicated renderers)."""
return [
'txt', 'text', 'plain',
# Programming languages
'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
# Web technologies (excluding html/htm which have dedicated renderer)
'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
# Data formats (excluding csv, md/markdown which have dedicated renderers)
'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
# Configuration files
'env', 'properties', 'conf', 'config', 'rc',
'gitattributes', 'editorconfig', 'eslintrc',
# Documentation
'readme', 'changelog', 'license', 'authors',
'contributing', 'todo', 'notes', 'docs'
]
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return [
'ascii', 'utf8', 'utf-8', 'code', 'source',
'script', 'program', 'file', 'document',
'raw', 'unformatted', 'plaintext'
]
@classmethod
def get_priority(cls) -> int:
"""Return priority for text renderer."""
return 90
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to plain text format."""
try:
# Generate text from JSON structure
text_content = self._generate_text_from_json(extracted_content, title)
return text_content, "text/plain"
except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Build text content
text_parts = []
# Document title
text_parts.append(document_title)
text_parts.append("=" * len(document_title))
text_parts.append("")
# Process each section
sections = json_content.get("sections", [])
for section in sections:
section_text = self._render_json_section(section)
if section_text:
text_parts.append(section_text)
text_parts.append("") # Add spacing between sections
# Add generation info
text_parts.append("")
text_parts.append(f"Generated: {self._format_timestamp()}")
return '\n'.join(text_parts)
except Exception as e:
self.logger.error(f"Error generating text from JSON: {str(e)}")
raise Exception(f"Text generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str:
    """Render one JSON section by dispatching on its section type.

    Section types without a dedicated handler are rendered as paragraphs.
    A failure is logged and reported inline as
    ``[Error rendering section: ...]`` instead of being raised.
    """
    try:
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)

        handlers = (
            ("table", self._render_json_table),
            ("bullet_list", self._render_json_bullet_list),
            ("heading", self._render_json_heading),
            ("paragraph", self._render_json_paragraph),
            ("code_block", self._render_json_code_block),
            ("image", self._render_json_image),
        )
        for kind, handler in handlers:
            if section_type == kind:
                return handler(section_data)

        # Fallback to paragraph for unknown types
        return self._render_json_paragraph(section_data)
    except Exception as err:
        self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(err)}")
        return f"[Error rendering section: {str(err)}]"
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
"""Render a JSON table to text."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return ""
text_parts = []
# Create table header
header_line = " | ".join(str(header) for header in headers)
text_parts.append(header_line)
# Add separator line
separator_line = " | ".join("-" * len(str(header)) for header in headers)
text_parts.append(separator_line)
# Add data rows
for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row)
text_parts.append(row_line)
return '\n'.join(text_parts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
"""Render a JSON bullet list to text."""
try:
items = list_data.get("items", [])
if not items:
return ""
text_parts = []
for item in items:
if isinstance(item, str):
text_parts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
text_parts.append(f"- {item['text']}")
return '\n'.join(text_parts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
"""Render a JSON heading to text."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
if level == 1:
return f"{text}\n{'=' * len(text)}"
elif level == 2:
return f"{text}\n{'-' * len(text)}"
else:
return f"{'#' * level} {text}"
return ""
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
"""Render a JSON paragraph to text."""
try:
text = paragraph_data.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
"""Render a JSON code block to text."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
if language:
return f"Code ({language}):\n{code}"
else:
return code
return ""
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
"""Render a JSON image to text."""
try:
alt_text = image_data.get("altText", "Image")
return f"[Image: {alt_text}]"
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f"[Image: {image_data.get('altText', 'Image')}]"

View file

@ -1,94 +0,0 @@
"""
Text renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class TextRenderer(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported text formats (excluding formats with dedicated renderers)."""
return [
'txt', 'text', 'plain',
# Programming languages
'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
# Web technologies (excluding html/htm which have dedicated renderer)
'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
# Data formats (excluding csv, md/markdown which have dedicated renderers)
'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
# Configuration files
'env', 'properties', 'conf', 'config', 'rc',
'gitattributes', 'editorconfig', 'eslintrc',
# Documentation
'readme', 'changelog', 'license', 'authors',
'contributing', 'todo', 'notes', 'docs'
]
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return [
'ascii', 'utf8', 'utf-8', 'code', 'source',
'script', 'program', 'file', 'document',
'raw', 'unformatted', 'plaintext'
]
@classmethod
def get_priority(cls) -> int:
"""Return priority for text renderer."""
return 90
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
    """Return the plain-text extraction guidelines.

    Only the format-specific part is produced here; ``user_prompt`` and
    ``title`` do not influence the text.
    """
    guideline_lines = (
        "TEXT FORMAT GUIDELINES:",
        "- Output ONLY plain text (no markdown or HTML).",
        "- Use clear headings (you may underline with === or --- when helpful).",
        "- Use simple bullet lists with '-' and tables with '|' when needed.",
        "- Preserve indentation for code-like content if present.",
        "OUTPUT: Return ONLY the raw text content.",
    )
    return "\n".join(guideline_lines)
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
"""Render extracted content to plain text format."""
try:
# The extracted content should already be formatted text from the AI
# Just clean it up
text_content = self._clean_text_content(extracted_content, title)
return text_content, "text/plain"
except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
def _clean_text_content(self, content: str, title: str) -> str:
"""Clean and validate text content from AI."""
content = content.strip()
# Remove markdown code blocks if present
if content.startswith("```") and content.endswith("```"):
lines = content.split('\n')
if len(lines) > 2:
content = '\n'.join(lines[1:-1]).strip()
# Remove any remaining markdown formatting
content = content.replace('**', '').replace('*', '')
content = content.replace('__', '').replace('_', '')
# Clean up any HTML-like tags that might have slipped through
import re
content = re.sub(r'<[^>]+>', '', content)
# Ensure proper line endings
content = content.replace('\r\n', '\n').replace('\r', '\n')
return content

View file

@ -103,8 +103,20 @@ Return only the JSON structure with actual data from the documents. Do not inclu
finalPrompt = genericIntro
# Debug output
print(f"🔍 DEBUG: Extraction Prompt: {finalPrompt}")
print(f"🔍 DEBUG: Extraction Intent: {extractionIntent}")
print(f"🔍 EXTRACTION INTENT: {extractionIntent}")
# Save full extraction prompt to debug file
try:
import os
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
os.makedirs(debug_root, exist_ok=True)
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n")
f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n")
except Exception:
pass
return finalPrompt
@ -127,6 +139,9 @@ async def buildGenerationPrompt(
# Protect userPrompt from injection
safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')
# Debug output
print(f"🔍 GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'")
# AI call to generate the appropriate generation prompt
generationPromptRequest = f"""
Based on this user request, create a detailed generation prompt for creating a {outputFormat} document.
@ -144,17 +159,23 @@ Create a generation prompt that:
IMPORTANT: Always generate content in STANDARDIZED JSON FORMAT. In your response, include the exact text "PLACEHOLDER_FOR_FORMAT_RULES" where specific format rules will be inserted afterwards automatically.
CRITICAL: You MUST start your response with exactly "Generate a {outputFormat} document that:" - do NOT use "docx" or any other format. Use the exact format specified: {outputFormat}
Return only the generation prompt, starting with "Generate a {outputFormat} document that..."
"""
# Call AI service to generate the prompt
print(f"🔍 DEBUG: Calling AI for generation prompt...")
result = await aiService.callAi(
prompt=generationPromptRequest,
documents=None,
options=None
)
print(f"🔍 DEBUG: AI generation prompt result: '{result}'")
print(f"🔍 GENERATION PROMPT REQUEST: Calling AI for generation prompt...")
# Import and set proper options for AI call
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=generationPromptRequest, context="", options=request_options)
response = await aiService.aiObjects.call(request)
result = response.content if response else ""
print(f"🔍 GENERATION PROMPT AI RESPONSE: '{result}'")
# Replace the placeholder that the AI created with actual format rules
if result:
@ -162,7 +183,21 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu
result = result.replace("PLACEHOLDER_FOR_FORMAT_RULES", formatRules)
# Debug output
print(f"🔍 DEBUG: Generation Prompt: {result if result else 'None'}")
print(f"🔍 GENERATION PROMPT FINAL: {result if result else 'None'}")
# Save full generation prompt and AI response to debug file
try:
import os
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
os.makedirs(debug_root, exist_ok=True)
with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f:
f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n")
f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n")
f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n")
except Exception:
pass
return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"
@ -216,11 +251,15 @@ Do not include formatting instructions, file types, or output methods.
# Call AI service to extract intention
print(f"🔍 DEBUG: Calling AI for extraction intent...")
result = await aiService.callAi(
prompt=extractionPrompt,
documents=None,
options=None
)
# Import and set proper options for AI call
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=extractionPrompt, context="", options=request_options)
response = await aiService.aiObjects.call(request)
result = response.content if response else ""
print(f"🔍 DEBUG: AI extraction intent result: '{result}'")
return result if result else f"Extract all relevant content from the document according to the user's requirements: {userPrompt}"

View file

@ -1,197 +0,0 @@
#!/usr/bin/env python3
"""
Script to rename renderer files from <name>_renderer.py to renderer<Name>.py
and update all references in the codebase.
"""
import os
import re
import shutil
from pathlib import Path
from typing import Dict, List, Tuple
def get_renderer_files(renderers_dir: Path) -> List[Tuple[str, str]]:
    """Collect ``(old_name, new_name)`` pairs for renderer files to rename.

    Every ``*_renderer.py`` file in ``renderers_dir`` is mapped from
    ``<name>_renderer.py`` to ``renderer<Name>.py`` (e.g.
    ``csv_renderer.py`` -> ``rendererCsv.py``). ``base_renderer.py`` and
    ``registry.py`` are excluded.
    """
    excluded = ['base_renderer.py', 'registry.py']
    return [
        (
            path.name,
            f"renderer{path.name.replace('_renderer.py', '').capitalize()}.py",
        )
        for path in renderers_dir.glob("*_renderer.py")
        if path.name not in excluded
    ]
def update_file_imports(file_path: Path, old_to_new: Dict[str, str]) -> bool:
    """Rewrite renderer import statements in one file.

    For every ``old -> new`` file-name pair, both the relative form
    (``from .old_module import``) and the absolute form
    (``from modules.services.serviceGeneration.renderers.old_module import``)
    are redirected to the new module name. The file is written back only
    when something changed.

    Returns:
        True when the file was modified; False when nothing matched or an
        error occurred (errors are printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            text = source.read()

        touched = False
        for legacy_file, renamed_file in old_to_new.items():
            legacy_module = legacy_file.replace('.py', '')
            renamed_module = renamed_file.replace('.py', '')

            substitutions = (
                # from .old_module import
                (
                    rf'from \.{re.escape(legacy_module)} import',
                    f'from .{renamed_module} import',
                ),
                # from modules.services.serviceGeneration.renderers.old_module import
                (
                    rf'from modules\.services\.serviceGeneration\.renderers\.{re.escape(legacy_module)} import',
                    f'from modules.services.serviceGeneration.renderers.{renamed_module} import',
                ),
            )
            for pattern, replacement in substitutions:
                if re.search(pattern, text):
                    text = re.sub(pattern, replacement, text)
                    touched = True

        if not touched:
            print(f" No imports to update in: {file_path}")
            return False

        with open(file_path, 'w', encoding='utf-8') as target:
            target.write(text)
        print(f"✅ Updated imports in: {file_path}")
        return True
    except Exception as err:
        print(f"❌ Error updating {file_path}: {str(err)}")
        return False
def update_class_names_in_file(file_path: Path, old_to_new: Dict[str, str]) -> bool:
    """Rename renderer classes from ``<Name>Renderer`` to ``Renderer<Name>``.

    Class names are derived from the file-name pairs in ``old_to_new``
    (``csv_renderer.py`` -> ``CsvRenderer`` -> ``RendererCsv``). Both the
    class definition and any ``OldClass(`` call sites in the file are
    rewritten. The file is written back only when something changed.

    Returns:
        True when the file was modified; False when nothing matched or an
        error occurred (errors are printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            text = source.read()

        touched = False
        for legacy_file in old_to_new:
            # Only the old file name determines the class names.
            stem = legacy_file.replace('.py', '').replace('_renderer', '')
            legacy_class = f"{stem.capitalize()}Renderer"
            renamed_class = f"Renderer{stem.capitalize()}"

            substitutions = (
                # class definition
                (rf'class {re.escape(legacy_class)}\(', f'class {renamed_class}('),
                # class instantiation
                (rf'{re.escape(legacy_class)}\(', f'{renamed_class}('),
            )
            for pattern, replacement in substitutions:
                if re.search(pattern, text):
                    text = re.sub(pattern, replacement, text)
                    touched = True

        if not touched:
            print(f" No class names to update in: {file_path}")
            return False

        with open(file_path, 'w', encoding='utf-8') as target:
            target.write(text)
        print(f"✅ Updated class names in: {file_path}")
        return True
    except Exception as err:
        print(f"❌ Error updating class names in {file_path}: {str(err)}")
        return False
def main():
"""Main function to rename renderer files and update references."""
# Runs three ordered steps: (1) rewrite imports in every *.py file below the
# script's directory, (2) rewrite class names inside the renderer files while
# they still carry their old names, (3) move the files to their new names.
# Failures in the helpers are printed by the helpers themselves; this function
# only counts successes.
print("🔄 Starting renderer file renaming process...")
# Get the gateway directory
# The directory containing this script is used as the gateway root.
gateway_dir = Path(__file__).parent
renderers_dir = gateway_dir / "modules" / "services" / "serviceGeneration" / "renderers"
if not renderers_dir.exists():
print(f"❌ Renderers directory not found: {renderers_dir}")
return
print(f"📁 Working in directory: {renderers_dir}")
# Get list of files to rename
renderer_files = get_renderer_files(renderers_dir)
if not renderer_files:
print(" No renderer files found to rename.")
return
print(f"📋 Found {len(renderer_files)} renderer files to rename:")
for old_name, new_name in renderer_files:
print(f" {old_name}{new_name}")
# Create mapping dictionary
old_to_new = {old_name: new_name for old_name, new_name in renderer_files}
# Step 1: Update imports in all Python files
print("\n🔄 Step 1: Updating import statements...")
updated_files = []
# Search in gateway directory
# rglob walks the whole tree recursively; the script is excluded by file name.
for py_file in gateway_dir.rglob("*.py"):
if py_file.name != "rename_renderers.py": # Skip this script
if update_file_imports(py_file, old_to_new):
updated_files.append(py_file)
print(f"✅ Updated imports in {len(updated_files)} files")
# Step 2: Update class names in renderer files
print("\n🔄 Step 2: Updating class names in renderer files...")
class_updated_files = []
# Files are still addressed by their old names here; the move happens in step 3.
for old_name, new_name in renderer_files:
old_file_path = renderers_dir / old_name
if old_file_path.exists():
if update_class_names_in_file(old_file_path, old_to_new):
class_updated_files.append(old_file_path)
print(f"✅ Updated class names in {len(class_updated_files)} files")
# Step 3: Rename the files
print("\n🔄 Step 3: Renaming files...")
renamed_files = []
for old_name, new_name in renderer_files:
old_file_path = renderers_dir / old_name
new_file_path = renderers_dir / new_name
if old_file_path.exists():
try:
shutil.move(str(old_file_path), str(new_file_path))
renamed_files.append((old_name, new_name))
print(f"✅ Renamed: {old_name}{new_name}")
except Exception as e:
# A failed move is reported and skipped; remaining files are still processed.
print(f"❌ Error renaming {old_name}: {str(e)}")
else:
print(f"⚠️ File not found: {old_name}")
print(f"\n🎉 Renaming process completed!")
print(f"📊 Summary:")
print(f" - Files renamed: {len(renamed_files)}")
print(f" - Import statements updated: {len(updated_files)}")
print(f" - Class names updated: {len(class_updated_files)}")
if renamed_files:
print(f"\n📋 Renamed files:")
for old_name, new_name in renamed_files:
print(f"{old_name}{new_name}")
# Run the rename only when executed as a script, not on import.
if __name__ == "__main__":
main()

View file

@ -154,9 +154,11 @@ async def process_documents_and_generate_summary():
# userPrompt = "Analyze these documents and create a comprehensive DOCX summary document including: 1) Document types and purposes, 2) Key information and main points, 3) Important details and numbers, 4) Notable sections, 5) Overall assessment and recommendations."
userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
# userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted."
userPrompt = "Create a docx file containing the combined documents in french language."
# userPrompt = "Create a docx file containing the combined documents in french language."
try:
# Single AI call with DOCX generation
@ -164,7 +166,7 @@ async def process_documents_and_generate_summary():
prompt=userPrompt,
documents=documents,
options=ai_options,
outputFormat="docx",
outputFormat="xlsx",
title="Document Analysis Summary"
)

View file

@ -1,77 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify the fallback mechanism in interfaceAiObjects.py
"""
import asyncio
import sys
import os
import logging
from pathlib import Path
# Add the gateway directory to the Python path
# The script's own directory is put first on sys.path; the `modules.*` imports
# inside the test presumably resolve from there — verify the script location.
gateway_dir = Path(__file__).parent
sys.path.insert(0, str(gateway_dir))
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
async def test_fallback_mechanism():
"""Test the fallback mechanism by simulating a failing primary model."""
# NOTE(review): nothing visible here forces the primary model to fail; the
# tests only issue normal calls and list the fallback models — confirm the
# docstring still describes the intent.
# Tests 1 and 2 each have their own try/except, so a failure is logged as a
# warning and the following tests still run. Any other error (imports,
# AiObjects.create, test 3) reaches the outer handler, which logs it and
# prints the traceback.
try:
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
logger.info("🧪 Testing fallback mechanism...")
# Create AiObjects instance
ai_objects = await AiObjects.create()
logger.info("✅ AiObjects created successfully")
# Test 1: Normal operation (should work with primary model)
logger.info("📝 Test 1: Normal operation")
request = AiCallRequest(
prompt="Hello, this is a test prompt. Please respond with 'Test successful'.",
context="",
options=AiCallOptions(operationType=OperationType.GENERAL)
)
try:
response = await ai_objects.call(request)
# Only the first 50 characters of the response content are logged.
logger.info(f"✅ Test 1 successful: {response.modelName} - {response.content[:50]}...")
except Exception as e:
logger.warning(f"⚠️ Test 1 failed: {str(e)}")
# Test 2: Image analysis fallback
logger.info("🖼️ Test 2: Image analysis fallback")
try:
# Create a dummy image data (base64 encoded 1x1 pixel)
dummy_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
result = await ai_objects.callImage(
prompt="Describe this image",
imageData=dummy_image,
mimeType="image/png",
options=AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
)
logger.info(f"✅ Test 2 successful: {result[:50]}...")
except Exception as e:
logger.warning(f"⚠️ Test 2 failed: {str(e)}")
# Test 3: Test fallback model selection
# Calls the private helper _getFallbackModels directly and only logs its result.
logger.info("🔄 Test 3: Fallback model selection")
fallback_models = ai_objects._getFallbackModels(OperationType.GENERAL)
logger.info(f"✅ Fallback models for GENERAL: {fallback_models}")
fallback_models_image = ai_objects._getFallbackModels(OperationType.IMAGE_ANALYSIS)
logger.info(f"✅ Fallback models for IMAGE_ANALYSIS: {fallback_models_image}")
logger.info("🎉 Fallback mechanism test completed!")
except Exception as e:
logger.error(f"❌ Test failed: {str(e)}")
import traceback
traceback.print_exc()
# Run the coroutine only when executed as a script.
if __name__ == "__main__":
asyncio.run(test_fallback_mechanism())

Binary file not shown.

View file

@ -1,120 +0,0 @@
#!/usr/bin/env python3
"""
Test script for JSON-to-DOCX rendering pipeline.
"""
import asyncio
import json
import sys
import os
from modules.services.serviceGeneration.renderers.docx_renderer import DocxRenderer
async def test_json_to_docx():
"""Test the JSON-to-DOCX rendering pipeline."""
# Renders a fixed six-section document (two headings, two paragraphs, one
# table, one bullet list) with DocxRenderer and writes the result to
# test_json_to_docx.docx in the current working directory.
# Returns True when rendering and saving succeed, False otherwise.
# Create test JSON document
test_json = {
"metadata": {
"title": "Test Document",
"version": "1.0"
},
"sections": [
{
"id": "heading1",
"type": "heading",
"data": {
"level": 1,
"text": "Document Overview"
}
},
{
"id": "paragraph1",
"type": "paragraph",
"data": {
"text": "This is a test paragraph to verify JSON-to-DOCX rendering works correctly."
}
},
{
"id": "table1",
"type": "table",
"data": {
"headers": ["Name", "Quantity", "Status"],
"rows": [
["Item 1", "5", "Active"],
["Item 2", "3", "Inactive"],
["Item 3", "10", "Active"]
]
}
},
{
"id": "list1",
"type": "bullet_list",
"data": {
"items": [
"First bullet point",
"Second bullet point",
"Third bullet point"
]
}
},
{
"id": "heading2",
"type": "heading",
"data": {
"level": 2,
"text": "Summary"
}
},
{
"id": "paragraph2",
"type": "paragraph",
"data": {
"text": "This document demonstrates the new JSON-based rendering system."
}
}
]
}
print("🧪 Testing JSON-to-DOCX rendering...")
print(f"📄 Test document has {len(test_json['sections'])} sections")
try:
# Create renderer
renderer = DocxRenderer()
# Test rendering
docx_content, mime_type = await renderer.render(
extracted_content=test_json,
title="Test Document",
user_prompt="Create a test document"
)
print(f"✅ Rendering successful!")
print(f"📊 MIME type: {mime_type}")
print(f"📏 Content length: {len(docx_content)} characters")
print(f"🔍 Content preview: {docx_content[:100]}...")
# Save test file
# The rendered content is treated as base64 text and decoded to bytes before writing.
import base64
docx_bytes = base64.b64decode(docx_content)
with open("test_json_to_docx.docx", "wb") as f:
f.write(docx_bytes)
print(f"💾 Test DOCX saved as: test_json_to_docx.docx")
return True
except Exception as e:
# Any failure (rendering, decoding, writing) is printed with its traceback.
print(f"❌ Rendering failed: {str(e)}")
import traceback
traceback.print_exc()
return False
# Script entry point: the process exits with status 1 when the test fails.
if __name__ == "__main__":
success = asyncio.run(test_json_to_docx())
if success:
print("\n🎉 JSON-to-DOCX rendering test PASSED!")
else:
print("\n💥 JSON-to-DOCX rendering test FAILED!")
sys.exit(1)