# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON renderer for report generation.
"""
|
|
|
|
from .rendererBaseTemplate import BaseRenderer
|
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
|
from typing import Dict, Any, List, Optional
|
|
import json
|
|
|
|
class RendererJson(BaseRenderer):
    """Renders content to JSON format with format-specific extraction.

    The extracted content dictionary is serialized to an indented JSON string
    and returned as a single ``RenderedDocument`` with MIME type
    ``application/json``. If rendering fails, a minimal JSON document
    describing the error is returned instead of raising.
    """

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: JSON is structured data format.

        Args:
            formatName: Not read by this method; the result is always 'code'.
        """
        return 'code'

    @staticmethod
    def _extractMetadata(extractedContent: Any) -> tuple:
        """Return ``(metadata, documentType)`` read from the extracted content.

        Safe for any input type, so it can be used from the error fallback:
        - non-dict content (including ``None``) yields ``({}, None)``;
        - a dict whose ``metadata`` value is not a dict yields ``(None, None)``;
        - otherwise the metadata dict and its ``documentType`` entry (or
          ``None`` when absent) are returned.
        """
        if not isinstance(extractedContent, dict):
            return {}, None
        metadata = extractedContent.get("metadata", {})
        if not isinstance(metadata, dict):
            return None, None
        return metadata, metadata.get("documentType")

    async def render(
        self,
        extractedContent: Dict[str, Any],
        title: str,
        userPrompt: Optional[str] = None,
        aiService=None,
    ) -> List[RenderedDocument]:
        """Render extracted JSON content to JSON format.

        Args:
            extractedContent: Content dictionary. This method reads the
                ``documents`` key (the first entry's ``filename``) and the
                ``metadata`` key (its ``documentType``).
            title: Report title; stored as the metadata title when none is
                present and passed to ``_determineFilename`` when the content
                does not provide a filename.
            userPrompt: Not read by this method.
            aiService: Not read by this method.

        Returns:
            A single-element list holding the rendered document. On any
            error a fallback document describing the error is returned
            instead of raising.
        """
        try:
            # The extracted content should already be JSON from the AI;
            # just validate and format it. Must run BEFORE the metadata is
            # read below: _cleanJsonContent may add "metadata"/"title" to
            # extractedContent in place.
            jsonContent = self._cleanJsonContent(extractedContent, title)

            # Determine filename from the first document, else from the title.
            # Non-dict content raises AttributeError here and is handled by
            # the fallback below.
            documents = extractedContent.get("documents", [])
            filename = None
            if documents and isinstance(documents[0], dict):
                filename = documents[0].get("filename")
            if not filename:
                filename = self._determineFilename(title, "application/json")

            # Extract metadata for document type and other info
            metadata, documentType = self._extractMetadata(extractedContent)

            return [
                RenderedDocument(
                    documentData=jsonContent.encode('utf-8'),
                    mimeType="application/json",
                    filename=filename,
                    documentType=documentType,
                    metadata=metadata,
                )
            ]

        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Return minimal JSON fallback
            fallbackData = {
                "title": title,
                "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
                "metadata": {"error": str(e)}
            }
            # Default ensure_ascii=True is kept on purpose: the output is
            # pure ASCII, so the UTF-8 encode below cannot fail.
            fallbackContent = json.dumps(fallbackData, indent=2)
            # Type-guarded lookup: the content that triggered the error may
            # not be a dict, and the fallback itself must not raise on it.
            metadata, documentType = self._extractMetadata(extractedContent)
            return [
                RenderedDocument(
                    documentData=fallbackContent.encode('utf-8'),
                    mimeType="application/json",
                    filename=self._determineFilename(title, "application/json"),
                    documentType=documentType,
                    metadata=metadata,
                )
            ]

    def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
        """Clean and validate JSON content from AI.

        Content without a ``sections`` key is wrapped into a single paragraph
        section containing ``str(content)``. Content that already has
        ``sections`` is updated IN PLACE: a missing ``metadata`` dict and a
        missing metadata ``title`` are added to the caller's dictionary.

        Args:
            content: Content dictionary to serialize.
            title: Title stored in the metadata when none is present.

        Returns:
            The content as an indented JSON string. If validation or
            serialization fails, a minimal JSON string holding
            ``str(content)`` and the error message is returned instead.
        """
        try:
            # Validate JSON structure
            if not isinstance(content, dict):
                raise ValueError("Content must be a dictionary")

            # Ensure it has the expected structure
            if "sections" not in content:
                # Convert old format to new format (rebinds the local name
                # only; the caller's dictionary is left untouched here)
                content = {
                    "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                    "metadata": {"title": title}
                }

            # Ensure metadata exists
            if "metadata" not in content:
                content["metadata"] = {}

            # Set title in metadata if not present
            if "title" not in content["metadata"]:
                content["metadata"]["title"] = title

            # Re-format with proper indentation
            return json.dumps(content, indent=2, ensure_ascii=False)

        except Exception as e:
            self.logger.warning(f"Error cleaning JSON content: {str(e)}")
            # Return minimal valid JSON
            fallbackData = {
                "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                "metadata": {"title": title, "error": str(e)}
            }
            return json.dumps(fallbackData, indent=2, ensure_ascii=False)
|