From 99215e27febf4e66a693eb361b90c59aead9013a Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 12 Oct 2025 00:51:23 +0200
Subject: [PATCH] all renderers active and using json objects
---
modules/services/serviceAi/mainServiceAi.py | 48 +-
.../mainServiceGeneration.py | 7 +-
.../renderers/base_renderer.py | 86 ----
.../renderers/html_renderer.py | 69 ---
.../renderers/json_renderer.py | 74 ---
.../renderers/markdown_renderer.py | 65 ---
.../renderers/pdf_renderer.py | 225 ---------
.../serviceGeneration/renderers/registry.py | 4 +-
.../renderers/rendererBaseTemplate.py | 285 +++++++++++
.../{csv_renderer.py => rendererCsv.py} | 18 +-
.../{docx_renderer.py => rendererDocx.py} | 243 +--------
.../{excel_renderer.py => rendererExcel.py} | 239 ++++++---
.../renderers/rendererHtml.py | 463 ++++++++++++++++++
.../renderers/rendererJson.py | 79 +++
.../renderers/rendererMarkdown.py | 213 ++++++++
.../renderers/rendererPdf.py | 416 ++++++++++++++++
.../{pptx_renderer.py => rendererPptx.py} | 91 +---
.../renderers/rendererText.py | 234 +++++++++
.../renderers/text_renderer.py | 94 ----
.../serviceGeneration/subPromptBuilder.py | 69 ++-
rename_renderers.py | 197 --------
test_document_processing.py | 6 +-
test_fallback_mechanism.py | 77 ---
test_json_to_docx.docx | Bin 37131 -> 0 bytes
test_json_to_docx.py | 120 -----
25 files changed, 2006 insertions(+), 1416 deletions(-)
delete mode 100644 modules/services/serviceGeneration/renderers/base_renderer.py
delete mode 100644 modules/services/serviceGeneration/renderers/html_renderer.py
delete mode 100644 modules/services/serviceGeneration/renderers/json_renderer.py
delete mode 100644 modules/services/serviceGeneration/renderers/markdown_renderer.py
delete mode 100644 modules/services/serviceGeneration/renderers/pdf_renderer.py
create mode 100644 modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
rename modules/services/serviceGeneration/renderers/{csv_renderer.py => rendererCsv.py} (91%)
rename modules/services/serviceGeneration/renderers/{docx_renderer.py => rendererDocx.py} (80%)
rename modules/services/serviceGeneration/renderers/{excel_renderer.py => rendererExcel.py} (72%)
create mode 100644 modules/services/serviceGeneration/renderers/rendererHtml.py
create mode 100644 modules/services/serviceGeneration/renderers/rendererJson.py
create mode 100644 modules/services/serviceGeneration/renderers/rendererMarkdown.py
create mode 100644 modules/services/serviceGeneration/renderers/rendererPdf.py
rename modules/services/serviceGeneration/renderers/{pptx_renderer.py => rendererPptx.py} (88%)
create mode 100644 modules/services/serviceGeneration/renderers/rendererText.py
delete mode 100644 modules/services/serviceGeneration/renderers/text_renderer.py
delete mode 100644 rename_renderers.py
delete mode 100644 test_fallback_mechanism.py
delete mode 100644 test_json_to_docx.docx
delete mode 100644 test_json_to_docx.py
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 6c6f76e2..5f24e158 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -746,8 +746,13 @@ Return only the JSON structure with actual content from the image. Do not includ
# Process any document container as text content
request_options = options if options is not None else AiCallOptions()
request_options.operationType = OperationType.GENERAL
- print(f"🔍 Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
+ print(f"🔍 EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
+
+ # Log extraction prompt and context
+ print(f"🔍 EXTRACTION PROMPT: {prompt}")
+ print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
+
request = AiCallRequest(
prompt=prompt,
context=part.data,
@@ -756,6 +761,23 @@ Return only the JSON structure with actual content from the image. Do not includ
response = await self.aiObjects.call(request)
ai_result = response.content
+ # Log extraction response
+ print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
+
+ # Save full extraction prompt and response to debug file
+ try:
+ import os
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ os.makedirs(debug_root, exist_ok=True)
+ with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
+ f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
+ f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
+ f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
+ except Exception:
+ pass
+
# If generating JSON, validate the response
if generate_json:
try:
@@ -798,8 +820,13 @@ Return only the JSON structure with actual content from the image. Do not includ
request_options = options if options is not None else AiCallOptions()
# FIXED: Set operation type to general for text processing
request_options.operationType = OperationType.GENERAL
- print(f"🔍 Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
+ print(f"🔍 EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
+
+ # Log extraction prompt and context
+ print(f"🔍 EXTRACTION PROMPT: {prompt}")
+ print(f"🔍 EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters")
+
request = AiCallRequest(
prompt=prompt,
context=part.data,
@@ -808,6 +835,23 @@ Return only the JSON structure with actual content from the image. Do not includ
response = await self.aiObjects.call(request)
ai_result = response.content
+ # Log extraction response
+ print(f"🔍 EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters")
+
+ # Save full extraction prompt and response to debug file
+ try:
+ import os
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ os.makedirs(debug_root, exist_ok=True)
+ with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
+ f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
+ f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
+ f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
+ except Exception:
+ pass
+
# If generating JSON, validate the response
if generate_json:
try:
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index 13c20fad..2d3aa21f 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -318,18 +318,17 @@ class GenerationService:
if "sections" not in extractedContent:
raise ValueError("extractedContent must contain 'sections' field")
- # DEBUG: dump renderer input to diagnose JSON structure TODO REMOVE
+ # DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE
try:
import os
- import json
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
os.makedirs(debug_dir, exist_ok=True)
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
- with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as f:
- json.dump(extractedContent, f, indent=2, ensure_ascii=False)
+ f.write(f"content_size: {len(str(extractedContent))} characters\n")
+ f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
except Exception:
pass
diff --git a/modules/services/serviceGeneration/renderers/base_renderer.py b/modules/services/serviceGeneration/renderers/base_renderer.py
deleted file mode 100644
index dd91be09..00000000
--- a/modules/services/serviceGeneration/renderers/base_renderer.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-Base renderer class for all format renderers.
-"""
-
-from abc import ABC, abstractmethod
-from typing import Dict, Any, Tuple, List
-import logging
-
-logger = logging.getLogger(__name__)
-
-class BaseRenderer(ABC):
- """Base class for all format renderers."""
-
- def __init__(self):
- self.logger = logger
-
- @classmethod
- def get_supported_formats(cls) -> List[str]:
- """
- Return list of supported format names for this renderer.
- Override this method in subclasses to specify supported formats.
- """
- return []
-
- @classmethod
- def get_format_aliases(cls) -> List[str]:
- """
- Return list of format aliases for this renderer.
- Override this method in subclasses to specify format aliases.
- """
- return []
-
- @classmethod
- def get_priority(cls) -> int:
- """
- Return priority for this renderer (higher number = higher priority).
- Used when multiple renderers support the same format.
- """
- return 0
-
- @abstractmethod
- def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
- """
- Get the format-specific extraction prompt for AI content extraction.
-
- Args:
- user_prompt: User's original prompt for report generation
- title: Report title
-
- Returns:
- str: Format-specific prompt for AI extraction
- """
- pass
-
- @abstractmethod
- async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
- """
- Render extracted content to the target format.
-
- Args:
- extracted_content: Raw content extracted by AI using format-specific prompt
- title: Report title
-
- Returns:
- tuple: (rendered_content, mime_type)
- """
- pass
-
- def _extract_sections(self, report_data: Dict[str, Any]) -> list:
- """Extract sections from report data."""
- return report_data.get('sections', [])
-
- def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
- """Extract metadata from report data."""
- return report_data.get('metadata', {})
-
- def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
- """Get title from report data or use fallback."""
- return report_data.get('title', fallback_title)
-
- def _format_timestamp(self, timestamp: str = None) -> str:
- """Format timestamp for display."""
- if timestamp:
- return timestamp
- from datetime import datetime, UTC
- return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
diff --git a/modules/services/serviceGeneration/renderers/html_renderer.py b/modules/services/serviceGeneration/renderers/html_renderer.py
deleted file mode 100644
index c2b7e586..00000000
--- a/modules/services/serviceGeneration/renderers/html_renderer.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-HTML renderer for report generation.
-"""
-
-from .base_renderer import BaseRenderer
-from typing import Dict, Any, Tuple, List
-
-class HtmlRenderer(BaseRenderer):
- """Renders content to HTML format with format-specific extraction."""
-
- @classmethod
- def get_supported_formats(cls) -> List[str]:
- """Return supported HTML formats."""
- return ['html', 'htm']
-
- @classmethod
- def get_format_aliases(cls) -> List[str]:
- """Return format aliases."""
- return ['web', 'webpage']
-
- @classmethod
- def get_priority(cls) -> int:
- """Return priority for HTML renderer."""
- return 100
-
- def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
- """Return only HTML-specific guidelines; global prompt is built centrally."""
- return (
- "HTML FORMAT GUIDELINES:\n"
- "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
- "- Include <html>, with <head> and <title>, and <body>.\n"
- "- Use semantic elements: , , , ,