From df15f54f4b762e17afcf6c9bc5b52c4b27277736 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 12 Oct 2025 02:27:55 +0200
Subject: [PATCH] Test and fix HTML and Markdown renderers
---
.../renderers/rendererHtml.py | 1 +
test_document_processing.py | 23 +++++++++++++------
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 79d9df1e..5cd9b691 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -156,6 +156,7 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_html_styles()
+
def _get_default_html_styles(self) -> Dict[str, Any]:
"""Default HTML styles."""
return {
diff --git a/test_document_processing.py b/test_document_processing.py
index b89b238f..53fbd80d 100644
--- a/test_document_processing.py
+++ b/test_document_processing.py
@@ -168,7 +168,7 @@ async def process_documents_and_generate_summary():
prompt=userPrompt,
documents=documents,
options=ai_options,
- outputFormat="pdf",
+ outputFormat="html",
title="Formulaire"
)
@@ -270,13 +270,15 @@ async def process_documents_and_generate_summary():
file_ext = '.xlsx'
elif 'pptx' in doc_mime.lower() or 'presentationml' in doc_mime.lower():
file_ext = '.pptx'
+ elif 'markdown' in doc_mime.lower() or 'md' in doc_mime.lower():
+ file_ext = '.md'
else:
logger.warning(f"⚠️ Unknown MIME type: {doc_mime}, using .bin")
# Also check filename for hints
if doc_name and '.' in doc_name:
name_ext = '.' + doc_name.split('.')[-1].lower()
- if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx']:
+ if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md']:
file_ext = name_ext
logger.info(f"📄 Using extension from filename: {file_ext}")
@@ -284,12 +286,19 @@ async def process_documents_and_generate_summary():
# Save document
output_path = output_dir / f"{test_name}_{timestamp}{file_ext}"
- doc_bytes = base64.b64decode(doc_data)
- with open(output_path, 'wb') as f:
- f.write(doc_bytes)
-
- logger.info(f"✅ Document saved: {output_path} ({len(doc_bytes)} bytes)")
+ # Handle different content types
+ if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
+ # Text-based formats - save directly as text
+ with open(output_path, 'w', encoding='utf-8') as f:
+ f.write(doc_data)
+ logger.info(f"✅ Document saved as text: {output_path} ({len(doc_data)} characters)")
+ else:
+ # Binary formats - decode from base64
+ doc_bytes = base64.b64decode(doc_data)
+ with open(output_path, 'wb') as f:
+ f.write(doc_bytes)
+ logger.info(f"✅ Document saved as binary: {output_path} ({len(doc_bytes)} bytes)")
# Also save raw content as text
content = response.get('content', '')