From 2eb1a5589d58e74fcf2dda5551a081127301dccb Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 3 Jun 2026 16:45:17 +0200
Subject: [PATCH] fixes doc generation and renderers
---
app.py | 23 +-
modules/aicore/aicoreModelSelector.py | 5 +-
modules/aicore/aicorePluginAnthropic.py | 4 +
modules/connectors/_httpResilience.py | 241 +++
.../providerGoogle/connectorGoogle.py | 65 +-
.../providerInfomaniak/connectorInfomaniak.py | 31 +-
.../connectors/providerMsft/connectorMsft.py | 48 +-
modules/features/trustee/mainTrustee.py | 3 +-
modules/interfaces/interfaceAiObjects.py | 12 +
.../core/serviceStreaming/eventManager.py | 13 +
.../serviceAgent/coreTools/_mediaTools.py | 12 -
.../services/serviceAgent/sandboxExecutor.py | 46 +
.../services/serviceAi/mainServiceAi.py | 20 +-
.../services/serviceAi/subStructureFilling.py | 23 +-
.../mainServiceExtraction.py | 31 +
.../mainServiceGeneration.py | 84 +-
.../serviceGeneration/paths/documentPath.py | 6 +-
.../renderers/documentRendererBaseTemplate.py | 139 +-
.../renderers/rendererDocx.py | 1116 ++++----------
.../renderers/rendererHtml.py | 171 +--
.../renderers/rendererPdf.py | 564 ++++---
.../renderers/rendererPptx.py | 1365 +++++------------
.../renderers/rendererXlsx.py | 860 ++---------
.../serviceGeneration/styleDefaults.py | 154 +-
.../methodAi/actions/generateDocument.py | 4 +-
.../methods/methodAi/actions/process.py | 47 +-
.../workflows/methods/methodAi/methodAi.py | 18 -
27 files changed, 1812 insertions(+), 3293 deletions(-)
create mode 100644 modules/connectors/_httpResilience.py
diff --git a/app.py b/app.py
index 0ffa9824..ec35a2cf 100644
--- a/app.py
+++ b/app.py
@@ -61,6 +61,13 @@ class DailyRotatingFileHandler(RotatingFileHandler):
return True
return False
+ def doRollover(self):
+ """Size-based rollover that tolerates Windows file locks.
+
+ Delegates to the parent handler's ``doRollover()`` and ignores a
+ ``PermissionError`` so that a failed rollover does not propagate to the
+ code that is emitting the log record.
+ """
+ try:
+ super().doRollover()
+ except PermissionError:
+ # Deliberate best-effort: the rollover is simply skipped for this call.
+ pass
+
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
@@ -454,6 +461,20 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning(f"Could not shutdown feature containers: {e}")
+ # 4. Cancel all pending streaming EventManager tasks (cleanup sleeps, agent tasks)
+ try:
+ from modules.serviceCenter.core.serviceStreaming.eventManager import get_event_manager as _getStreamingEM
+ _getStreamingEM().shutdown()
+ except Exception as e:
+ logger.warning(f"Streaming EventManager shutdown failed: {e}")
+
+ # 5. Close shared HTTP sessions (ResilientHttp) to avoid TCP keepalive hang
+ try:
+ from modules.connectors._httpResilience import closeAllResilientHttp
+ await closeAllResilientHttp()
+ except Exception as e:
+ logger.warning(f"Closing HTTP sessions failed: {e}")
+
logger.info("Application has been shut down")
except asyncio.CancelledError:
@@ -734,7 +755,7 @@ if __name__ == "__main__":
port = int(os.environ.get("PORT", 8000))
try:
- from gunicorn.app.wsgiapp import WSGIApplication # noqa: F401
+ import gunicorn.app.wsgiapp # type: ignore[import-untyped] # noqa: F401
import subprocess
import sys
subprocess.run([
diff --git a/modules/aicore/aicoreModelSelector.py b/modules/aicore/aicoreModelSelector.py
index d04472cd..f51d6cec 100644
--- a/modules/aicore/aicoreModelSelector.py
+++ b/modules/aicore/aicoreModelSelector.py
@@ -140,11 +140,10 @@ class ModelSelector:
promptFiltered.append(model)
else:
maxAllowedTokens = model.contextLength * 0.8
- # Compare prompt tokens (not bytes) with model's token limit
- if promptTokens <= maxAllowedTokens:
+ if totalTokens <= maxAllowedTokens:
promptFiltered.append(model)
else:
- logger.debug(f"Model {model.name} filtered out: promptSize={promptTokens:.0f} tokens > maxAllowed={maxAllowedTokens:.0f} tokens (80% of {model.contextLength} tokens)")
+ logger.debug(f"Model {model.name} filtered out: totalTokens={totalTokens:.0f} > maxAllowed={maxAllowedTokens:.0f} tokens (80% of {model.contextLength} tokens)")
logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")
diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py
index 5c1e87b5..ce6349f0 100644
--- a/modules/aicore/aicorePluginAnthropic.py
+++ b/modules/aicore/aicorePluginAnthropic.py
@@ -654,6 +654,7 @@ class AiAnthropic(BaseConnectorAi):
mimeType = parts[0].replace("data:", "")
base64Data = parts[1]
+ _SUPPORTED = {"image/jpeg", "image/png", "image/gif", "image/webp"}
import base64 as _b64
try:
rawHead = _b64.b64decode(base64Data[:32])
@@ -667,6 +668,9 @@ class AiAnthropic(BaseConnectorAi):
mimeType = "image/webp"
except Exception:
pass
+
+ if mimeType not in _SUPPORTED:
+ raise ValueError(f"Unsupported image media_type '{mimeType}' for Anthropic (supported: {', '.join(sorted(_SUPPORTED))})")
# Convert to Anthropic's vision format
anthropicMessages = [{
diff --git a/modules/connectors/_httpResilience.py b/modules/connectors/_httpResilience.py
new file mode 100644
index 00000000..504686c8
--- /dev/null
+++ b/modules/connectors/_httpResilience.py
@@ -0,0 +1,241 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""Shared HTTP resilience helpers for provider connectors.
+
+Provides a reusable session pool with concurrency limiter and retry-with-backoff
+so that Google, MSFT and Infomaniak connectors do not each re-implement
+per-request sessions, unbounded parallelism, and missing retry logic.
+"""
+
+import asyncio
+import logging
+import time
+from typing import Any, Dict, Optional, Union
+
+import aiohttp
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_MAX_CONCURRENT = 8
+_DEFAULT_MAX_RETRIES = 3
+_DEFAULT_TIMEOUT_S = 30
+_RETRYABLE_STATUS = {429, 502, 503, 504}
+
+
+_instances: list["ResilientHttp"] = []
+
+
+class ResilientHttp:
+    """Managed aiohttp.ClientSession with semaphore + retry.
+
+    Typical usage inside a connector module-level function::
+
+        _http = ResilientHttp("Google", maxConcurrent=8)
+
+        async def _googleGet(token, url):
+            return await _http.getJson(url, headers={"Authorization": f"Bearer {token}"})
+
+    The session is created lazily on first call, reused across requests,
+    and closed via ``closeAllResilientHttp()`` at app shutdown.
+
+    Retry policy (shared by all request helpers): responses whose status is in
+    ``_RETRYABLE_STATUS``, timeouts and ``aiohttp.ClientError`` are retried for
+    at most ``maxRetries`` attempts. The concurrency slot is released while
+    waiting between attempts, so a backing-off request does not block others.
+    """
+
+    def __init__(
+        self,
+        providerLabel: str = "HTTP",
+        maxConcurrent: int = _DEFAULT_MAX_CONCURRENT,
+        maxRetries: int = _DEFAULT_MAX_RETRIES,
+        defaultTimeoutS: float = _DEFAULT_TIMEOUT_S,
+    ):
+        """Store the configuration and register the instance for shutdown.
+
+        Args:
+            providerLabel: Prefix used in log messages.
+            maxConcurrent: Maximum number of requests in flight at once.
+            maxRetries: Total number of attempts per call.
+            defaultTimeoutS: Total timeout in seconds used when a call does
+                not pass its own ``timeout``.
+        """
+        self._label = providerLabel
+        self._maxConcurrent = maxConcurrent
+        self._maxRetries = maxRetries
+        self._defaultTimeout = aiohttp.ClientTimeout(total=defaultTimeoutS)
+        # Both are created lazily in _ensureReady().
+        self._semaphore: Optional[asyncio.Semaphore] = None
+        self._session: Optional[aiohttp.ClientSession] = None
+        _instances.append(self)
+
+    def _ensureReady(self) -> aiohttp.ClientSession:
+        """Create the semaphore and session on first use and return the session."""
+        if self._semaphore is None:
+            self._semaphore = asyncio.Semaphore(self._maxConcurrent)
+        if self._session is None or self._session.closed:
+            self._session = aiohttp.ClientSession(timeout=self._defaultTimeout)
+        return self._session
+
+    async def close(self):
+        """Close the shared session if one is open."""
+        if self._session and not self._session.closed:
+            await self._session.close()
+            # NOTE(review): short pause after close; presumably gives the
+            # underlying connections time to shut down. Confirm it is needed.
+            await asyncio.sleep(0.25)
+        self._session = None
+
+    @staticmethod
+    async def _readJsonBody(resp: aiohttp.ClientResponse) -> Dict[str, Any]:
+        """Parse a success response as JSON, treating an empty body as ``{}``.
+
+        Success responses without a body (204, and e.g. 202 with no content)
+        must not be handed to the strict ``resp.json()``: it raises
+        ``ContentTypeError``, which is an ``aiohttp.ClientError`` and would make
+        the retry loop send an already successful request again.
+        """
+        if resp.status == 204:
+            return {}
+        body = await resp.read()
+        if not body.strip():
+            return {}
+        # content_type=None disables the content-type check; the body was read above.
+        return await resp.json(content_type=None)
+
+    async def _send(
+        self,
+        method: str,
+        url: str,
+        *,
+        logVerb: str,
+        okStatuses: tuple,
+        parse: Any,
+        headers: Dict[str, str],
+        timeout: Optional[aiohttp.ClientTimeout],
+        **requestKwargs: Any,
+    ) -> tuple:
+        """Run one request with throttling and retry.
+
+        Args:
+            method: HTTP method passed to ``session.request``.
+            url: Target URL.
+            logVerb: Word used for this call in log messages.
+            okStatuses: Status codes treated as success.
+            parse: Coroutine function that turns a success response into the
+                returned value.
+            headers: Request headers.
+            timeout: Per-call timeout; the instance default is used if falsy.
+            **requestKwargs: Extra keyword arguments for ``session.request``.
+
+        Returns:
+            ``(True, parsedValue)`` on success, ``(False, errorMessage)`` otherwise.
+        """
+        session = self._ensureReady()
+        semaphore = self._semaphore
+        assert semaphore is not None  # created by _ensureReady()
+        effectiveTimeout = timeout or self._defaultTimeout
+        lastError: Optional[str] = None
+        for attempt in range(1, self._maxRetries + 1):
+            waitS = 0.0
+            async with semaphore:
+                try:
+                    async with session.request(
+                        method, url,
+                        headers=headers,
+                        timeout=effectiveTimeout,
+                        **requestKwargs,
+                    ) as resp:
+                        if resp.status in okStatuses:
+                            return True, await parse(resp)
+                        if resp.status not in _RETRYABLE_STATUS:
+                            errorText = await resp.text()
+                            lastError = f"{resp.status}: {errorText[:200]}"
+                            logger.warning(f"{self._label} {logVerb} {url[:120]} -> {lastError[:300]}")
+                            return False, lastError
+                        # Remember the status so exhausted retries report it.
+                        lastError = f"{resp.status}: retryable status, giving up after {attempt} attempt(s)"
+                        retryAfter = _parseRetryAfter(resp.headers.get("Retry-After"))
+                        waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30)
+                except asyncio.TimeoutError:
+                    lastError = f"timeout after {effectiveTimeout.total}s"
+                    waitS = min(2 ** attempt, 10)
+                except aiohttp.ClientError as e:
+                    lastError = str(e)
+                    waitS = min(2 ** attempt, 10)
+                except ValueError as e:
+                    # Malformed body on a success status: retrying cannot help,
+                    # and callers expect an error value rather than an exception.
+                    lastError = f"invalid response body: {e}"
+                    logger.warning(f"{self._label} {logVerb} {url[:120]} -> {lastError[:300]}")
+                    return False, lastError
+            if attempt >= self._maxRetries:
+                break
+            logger.warning(
+                f"{self._label} {logVerb} failed (attempt {attempt}/{self._maxRetries}): "
+                f"{lastError}; retry in {waitS:.1f}s: {url[:120]}"
+            )
+            # Back off outside the semaphore so the slot is free for other calls.
+            await asyncio.sleep(waitS)
+        return False, lastError or "unknown error"
+
+    async def getJson(
+        self,
+        url: str,
+        headers: Dict[str, str],
+        timeout: Optional[aiohttp.ClientTimeout] = None,
+        allowRedirects: bool = True,
+    ) -> Dict[str, Any]:
+        """GET request returning parsed JSON with retry + throttle.
+
+        Returns:
+            The parsed JSON on status 200/201, otherwise ``{"error": message}``.
+        """
+        ok, value = await self._send(
+            "GET", url,
+            logVerb="GET",
+            okStatuses=(200, 201),
+            parse=lambda resp: resp.json(),
+            headers=headers,
+            timeout=timeout,
+            allow_redirects=allowRedirects,
+        )
+        return value if ok else {"error": value}
+
+    async def getBytes(
+        self,
+        url: str,
+        headers: Dict[str, str],
+        timeout: Optional[aiohttp.ClientTimeout] = None,
+        allowRedirects: bool = True,
+    ) -> Optional[bytes]:
+        """GET request returning raw bytes (for file downloads).
+
+        Returns:
+            The response body on status 200, otherwise ``None``.
+        """
+        ok, value = await self._send(
+            "GET", url,
+            logVerb="download",
+            okStatuses=(200,),
+            parse=lambda resp: resp.read(),
+            headers=headers,
+            timeout=timeout,
+            allow_redirects=allowRedirects,
+        )
+        return value if ok else None
+
+    async def request(
+        self,
+        method: str,
+        url: str,
+        headers: Dict[str, str],
+        data: Any = None,
+        timeout: Optional[aiohttp.ClientTimeout] = None,
+    ) -> Dict[str, Any]:
+        """Generic HTTP request with retry for retryable status codes.
+
+        Returns:
+            The parsed JSON body on status 200/201/202/204 (``{}`` when the
+            body is empty), otherwise ``{"error": message}``.
+        """
+        extra: Dict[str, Any] = {} if data is None else {"data": data}
+        ok, value = await self._send(
+            method, url,
+            logVerb=method,
+            okStatuses=(200, 201, 202, 204),
+            parse=self._readJsonBody,
+            headers=headers,
+            timeout=timeout,
+            **extra,
+        )
+        return value if ok else {"error": value}
+
+
+async def closeAllResilientHttp() -> None:
+    """Close the session of every registered ResilientHttp; call at app shutdown.
+
+    A failure while closing one instance is logged at debug level and does not
+    stop the remaining instances from being closed.
+    """
+    for client in _instances:
+        try:
+            await client.close()
+        except Exception as exc:
+            logger.debug(f"Error closing {client._label} session: {exc}")
+    total = len(_instances)
+    logger.info(f"Closed {total} ResilientHttp session(s)")
+
+
+def _parseRetryAfter(value: Optional[str]) -> float:
+    """Parse a Retry-After header value into a wait time in seconds.
+
+    Args:
+        value: Raw header value, either a number of seconds or an HTTP-date.
+
+    Returns:
+        The number of seconds to wait. ``0.0`` when the header is absent,
+        unparseable, not a positive finite number. For an HTTP-date the
+        result is the time remaining until that date, but at least ``0.5``.
+    """
+    if not value:
+        return 0.0
+    text = value.strip()
+    try:
+        seconds: Optional[float] = float(text)
+    except ValueError:
+        seconds = None
+    if seconds is not None:
+        import math
+        # float() also accepts "inf", "nan" and negative numbers; none of them
+        # is a usable delay, and "inf" would make the caller sleep forever.
+        if math.isfinite(seconds) and seconds > 0:
+            return seconds
+        return 0.0
+    try:
+        from email.utils import parsedate_to_datetime
+        dt = parsedate_to_datetime(text)
+        delta = dt.timestamp() - time.time()
+        return max(delta, 0.5)
+    except (TypeError, ValueError, OverflowError, OSError):
+        return 0.0
diff --git a/modules/connectors/providerGoogle/connectorGoogle.py b/modules/connectors/providerGoogle/connectorGoogle.py
index 1c5826de..a1f02a03 100644
--- a/modules/connectors/providerGoogle/connectorGoogle.py
+++ b/modules/connectors/providerGoogle/connectorGoogle.py
@@ -10,10 +10,13 @@ from typing import Any, Dict, List, Optional
import aiohttp
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
+from modules.connectors._httpResilience import ResilientHttp
from modules.datamodels.datamodelDataSource import ExternalEntry
logger = logging.getLogger(__name__)
+_http = ResilientHttp("Google", maxConcurrent=8, defaultTimeoutS=20)
+
_DRIVE_BASE = "https://www.googleapis.com/drive/v3"
_GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1"
_CALENDAR_BASE = "https://www.googleapis.com/calendar/v3"
@@ -57,17 +60,7 @@ def _parseGoogleDateRange(text: Optional[str]) -> tuple:
async def _googleGet(token: str, url: str) -> Dict[str, Any]:
headers = {"Authorization": f"Bearer {token}"}
- timeout = aiohttp.ClientTimeout(total=20)
- try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.get(url, headers=headers) as resp:
- if resp.status in (200, 201):
- return await resp.json()
- errorText = await resp.text()
- logger.warning(f"Google API {resp.status}: {errorText[:300]}")
- return {"error": f"{resp.status}: {errorText[:200]}"}
- except Exception as e:
- return {"error": str(e)}
+ return await _http.getJson(url, headers=headers)
def _raiseGoogleError(result: Dict[str, Any], ctx: str) -> None:
@@ -128,37 +121,33 @@ class DriveAdapter(ServiceAdapter):
if not fileId:
return b""
headers = {"Authorization": f"Bearer {self._token}"}
- timeout = aiohttp.ClientTimeout(total=60)
+ dlTimeout = aiohttp.ClientTimeout(total=60)
try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- # Try direct download first
- url = f"{_DRIVE_BASE}/files/{fileId}?alt=media"
- async with session.get(url, headers=headers) as resp:
- if resp.status == 200:
- return await resp.read()
- logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}")
+ url = f"{_DRIVE_BASE}/files/{fileId}?alt=media"
+ data = await _http.getBytes(url, headers=headers, timeout=dlTimeout)
+ if data is not None:
+ return data
+ logger.debug(f"Google Drive direct download returned None for {fileId}")
- # If 403/404, check if it's a native Google file that needs export
- metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name"
- async with session.get(metaUrl, headers=headers) as metaResp:
- if metaResp.status != 200:
- logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}")
- return b""
- meta = await metaResp.json()
- fileMime = meta.get("mimeType", "")
- fileName = meta.get("name", fileId)
+ metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name"
+ meta = await _http.getJson(metaUrl, headers=headers)
+ if "error" in meta:
+ logger.warning(f"Google Drive metadata fetch failed for {fileId}: {meta['error']}")
+ return b""
+ fileMime = meta.get("mimeType", "")
+ fileName = meta.get("name", fileId)
- exportMime = self._EXPORT_MIME_MAP.get(fileMime)
- if not exportMime:
- logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
- return b""
+ exportMime = self._EXPORT_MIME_MAP.get(fileMime)
+ if not exportMime:
+ logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})")
+ return b""
- exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}"
- logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
- async with session.get(exportUrl, headers=headers) as exportResp:
- if exportResp.status == 200:
- return await exportResp.read()
- logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'")
+ exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}"
+ logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}")
+ exported = await _http.getBytes(exportUrl, headers=headers, timeout=dlTimeout)
+ if exported is not None:
+ return exported
+ logger.warning(f"Google Drive export failed for '{fileName}'")
except Exception as e:
logger.error(f"Google Drive download failed for {fileId}: {e}")
return b""
diff --git a/modules/connectors/providerInfomaniak/connectorInfomaniak.py b/modules/connectors/providerInfomaniak/connectorInfomaniak.py
index 94a54860..dfdc8bab 100644
--- a/modules/connectors/providerInfomaniak/connectorInfomaniak.py
+++ b/modules/connectors/providerInfomaniak/connectorInfomaniak.py
@@ -44,10 +44,13 @@ from modules.connectors.connectorProviderBase import (
ServiceAdapter,
DownloadResult,
)
+from modules.connectors._httpResilience import ResilientHttp
from modules.datamodels.datamodelDataSource import ExternalEntry
logger = logging.getLogger(__name__)
+_http = ResilientHttp("Infomaniak", maxConcurrent=6, defaultTimeoutS=20)
+
_API_BASE = "https://api.infomaniak.com"
_CALENDAR_BASE = "https://calendar.infomaniak.com"
_CONTACTS_BASE = "https://contacts.infomaniak.com"
@@ -82,18 +85,7 @@ async def _infomaniakGet(
"""
url = f"{baseUrl.rstrip('/')}/{endpoint.lstrip('/')}"
headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
- timeout = aiohttp.ClientTimeout(total=20)
- try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.get(url, headers=headers, allow_redirects=False) as resp:
- if resp.status in (200, 201):
- return await resp.json()
- errorText = await resp.text()
- logger.warning(f"Infomaniak GET {url} -> {resp.status}: {errorText[:300]}")
- return {"error": f"{resp.status}: {errorText[:200]}"}
- except Exception as e:
- logger.error(f"Infomaniak GET {url} crashed: {e}")
- return {"error": str(e)}
+ return await _http.getJson(url, headers=headers, allowRedirects=False)
def _raiseInfomaniakError(result: Dict[str, Any], ctx: str) -> None:
@@ -124,20 +116,7 @@ async def _infomaniakDownload(
"""
url = f"{baseUrl.rstrip('/')}/{endpoint.lstrip('/')}"
headers = {"Authorization": f"Bearer {token}"}
- timeout = aiohttp.ClientTimeout(total=120)
- try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.get(url, headers=headers, allow_redirects=True) as resp:
- if resp.status == 200:
- return await resp.read()
- logger.warning(
- f"Infomaniak download {url} -> {resp.status}: "
- f"{(await resp.text())[:300]}"
- )
- return None
- except Exception as e:
- logger.error(f"Infomaniak download {url} crashed: {e}")
- return None
+ return await _http.getBytes(url, headers=headers, timeout=aiohttp.ClientTimeout(total=120))
def _unwrapData(payload: Any) -> Any:
diff --git a/modules/connectors/providerMsft/connectorMsft.py b/modules/connectors/providerMsft/connectorMsft.py
index 24f6e25c..0830e6ed 100644
--- a/modules/connectors/providerMsft/connectorMsft.py
+++ b/modules/connectors/providerMsft/connectorMsft.py
@@ -13,11 +13,13 @@ import urllib.parse
from typing import Dict, Any, List, Optional
from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult
+from modules.connectors._httpResilience import ResilientHttp
from modules.datamodels.datamodelDataSource import ExternalEntry
logger = logging.getLogger(__name__)
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
+_http = ResilientHttp("Graph", maxConcurrent=10, defaultTimeoutS=30)
class _GraphApiMixin:
@@ -44,24 +46,14 @@ class _GraphApiMixin:
async def _graphDownload(self, endpoint: str) -> Optional[bytes]:
"""Download binary content from Graph API."""
headers = {"Authorization": f"Bearer {self._accessToken}"}
- timeout = aiohttp.ClientTimeout(total=60)
url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
- try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.get(url, headers=headers) as resp:
- if resp.status == 200:
- return await resp.read()
- logger.error(f"Download failed {resp.status}: {await resp.text()}")
- return None
- except Exception as e:
- logger.error(f"Graph download error: {e}")
- return None
+ return await _http.getBytes(url, headers=headers, timeout=aiohttp.ClientTimeout(total=60))
async def _makeGraphCall(
token: str, endpoint: str, method: str = "GET", data: Any = None
) -> Dict[str, Any]:
- """Execute a single Microsoft Graph API call."""
+ """Execute a single Microsoft Graph API call via shared resilient HTTP client."""
url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}"
contentType = "application/json; charset=utf-8"
if method == "PUT" and isinstance(data, bytes):
@@ -72,37 +64,7 @@ async def _makeGraphCall(
}
if "$count=true" in endpoint:
headers["ConsistencyLevel"] = "eventual"
- timeout = aiohttp.ClientTimeout(total=30)
- try:
- async with aiohttp.ClientSession(timeout=timeout) as session:
- kwargs: Dict[str, Any] = {"headers": headers}
- if data is not None:
- kwargs["data"] = data
-
- if method == "GET":
- async with session.get(url, **kwargs) as resp:
- return await _handleResponse(resp)
- elif method == "POST":
- async with session.post(url, **kwargs) as resp:
- return await _handleResponse(resp)
- elif method == "PUT":
- async with session.put(url, **kwargs) as resp:
- return await _handleResponse(resp)
- elif method == "PATCH":
- async with session.patch(url, **kwargs) as resp:
- return await _handleResponse(resp)
- elif method == "DELETE":
- async with session.delete(url, **kwargs) as resp:
- if resp.status in (200, 204):
- return {}
- return await _handleResponse(resp)
-
- except asyncio.TimeoutError:
- return {"error": f"Graph API timeout: {endpoint}"}
- except Exception as e:
- return {"error": f"Graph API error: {e}"}
-
- return {"error": f"Unsupported method: {method}"}
+ return await _http.request(method, url, headers=headers, data=data)
async def _handleResponse(resp: aiohttp.ClientResponse) -> Dict[str, Any]:
diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py
index 41903211..b33aaf74 100644
--- a/modules/features/trustee/mainTrustee.py
+++ b/modules/features/trustee/mainTrustee.py
@@ -494,7 +494,7 @@ TEMPLATE_WORKFLOWS = [
"closingBalance in accountSummary ist bereits der korrekte Ist-Wert.\n\n"
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
- "Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
+ "Hinweis: Wenn du ein Dokument erstellst, "
"verwende einen professionellen Finanz-Stil:\n"
"- Schriftart: Calibri\n"
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
@@ -504,7 +504,6 @@ TEMPLATE_WORKFLOWS = [
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
),
"resultType": "xlsx",
- "documentTheme": "finance",
"requireNeutralization": False,
"documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]},
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index dcf819cc..2d13439c 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -141,6 +141,18 @@ class AiObjects:
_MAX_SHORT_RETRY = 15.0
+ # Pre-flight guard: reject obviously oversized payloads before entering the failover loop
+ estimatedTokens = (len(prompt or "") + len(context or "")) // 3
+ bestContextLength = max((m.contextLength for m in failoverModelList if m.contextLength > 0), default=0)
+ if bestContextLength > 0 and estimatedTokens > bestContextLength * 0.95:
+ errorMsg = (f"Input too large for all available models: ~{estimatedTokens} estimated tokens "
+ f"vs best model context {bestContextLength}. Use chunking pipeline instead.")
+ logger.error(errorMsg)
+ return AiCallResponse(
+ content=errorMsg, modelName="error", priceCHF=0.0,
+ processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
+ )
+
lastError = None
for attempt, model in enumerate(failoverModelList):
try:
diff --git a/modules/serviceCenter/core/serviceStreaming/eventManager.py b/modules/serviceCenter/core/serviceStreaming/eventManager.py
index 00e29f17..bc1fb3c6 100644
--- a/modules/serviceCenter/core/serviceStreaming/eventManager.py
+++ b/modules/serviceCenter/core/serviceStreaming/eventManager.py
@@ -181,6 +181,19 @@ class EventManager:
task = asyncio.create_task(_cleanup())
self._cleanup_tasks[workflow_id] = task
+    def shutdown(self) -> None:
+        """Stop background work so the process can exit quickly.
+
+        Cancels every unfinished task in the cleanup registry and then in the
+        agent registry, emptying each registry afterwards, and finally empties
+        the queue map.
+        """
+        for registryName in ("_cleanup_tasks", "_agent_tasks"):
+            registry = getattr(self, registryName)
+            # Iterate over a snapshot of the registered tasks.
+            for pending in list(registry.values()):
+                if pending.done():
+                    continue
+                pending.cancel()
+            registry.clear()
+        self._queues.clear()
+        logger.info("EventManager shutdown: all tasks cancelled")
+
# Global event manager instance
_event_manager: Optional[EventManager] = None
diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
index 5d377bc7..c7e292e2 100644
--- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
+++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py
@@ -224,7 +224,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
title=title,
userPrompt=content,
style=args.get("style"),
- documentTheme=args.get("documentTheme"),
imageResolver=_imageBytesResolver if lazyBlockImages else None,
)
@@ -315,17 +314,6 @@ def _registerMediaTools(registry: ToolRegistry, services):
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
"title": {"type": "string", "description": "Document title", "default": "Document"},
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
- "documentTheme": {
- "type": "string",
- "enum": ["general", "finance", "legal", "technical", "hr", "marketing"],
- "description": (
- "Named style preset applied by the renderer (colors, fonts, spacing). "
- "Pick the one that matches the document purpose: 'legal' for serif/justified "
- "legal filings, 'marketing' for bold image-friendly layouts, 'finance', "
- "'technical', 'hr', or 'general' (default). The explicit 'style' object, if "
- "provided, overrides individual preset keys."
- ),
- },
"style": {
"type": "object",
"description": (
diff --git a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py
index 2fbe9c34..4c747e64 100644
--- a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py
+++ b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py
@@ -112,6 +112,50 @@ def _makeReadFile(services):
return readFile
+_MAX_FILE_BYTES = 50_000_000  # 50 MB safety limit
+
+
+def _makeReadFileBytes(services):
+    """Build the ``readFileBytes(fileId)`` helper that gives sandboxed code binary file access.
+
+    The returned function looks the file up through
+    ``services.interfaceDbComponent.getFileData`` and returns its bytes.
+    It raises ``RuntimeError`` when no file store is available,
+    ``FileNotFoundError`` when the store returns ``None`` and ``ValueError``
+    when the data is larger than ``_MAX_FILE_BYTES``.
+    """
+    def readFileBytes(fileId: str) -> bytes:
+        store = None
+        if services:
+            store = getattr(services, 'interfaceDbComponent', None)
+        if not store:
+            raise RuntimeError("readFileBytes: no file store available in this session")
+        payload = store.getFileData(str(fileId))
+        if payload is None:
+            raise FileNotFoundError(f"File '{fileId}' not found in workspace")
+        size = len(payload)
+        if size > _MAX_FILE_BYTES:
+            raise ValueError(f"File too large for sandbox analysis ({size} bytes, limit {_MAX_FILE_BYTES})")
+        return payload
+    return readFileBytes
+
+
+class SafeZipFile:
+    """Read-only in-memory ZIP analysis wrapper for the sandbox.
+
+    Does not expose extract/write -- only ``namelist``, ``infolist`` and an
+    in-memory ``read`` that refuses members whose declared uncompressed size
+    exceeds ``_MAX_MEMBER_BYTES``. Usable as a context manager.
+    """
+
+    # Per-member cap on the uncompressed size declared in the archive, so a
+    # small archive cannot expand into an arbitrarily large buffer in memory.
+    _MAX_MEMBER_BYTES = 50_000_000
+
+    def __init__(self, data: bytes):
+        """Open the ZIP archive contained in ``data`` for reading."""
+        import zipfile as _zf
+        self._zf = _zf.ZipFile(io.BytesIO(data), 'r')
+
+    def namelist(self):
+        """Return the member names of the archive."""
+        return self._zf.namelist()
+
+    def infolist(self):
+        """Return one plain dict per member (name, sizes, timestamp)."""
+        return [
+            {
+                "filename": info.filename,
+                "file_size": info.file_size,
+                "compress_size": info.compress_size,
+                "date_time": info.date_time,
+            }
+            for info in self._zf.infolist()
+        ]
+
+    def read(self, name: str) -> bytes:
+        """Return the uncompressed bytes of member ``name``.
+
+        Raises:
+            KeyError: If the archive has no member called ``name``.
+            ValueError: If the member's declared size exceeds the limit.
+        """
+        declaredSize = self._zf.getinfo(name).file_size
+        if declaredSize > self._MAX_MEMBER_BYTES:
+            raise ValueError(
+                f"ZIP member '{name}' too large for sandbox analysis "
+                f"({declaredSize} bytes, limit {self._MAX_MEMBER_BYTES})"
+            )
+        return self._zf.read(name)
+
+    def close(self) -> None:
+        """Close the underlying archive."""
+        self._zf.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.close()
+
+
async def executePython(code: str, *, services=None) -> Dict[str, Any]:
"""Execute Python code in a restricted sandbox. Returns {success, output, error}."""
import asyncio
@@ -120,8 +164,10 @@ async def executePython(code: str, *, services=None) -> Dict[str, Any]:
restrictedGlobals = _buildRestrictedGlobals()
vfs = _VirtualFS()
restrictedGlobals["__builtins__"]["open"] = vfs.open
+ restrictedGlobals["__builtins__"]["SafeZipFile"] = SafeZipFile
if services:
restrictedGlobals["__builtins__"]["readFile"] = _makeReadFile(services)
+ restrictedGlobals["__builtins__"]["readFileBytes"] = _makeReadFileBytes(services)
capturedOutput = io.StringIO()
oldStdout = sys.stdout
oldStderr = sys.stderr
diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
index d5054921..afbde59a 100644
--- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py
+++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py
@@ -1680,8 +1680,7 @@ Respond with ONLY a JSON object in this exact format:
language: str,
title: str,
userPrompt: str,
- parentOperationId: str,
- documentTheme: Optional[str] = None
+ parentOperationId: str
) -> List[RenderedDocument]:
"""
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
@@ -1733,8 +1732,7 @@ Respond with ONLY a JSON object in this exact format:
title,
userPrompt,
self,
- parentOperationId=renderOperationId, # Parent-Referenz für ChatLog-Hierarchie
- documentTheme=documentTheme
+ parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
)
# ChatLog abschließen
@@ -1776,8 +1774,7 @@ Respond with ONLY a JSON object in this exact format:
outputFormat: Optional[str] = None,
title: Optional[str] = None,
parentOperationId: Optional[str] = None,
- generationIntent: Optional[str] = None, # NEW: Explicit intent from action (skips detection)
- documentTheme: Optional[str] = None # Named style preset for document rendering
+ generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
) -> AiResponse:
"""
Unified AI content generation with explicit intent requirement.
@@ -1796,8 +1793,6 @@ Respond with ONLY a JSON object in this exact format:
parentOperationId: Optional parent operation ID for hierarchical logging
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
NO auto-detection - actions must explicitly specify intent.
- documentTheme: Optional named style preset (general/finance/legal/technical/
- hr/marketing) forwarded to the renderer for document generation.
Returns:
AiResponse with content, metadata, and optional documents
@@ -1868,8 +1863,7 @@ Respond with ONLY a JSON object in this exact format:
contentParts=contentParts,
outputFormat=outputFormat,
title=title,
- parentOperationId=parentOperationId,
- documentTheme=documentTheme
+ parentOperationId=parentOperationId
)
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
@@ -2085,8 +2079,7 @@ Respond with ONLY a JSON object in this exact format:
contentParts: Optional[List[ContentPart]],
outputFormat: str,
title: str,
- parentOperationId: Optional[str],
- documentTheme: Optional[str] = None
+ parentOperationId: Optional[str]
) -> AiResponse:
"""Handle document generation using document generation path."""
from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
@@ -2103,8 +2096,7 @@ Respond with ONLY a JSON object in this exact format:
contentParts=contentParts,
outputFormat=outputFormat,
title=title or "Generated Document",
- parentOperationId=parentOperationId,
- documentTheme=documentTheme
+ parentOperationId=parentOperationId
)
diff --git a/modules/serviceCenter/services/serviceAi/subStructureFilling.py b/modules/serviceCenter/services/serviceAi/subStructureFilling.py
index dbf289fd..2baf0a84 100644
--- a/modules/serviceCenter/services/serviceAi/subStructureFilling.py
+++ b/modules/serviceCenter/services/serviceAi/subStructureFilling.py
@@ -27,6 +27,21 @@ class _AiResponseFallback:
logger = logging.getLogger(__name__)
+def _normalizeImageElement(element: Dict[str, Any]) -> None:
+    """Move flat image keys (base64Data, altText, caption, mimeType, fileName) into element['content'].
+    Mutates element in place and returns None; a no-op when 'content' is already a dict, while any
+    non-dict 'content' is replaced. The fallback fileName is '<id>.png', or 'image.png' without an id."""
+    if isinstance(element.get("content"), dict):
+        return
+    element["content"] = {
+        "base64Data": element.pop("base64Data", ""),
+        "altText": element.pop("altText", ""),
+        "caption": element.pop("caption", ""),
+        "mimeType": element.pop("mimeType", "image/png"),
+        "fileName": element.pop("fileName", f"{element.get('id') or 'image'}.png"),  # id may be int/None
+    }
+
+
def _elements_from_section_content_ai_json(parsed: Any) -> List[Any]:
"""Normalize section_content AI JSON (incl. models that return {\"text\": ...}) into elements."""
from modules.serviceCenter.services.serviceAi.subLoopingUseCases import _normalizeSectionContentJson
@@ -494,14 +509,18 @@ class StructureFiller:
try:
jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content))
if isinstance(jsonContent, dict) and jsonContent.get("type") == "image":
+ _normalizeImageElement(jsonContent)
elements.append(jsonContent)
logger.debug("AI returned proper JSON image structure")
- base64Data = None # Signal that image was already processed
+ base64Data = None
elif isinstance(jsonContent, list) and len(jsonContent) > 0:
if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image":
+ for item in jsonContent:
+ if isinstance(item, dict) and item.get("type") == "image":
+ _normalizeImageElement(item)
elements.extend(jsonContent)
logger.debug("AI returned proper JSON image structure in list")
- base64Data = None # Signal that image was already processed
+ base64Data = None
else:
base64Data = "" # Continue with normal processing
else:
diff --git a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
index 1ffb8557..8747c552 100644
--- a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py
@@ -1309,6 +1309,7 @@ class ExtractionService:
Calls aiObjects._callWithModel() for actual AI calls.
"""
lastError = None
+ _VISION_API_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp"}
# Check if this is an image - Vision models need special handling
isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/"))
@@ -1316,6 +1317,18 @@ class ExtractionService:
# Determine the correct operation type based on content type
actualOperationType = options.operationType
if isImage:
+ resolvedMime = contentPart.mimeType or "image/jpeg"
+ if resolvedMime not in _VISION_API_TYPES and contentPart.data:
+ resolvedMime = _sniffImageMime(contentPart.data) or resolvedMime
+ if resolvedMime not in _VISION_API_TYPES:
+ logger.info(f"Skipping unsupported image type '{resolvedMime}' (supported: {', '.join(sorted(_VISION_API_TYPES))})")
+ return AiCallResponse(
+ content=f"[Image skipped: unsupported format {resolvedMime}]",
+ modelName="skipped", priceCHF=0.0,
+ processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=0,
+ )
+ contentPart.mimeType = resolvedMime
+
actualOperationType = OperationTypeEnum.IMAGE_ANALYSE
# Get vision-capable models for images
availableModels = modelRegistry.getAvailableModels()
@@ -1805,6 +1818,24 @@ class ExtractionService:
)
+def _sniffImageMime(data) -> Optional[str]:
+    """Detect JPEG/PNG/GIF/WebP from magic bytes; data is raw bytes-like or a base64 string. None if unrecognised."""
+    import base64 as _b64
+    try:
+        raw = bytes(data[:32]) if isinstance(data, (bytes, bytearray, memoryview)) else _b64.b64decode(data[:32])
+    except (ValueError, TypeError):  # invalid base64 (binascii.Error is a ValueError) or non-sliceable input
+        return None
+    if raw[:3] == b"\xff\xd8\xff":
+        return "image/jpeg"
+    if raw[:8] == b"\x89PNG\r\n\x1a\n":
+        return "image/png"
+    if raw[:4] == b"GIF8":
+        return "image/gif"
+    if raw[:4] == b"RIFF" and len(raw) >= 12 and raw[8:12] == b"WEBP":
+        return "image/webp"
+    return None
+
+
# Module-level function for use by subPipeline and ExtractionService
def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
"""Apply merging strategy to parts with intelligent token-aware merging.
diff --git a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py
index 5bf86aee..dbbe61c3 100644
--- a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py
@@ -14,7 +14,7 @@ from .subDocumentUtility import (
detectMimeTypeFromData,
convertDocumentDataToString
)
-from .styleDefaults import resolveStyle
+from .styleDefaults import resolveStyle, deepMerge
logger = logging.getLogger(__name__)
@@ -383,7 +383,80 @@ class GenerationService:
'workflowId': 'unknown'
}
- async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, documentTheme: Optional[str] = None, imageResolver=None) -> List[RenderedDocument]:
+    async def _enhanceStyleWithAi(self, resolvedStyle: Dict[str, Any], extractedContent: Dict[str, Any], userPrompt: Optional[str], aiService) -> Dict[str, Any]:
+        """Ask the AI for a style delta that fits the document and deep-merge it into resolvedStyle.
+        Overrides whose type conflicts with the value they replace (a scalar or null for a dict section,
+        a string for a number, ...) are dropped so a malformed answer cannot break rendering; new keys pass.
+        Returns the enhanced style dict, or resolvedStyle unchanged on an empty answer or any failure."""
+        try:
+            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
+            import json as _json, re as _re
+            def _isNum(x):  # bool is an int subclass but must not count as a number here
+                return isinstance(x, (int, float)) and not isinstance(x, bool)
+            def _sanitize(base, patch):  # keep only overrides of the same kind as the value they replace
+                clean = {}
+                for key, new in patch.items():
+                    cur = base.get(key)
+                    if isinstance(cur, dict):
+                        if isinstance(new, dict):
+                            clean[key] = _sanitize(cur, new)
+                    elif cur is None or (_isNum(cur) and _isNum(new)) or (not _isNum(cur) and type(new) is type(cur)):
+                        clean[key] = new
+                return clean
+            metadata = extractedContent.get("metadata", {}) if isinstance(extractedContent, dict) else {}
+            docTitle = metadata.get("title", "") if isinstance(metadata, dict) else ""
+            docType = metadata.get("documentType", "") if isinstance(metadata, dict) else ""
+            userHint = (userPrompt or "")[:300]
+            styleJson = _json.dumps(resolvedStyle, indent=2, default=str)
+            prompt = (
+                "You are a document styling expert. Given the document context below, "
+                "return a JSON delta object containing ONLY the style properties you want to change "
+                "from the current defaults. Return {} if no changes are needed.\n\n"
+                f"Document title: {docTitle}\n"
+                f"Document type: {docType}\n"
+                f"User request (excerpt): {userHint}\n\n"
+                f"Current style (full schema):\n{styleJson}\n\n"
+                "You may adjust any property: fonts (primary/monospace), colors, "
+                "documentTitle (size, alignment), headings (h1-h4 sizes, colors, spacing), "
+                "paragraph (size, lineSpacing, alignment e.g. justified), "
+                "table (header colors, banding, borders, cell padding), "
+                "list (bullet character, indent), image (default width, alignment), "
+                "codeBlock (font size, background, border), "
+                "coverPage (title/subtitle sizes and colors), "
+                "caption (size, color, italic), "
+                "page (format, margins, header/footer).\n"
+                "Match the document's purpose and tone. Examples: a legal document should use "
+                "serif fonts and justified text; a financial report conservative colors; "
+                "a marketing brochure bold colors and generous spacing.\n"
+                "Return ONLY a valid JSON object (no markdown fences, no explanation)."
+            )
+            options = AiCallOptions()
+            options.operationType = OperationTypeEnum.DATA_GENERATE
+            request = AiCallRequest(prompt=prompt, context="", options=options)
+            response = await aiService.callAi(request)
+            raw = (response.content or "").strip() if response else ""
+            if not raw:
+                return resolvedStyle
+            jsonMatch = _re.search(r'```json\s*\n(.*?)\n```', raw, _re.DOTALL)
+            if jsonMatch:
+                raw = jsonMatch.group(1).strip()
+            jsonStart = raw.find('{')  # the brace slice below also discards bare ``` fences and chatter
+            jsonEnd = raw.rfind('}')
+            if jsonStart == -1 or jsonEnd < jsonStart:
+                return resolvedStyle
+            raw = raw[jsonStart:jsonEnd + 1]
+            delta = _json.loads(raw)
+            delta = _sanitize(resolvedStyle, delta) if isinstance(delta, dict) else {}
+            if not delta:
+                return resolvedStyle
+            enhanced = deepMerge(resolvedStyle, delta)
+            logger.info("AI style enhancement applied %d top-level key(s)", len(delta))
+            return enhanced
+        except Exception as exc:  # best-effort cosmetic step: never let it fail the render
+            logger.warning("AI style enhancement failed, using base style: %s", exc)
+            return resolvedStyle
+
+ async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, imageResolver=None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each.
@@ -401,9 +474,6 @@ class GenerationService:
aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
- documentTheme: Optional named theme preset (general/finance/legal/
- technical/hr/marketing). Resolved as DEFAULT_STYLE <- preset <- style,
- so an explicit ``style`` override always wins.
imageResolver: Optional callable ``fileId -> bytes`` for lazy, on-demand
resolution of block images that carry only a ``fileId`` (no embedded
base64). Lets large documents avoid holding every image's bytes in
@@ -414,7 +484,9 @@ class GenerationService:
Each RenderedDocument represents one rendered file (main document or supporting file)
"""
try:
- resolvedStyle = resolveStyle(style, documentTheme)
+ resolvedStyle = resolveStyle(style)
+ if aiService:
+ resolvedStyle = await self._enhanceStyleWithAi(resolvedStyle, extractedContent, userPrompt, aiService)
# Validate JSON input
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
diff --git a/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py b/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py
index f7d146f5..4fc6c9d5 100644
--- a/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py
+++ b/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py
@@ -34,8 +34,7 @@ class DocumentGenerationPath:
contentParts: Optional[List[ContentPart]] = None,
outputFormat: str = "txt",
title: Optional[str] = None,
- parentOperationId: Optional[str] = None,
- documentTheme: Optional[str] = None
+ parentOperationId: Optional[str] = None
) -> AiResponse:
"""
Generate document using existing chapter/section model.
@@ -166,8 +165,7 @@ class DocumentGenerationPath:
language, # Global fallback (per-document language extracted from structure in renderReport)
title or "Generated Document",
userPrompt,
- docOperationId,
- documentTheme=documentTheme
+ docOperationId
)
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py
index 35401f27..61eadee7 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py
@@ -154,8 +154,12 @@ class BaseRenderer(ABC):
para = style["paragraph"]
lst = style["list"]
cb = style["codeBlock"]
+ fonts = style.get("fonts") if isinstance(style.get("fonts"), dict) else {}
colors = style.get("colors") if isinstance(style.get("colors"), dict) else {}
- primaryColor = colors.get("primary", "#1F3864")
+ primaryColor = colors.get("primary", "#24292e")
+ secondaryColor = colors.get("secondary", "#586069")
+ accentColor = colors.get("accent", "#0366d6")
+ bgColor = colors.get("background", "#FFFFFF")
rawDocTitle = style.get("documentTitle")
docTitle = rawDocTitle if isinstance(rawDocTitle, dict) else {}
titleSizePt = docTitle.get("sizePt")
@@ -168,6 +172,10 @@ class BaseRenderer(ABC):
titleAlign = "center"
titleSpaceBefore = docTitle.get("spaceBeforePt", 0)
titleSpaceAfter = docTitle.get("spaceAfterPt", 18)
+ img = style.get("image") if isinstance(style.get("image"), dict) else {}
+ page = style.get("page") if isinstance(style.get("page"), dict) else {}
+ cover = style.get("coverPage") if isinstance(style.get("coverPage"), dict) else {}
+ caption = style.get("caption") if isinstance(style.get("caption"), dict) else {}
return {
"title": {
"font_size": titleSizePt,
@@ -179,56 +187,169 @@ class BaseRenderer(ABC):
},
"heading1": {
"font_size": h1["sizePt"], "color": h1["color"],
- "bold": h1.get("weight") == "bold", "align": "left",
+ "bold": h1.get("weight") == "bold", "align": h1.get("align", "left"),
"space_before": h1.get("spaceBeforePt", 24),
"space_after": h1.get("spaceAfterPt", 8),
},
"heading2": {
"font_size": h2["sizePt"], "color": h2["color"],
- "bold": h2.get("weight") == "bold", "align": "left",
+ "bold": h2.get("weight") == "bold", "align": h2.get("align", "left"),
"space_before": h2.get("spaceBeforePt", 20),
"space_after": h2.get("spaceAfterPt", 6),
},
"heading3": {
"font_size": h3["sizePt"], "color": h3["color"],
- "bold": h3.get("weight") == "bold", "align": "left",
+ "bold": h3.get("weight") == "bold", "align": h3.get("align", "left"),
"space_before": h3.get("spaceBeforePt", 16),
"space_after": h3.get("spaceAfterPt", 4),
},
"heading4": {
"font_size": h4["sizePt"], "color": h4["color"],
- "bold": h4.get("weight") == "bold", "align": "left",
+ "bold": h4.get("weight") == "bold", "align": h4.get("align", "left"),
"space_before": h4.get("spaceBeforePt", 12),
"space_after": h4.get("spaceAfterPt", 3),
},
"paragraph": {
"font_size": para["sizePt"], "color": para["color"],
- "bold": False, "align": "left",
- "line_height": para.get("lineSpacing", 1.15),
+ "bold": False, "align": para.get("align", "left"),
+ "line_height": para.get("lineSpacing", 1.5),
},
"table_header": {
"background": tbl["headerBg"], "text_color": tbl["headerFg"],
+ "font_size": tbl.get("headerSizePt", 10),
"bold": True, "align": "center",
},
"table_cell": {
"background": tbl["rowBandingOdd"], "text_color": para["color"],
+ "font_size": tbl.get("bodySizePt", 10),
"bold": False, "align": "left",
},
"table_border": {
- "style": "grid", "color": tbl["borderColor"],
+ "style": tbl.get("borderStyle", "grid"),
+ "color": tbl["borderColor"],
+ "width": tbl.get("borderWidthPt", 0.5),
},
+ "table_banding": {
+ "enabled": tbl.get("bandingEnabled", True),
+ "even": tbl.get("rowBandingEven", "#f6f8fa"),
+ "odd": tbl.get("rowBandingOdd", "#FFFFFF"),
+ },
+ "table_padding": tbl.get("cellPaddingPt", 4),
"bullet_list": {
"font_size": lst["sizePt"], "color": para["color"],
"indent": lst["indentPt"],
"bullet_char": lst.get("bulletChar", "\u2022"),
},
"code_block": {
- "font": style["fonts"]["monospace"],
+ "font": fonts.get("monospace", "Consolas"),
"font_size": cb["fontSizePt"], "color": para["color"],
"background": cb["background"],
+ "border_color": cb.get("borderColor", "#e1e4e8"),
+ },
+ "fonts": {
+ "primary": fonts.get("primary", "Calibri"),
+ "monospace": fonts.get("monospace", "Consolas"),
+ },
+ "colors": {
+ "primary": primaryColor,
+ "secondary": secondaryColor,
+ "accent": accentColor,
+ "background": bgColor,
+ },
+ "image": {
+ "default_width": img.get("defaultWidthPt", 480),
+ "max_width": img.get("maxWidthPt", 800),
+ "alignment": img.get("alignment", "center"),
+ },
+ "page": {
+ "format": page.get("format", "A4"),
+ "margins": page.get("marginsPt", {"top": 60, "bottom": 60, "left": 60, "right": 60}),
+ "show_page_numbers": page.get("showPageNumbers", True),
+ "header_height": page.get("headerHeight", 30),
+ "footer_height": page.get("footerHeight", 30),
+ "header_logo": page.get("headerLogo"),
+ "header_text": page.get("headerText", ""),
+ "footer_text": page.get("footerText", ""),
+ },
+ "cover_page": {
+ "title_size": cover.get("titleSizePt", 28),
+ "subtitle_size": cover.get("subtitleSizePt", 16),
+ "author_size": cover.get("authorSizePt", 12),
+ "date_size": cover.get("dateSizePt", 12),
+ "title_color": cover.get("titleColor", primaryColor),
+ "subtitle_color": cover.get("subtitleColor", secondaryColor),
+ },
+ "caption": {
+ "font_size": caption.get("sizePt", 10),
+ "color": caption.get("color", secondaryColor),
+ "italic": caption.get("italic", True),
+ "align": caption.get("align", "center"),
},
}
+    @staticmethod
+    def _looksNumeric(values: list) -> bool:
+        """Return True if at least 60% of the non-empty values parse as numbers (None and blanks are skipped)."""
+        numCount = 0
+        total = 0
+        for v in values:
+            text = str(v).strip() if v is not None else ""
+            if not text:
+                continue
+            total += 1
+            cleaned = text.replace(",", "").replace("'", "").replace(" ", "").rstrip("%")
+            cleaned = next((cleaned[len(s):] for s in ("CHF", "$", "€", "£") if cleaned.startswith(s)), cleaned)  # whole token only: lstrip("$€£CHF") also ate the letters of IDs like "C3"
+            try:
+                float(cleaned)
+                numCount += 1
+            except ValueError:
+                pass
+        return total > 0 and numCount / total >= 0.6
+
+    @staticmethod
+    def _looksDate(values: list) -> bool:
+        """Report whether at least 60% of the non-empty values are date-like strings.
+
+        A value counts when its stripped text is three digit groups separated by
+        '.', '/' or '-' (e.g. 2024-01-31 or 31.01.2024); None and blanks are ignored.
+        """
+        datePattern = re.compile(r"^\d{1,4}[./-]\d{1,2}[./-]\d{1,4}$")
+        stripped = (str(v).strip() for v in values if v is not None)
+        candidates = [text for text in stripped if text]
+        if not candidates:
+            return False
+        matched = sum(1 for text in candidates if datePattern.match(text))
+        return matched / len(candidates) >= 0.6
+
+    def _inferColumnAlignments(self, headers: list, rows: list, tableStyle: dict | None = None) -> list:
+        """Infer per-column alignments from explicit tableStyle or data heuristic.
+
+        An explicit tableStyle["columnAlignments"] list covering every column wins (invalid entries become
+        "left"); otherwise numeric-looking columns are "right", date-looking ones "center", the rest "left".
+        Returns a list of ``"left"``/``"center"``/``"right"`` strings, one per column ([] without columns).
+        """
+        colCount = len(headers) if headers else (len(rows[0]) if rows else 0)
+        if not colCount:
+            return []
+        explicit = tableStyle.get("columnAlignments") if tableStyle else None
+        if isinstance(explicit, list) and len(explicit) >= colCount:
+            return [a if a in ("left", "center", "right") else "left" for a in explicit[:colCount]]
+        alignments = []
+        for colIdx in range(colCount):
+            colValues = []
+            for row in rows:
+                if colIdx < len(row):
+                    cell = row[colIdx]
+                    if isinstance(cell, list):
+                        # Inline runs: join their text; str() keeps numeric run values from raising, None is skipped.
+                        runValues = (r.get("value") for r in cell if isinstance(r, dict))
+                        cell = "".join(str(val) for val in runValues if val is not None)
+                    colValues.append(cell)
+            if self._looksNumeric(colValues):
+                alignments.append("right")
+            else:
+                alignments.append("center" if self._looksDate(colValues) else "left")
+        return alignments
+
@staticmethod
def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any:
"""Extract inline runs from new-format content, falling back to old format.
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py
index 37d191e1..9d7b24ff 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py
@@ -10,13 +10,11 @@ from typing import Dict, Any, List, Optional
import io
import base64
import re
-import csv
try:
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
- from docx.enum.table import WD_TABLE_ALIGNMENT
DOCX_AVAILABLE = True
except ImportError:
DOCX_AVAILABLE = False
@@ -122,18 +120,12 @@ class RendererDocx(BaseRenderer):
doc = Document()
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
- # Phase 3: prefer unified style when provided
+ # Apply unified style (always provided by mainServiceGeneration.renderReport)
style_start = time.time()
self.logger.debug("_generateDocxFromJson: About to get style set")
- if unifiedStyle:
- styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
- self._unifiedStyle = unifiedStyle
- else:
- template_from_metadata = None
- if json_content and isinstance(json_content.get("metadata"), dict):
- template_from_metadata = json_content["metadata"].get("templateName")
- styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
- self._unifiedStyle = None
+ styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
+ self._unifiedStyle = unifiedStyle
+ self._styleSet = styleSet
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
# Setup basic document styles and create all styles from style set
@@ -201,108 +193,6 @@ class RendererDocx(BaseRenderer):
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
raise Exception(f"DOCX generation failed: {str(e)}")
- async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
- """Get style set - use styles from document generation metadata if available,
- otherwise enhance default styles with AI if userPrompt provided.
-
- WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
- not be generated separately by renderers. Only fall back to AI if styles not provided.
-
- Args:
- extractedContent: Document content with metadata (may contain styles)
- userPrompt: User's prompt (AI will detect style instructions in any language)
- aiService: AI service (used only if styles not in metadata and userPrompt provided)
- templateName: Name of template style set (None = default)
-
- Returns:
- Dict with style definitions for all document styles
- """
- # Get default style set
- if templateName == "corporate":
- defaultStyleSet = self._getCorporateStyleSet()
- elif templateName == "minimal":
- defaultStyleSet = self._getMinimalStyleSet()
- else:
- defaultStyleSet = self._getDefaultStyleSet()
-
- # FIRST: Check if styles are provided in document generation metadata (preferred approach)
- if extractedContent:
- metadata = extractedContent.get("metadata", {})
- if isinstance(metadata, dict):
- styles = metadata.get("styles")
- if styles and isinstance(styles, dict):
- self.logger.debug("Using styles from document generation metadata")
- return self._validateStylesContrast(styles)
-
- # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
- if userPrompt and aiService:
- self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
- enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- return self._validateStylesContrast(enhancedStyleSet)
- else:
- # Use default styles only
- return defaultStyleSet
-
- async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
- """Enhance default styles with AI based on user prompt."""
- try:
- style_template = self._createAiStyleTemplate("docx", userPrompt, defaultStyleSet)
- enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet)
- return enhanced_styles
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
- return defaultStyleSet
-
- def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Validate and fix contrast issues in AI-generated styles."""
- try:
- # Fix table header contrast
- if "table_header" in styles:
- header = styles["table_header"]
- bg_color = header.get("background", "#FFFFFF")
- text_color = header.get("text_color", "#000000")
-
- # If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
- header["background"] = "#4F4F4F"
- header["text_color"] = "#FFFFFF"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
- header["background"] = "#4F4F4F"
- header["text_color"] = "#FFFFFF"
-
- # Fix table cell contrast
- if "table_cell" in styles:
- cell = styles["table_cell"]
- bg_color = cell.get("background", "#FFFFFF")
- text_color = cell.get("text_color", "#000000")
-
- # If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
- cell["background"] = "#FFFFFF"
- cell["text_color"] = "#2F2F2F"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
- cell["background"] = "#FFFFFF"
- cell["text_color"] = "#2F2F2F"
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"Style validation failed: {str(e)}")
- return self._getDefaultStyleSet()
-
- def _getDefaultStyleSet(self) -> Dict[str, Any]:
- """Default DOCX style set - used when no style instructions present."""
- return {
- "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "left"},
- "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
- "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
- "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
- "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
- "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
- "table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
- "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
- "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
- }
def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles."""
@@ -318,21 +208,6 @@ class RendererDocx(BaseRenderer):
- def _clearTemplateContent(self, doc: Document) -> None:
- """Clear template content while preserving styles."""
- try:
- # Remove all paragraphs except keep the styles
- for paragraph in list(doc.paragraphs):
- # Keep the paragraph but clear its content
- paragraph.clear()
-
- # Remove all tables
- for table in list(doc.tables):
- table._element.getparent().remove(table._element)
-
- except Exception as e:
- self.logger.warning(f"Could not clear template content: {str(e)}")
-
def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
@@ -434,8 +309,8 @@ class RendererDocx(BaseRenderer):
def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
"""Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph."""
pos = 0
- us = getattr(self, '_unifiedStyle', None)
- monoFont = us["fonts"]["monospace"] if us else "Courier New"
+ ss = getattr(self, '_styleSet', None)
+ monoFont = ss["fonts"]["monospace"] if ss else "Consolas"
for m in self._MD_INLINE_RE.finditer(text):
if m.start() > pos:
paragraph.add_run(text[pos:m.start()])
@@ -450,15 +325,17 @@ class RendererDocx(BaseRenderer):
elif m.group(7):
run = paragraph.add_run(m.group(7))
run.font.name = monoFont
- run.font.size = Pt(9)
+ us = getattr(self, '_unifiedStyle', None)
+ codeSizePt = us["codeBlock"]["fontSizePt"] if us else 9
+ run.font.size = Pt(codeSizePt)
pos = m.end()
if pos < len(text):
paragraph.add_run(text[pos:])
def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None:
"""Process a list of InlineRun dicts into python-docx Runs on a paragraph."""
- us = getattr(self, '_unifiedStyle', None)
- monoFont = us["fonts"]["monospace"] if us else "Courier New"
+ ss = getattr(self, '_styleSet', None)
+ monoFont = ss["fonts"]["monospace"] if ss else "Consolas"
for run in runs:
runType = run.get("type", "text")
value = run.get("value", "")
@@ -471,11 +348,13 @@ class RendererDocx(BaseRenderer):
elif runType == "code":
r = paragraph.add_run(value)
r.font.name = monoFont
- r.font.size = Pt(9)
+ codeSizePt = ss["code_block"]["font_size"] if ss else 9
+ r.font.size = Pt(codeSizePt)
elif runType == "link":
r = paragraph.add_run(value)
r.font.underline = True
- r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9)
+ accentHex = ss["colors"]["accent"].lstrip('#') if ss else "0366d6"
+ r.font.color.rgb = RGBColor(int(accentHex[0:2], 16), int(accentHex[2:4], 16), int(accentHex[4:6], 16))
elif runType == "image":
b64 = run.get("base64Data", "")
if b64:
@@ -505,31 +384,38 @@ class RendererDocx(BaseRenderer):
By building the XML directly, we achieve 100-1000x faster performance.
"""
import time
- table_start = time.time()
+ from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge
+
+ tableStart = time.time()
try:
- # Extract from nested content structure
content = table_data.get("content", {})
if not isinstance(content, dict):
return
headers = content.get("headers", [])
rows = content.get("rows", [])
-
+
if not headers or not rows:
return
-
+
+ # Per-table style override: merge element-level tableStyle with global styles
+ perTableStyle = content.get("tableStyle", {})
+ if perTableStyle and isinstance(perTableStyle, dict):
+ mergedStyles = deepMerge(styles, perTableStyle)
+ else:
+ mergedStyles = styles
+
totalRows = len(rows)
totalCols = len(headers)
totalCells = totalRows * totalCols
-
+
self.logger.debug(f"_renderJsonTable: Starting FAST table render - {totalRows} rows x {totalCols} columns = {totalCells} cells")
-
- # Use fast XML-based table rendering
- self._renderTableFastXml(doc, headers, rows, styles)
-
- total_time = time.time() - table_start
- rate = totalCells / total_time if total_time > 0 else 0
- self.logger.info(f"_renderJsonTable: Table completed in {total_time:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s")
-
+
+ self._renderTableFastXml(doc, headers, rows, mergedStyles)
+
+ totalTime = time.time() - tableStart
+ rate = totalCells / totalTime if totalTime > 0 else 0
+ self.logger.info(f"_renderJsonTable: Table completed in {totalTime:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s")
+
except Exception as e:
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
@@ -544,60 +430,58 @@ class RendererDocx(BaseRenderer):
from docx.oxml.shared import OxmlElement, qn
from docx.oxml.ns import nsmap
from lxml import etree
-
- create_start = time.time()
-
- # Get the document body element
+
+ createStart = time.time()
+
body = doc._body._body
-
- # Create table element
+
tbl = OxmlElement('w:tbl')
-
- # Add table properties
+
tblPr = OxmlElement('w:tblPr')
-
- # Table width - auto
+
tblW = OxmlElement('w:tblW')
tblW.set(qn('w:type'), 'auto')
tblW.set(qn('w:w'), '0')
tblPr.append(tblW)
-
+
jc = OxmlElement('w:jc')
jc.set(qn('w:val'), 'left')
tblPr.append(jc)
-
- # Apply table borders directly (works without template styles)
+
+ # Border style wiring: grid / horizontal / none
borderStyle = styles.get("table_border", {}).get("style", "grid")
- tblBorders = self._createTableBordersXml(borderStyle)
+ tblBorders = self._createTableBordersXml(borderStyle, styles)
tblPr.append(tblBorders)
-
- # Table cell margins for better readability
+
+ # Table cell margins from style (table_padding is in points; 1 pt = 20 twips)
+ paddingTwips = str(int(styles.get("table_padding", 4) * 20))
tblCellMar = OxmlElement('w:tblCellMar')
for side in ['top', 'left', 'bottom', 'right']:
margin = OxmlElement(f'w:{side}')
- margin.set(qn('w:w'), '80') # 80 twips = ~4pt padding
+ margin.set(qn('w:w'), paddingTwips)
margin.set(qn('w:type'), 'dxa')
tblCellMar.append(margin)
tblPr.append(tblCellMar)
-
+
tbl.append(tblPr)
-
- # Create table grid (column definitions)
+
tblGrid = OxmlElement('w:tblGrid')
for _ in range(len(headers)):
gridCol = OxmlElement('w:gridCol')
tblGrid.append(gridCol)
tbl.append(tblGrid)
-
- self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - create_start:.3f}s")
-
- # Build all rows using fast XML
- rows_start = time.time()
-
+
+ self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - createStart:.3f}s")
+
+ # Infer column alignments
+ colAlignments = self._inferColumnAlignments(headers, rows, styles)
+
+ rowsStart = time.time()
+
# Resolve header style colors
- tableStyle = styles.get("table_header", {})
- headerBg = tableStyle.get("background", "")
- headerFg = tableStyle.get("text_color", "")
+ tableHeaderStyle = styles.get("table_header", {})
+ headerBg = tableHeaderStyle.get("background", "")
+ headerFg = tableHeaderStyle.get("text_color", "")
# Flatten inline-run headers to plain strings for fast XML path
flatHeaders = []
@@ -605,15 +489,21 @@ class RendererDocx(BaseRenderer):
runs = self._inlineRunsForCell(h)
flatHeaders.append("".join(r.get("value", "") for r in runs))
- headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None)
+ headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None, colAlignments=colAlignments)
tbl.append(headerRow)
- header_time = time.time() - rows_start
- self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
+ headerTime = time.time() - rowsStart
+ self.logger.debug(f"_renderTableFastXml: Header row created in {headerTime:.3f}s")
- data_start = time.time()
+ dataStart = time.time()
rowCount = len(rows)
+ # Row banding: alternate even/odd row fills unless table_banding.enabled is false
+ bandingDef = styles.get("table_banding", {})
+ bandingEnabled = bandingDef.get("enabled", True)
+ bandEven = bandingDef.get("even", "").lstrip('#') if bandingEnabled else None
+ bandOdd = bandingDef.get("odd", "").lstrip('#') if bandingEnabled else None
+
for idx, rowData in enumerate(rows):
cellTexts = []
for cell in rowData:
@@ -622,49 +512,54 @@ class RendererDocx(BaseRenderer):
while len(cellTexts) < len(flatHeaders):
cellTexts.append('')
- row = self._createTableRowXml(cellTexts, isHeader=False)
+ rowBg = (bandEven if idx % 2 == 0 else bandOdd) if bandingEnabled else None
+ row = self._createTableRowXml(cellTexts, isHeader=False, rowBgHex=rowBg, colAlignments=colAlignments)
tbl.append(row)
-
- # Log progress every 10%
+
if rowCount > 100 and (idx + 1) % (rowCount // 10) == 0:
- elapsed = time.time() - data_start
+ elapsed = time.time() - dataStart
rate = (idx + 1) * len(headers) / elapsed if elapsed > 0 else 0
self.logger.debug(f"_renderTableFastXml: Progress {((idx + 1) / rowCount * 100):.0f}% ({idx + 1}/{rowCount} rows) - Rate: {rate:.0f} cells/s")
-
- data_time = time.time() - data_start
-
- # Append table to document body
- body.append(tbl)
-
- # Add an empty paragraph after the table to prevent Word from merging consecutive tables
+
+ dataTime = time.time() - dataStart
+
+ sectPr = body.find(qn('w:sectPr'))
+ if sectPr is not None:
+ sectPr.addprevious(tbl)
+ else:
+ body.append(tbl)
+
separatorParagraph = OxmlElement('w:p')
- body.append(separatorParagraph)
-
- total_time = time.time() - create_start
+ if sectPr is not None:
+ sectPr.addprevious(separatorParagraph)
+ else:
+ body.append(separatorParagraph)
+
+ totalTime = time.time() - createStart
totalCells = (rowCount + 1) * len(headers)
- rate = totalCells / total_time if total_time > 0 else 0
-
- self.logger.debug(f"_renderTableFastXml: All rows created in {data_time:.2f}s, total: {total_time:.2f}s, rate: {rate:.0f} cells/s")
+ rate = totalCells / totalTime if totalTime > 0 else 0
+
+ self.logger.debug(f"_renderTableFastXml: All rows created in {dataTime:.2f}s, total: {totalTime:.2f}s, rate: {rate:.0f} cells/s")
- def _createTableBordersXml(self, borderStyle: str) -> Any:
+ def _createTableBordersXml(self, borderStyle: str, styles: Dict[str, Any] = None) -> Any:
"""
Create table borders XML element based on style.
Supports:
- 'grid': Full grid with all borders (default)
- - 'horizontal_only': Only horizontal lines between rows
- - 'none' or other: Minimal/no borders
+ - 'horizontal' (or legacy 'horizontal_only'): Only horizontal lines between rows
+ - 'none': No borders at all; any other value falls back to the full grid
"""
from docx.oxml.shared import OxmlElement, qn
-
+
tblBorders = OxmlElement('w:tblBorders')
-
- # Border color - dark gray for professional look
- borderColor = '404040'
- borderSize = '4' # 0.5pt (in eighths of a point)
-
+
+ borderDef = (styles or {}).get("table_border", {})
+ borderColor = borderDef.get("color", "#e1e4e8").lstrip('#')
+ widthPt = borderDef.get("width", 0.5)
+ borderSize = str(int(widthPt * 8))
+
if borderStyle == "grid":
- # Full grid - all borders
for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
@@ -672,9 +567,8 @@ class RendererDocx(BaseRenderer):
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
-
- elif borderStyle == "horizontal_only":
- # Only horizontal lines
+
+ elif borderStyle in ("horizontal", "horizontal_only"):
for borderName in ['top', 'bottom', 'insideH']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
@@ -682,46 +576,52 @@ class RendererDocx(BaseRenderer):
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
- # No vertical borders
for borderName in ['left', 'right', 'insideV']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'nil')
tblBorders.append(border)
+
+ elif borderStyle == "none":
+ for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
+ border = OxmlElement(f'w:{borderName}')
+ border.set(qn('w:val'), 'nil')
+ tblBorders.append(border)
+
else:
- # Minimal - just outer border
- for borderName in ['top', 'left', 'bottom', 'right']:
+ for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
border = OxmlElement(f'w:{borderName}')
border.set(qn('w:val'), 'single')
border.set(qn('w:sz'), borderSize)
border.set(qn('w:space'), '0')
border.set(qn('w:color'), borderColor)
tblBorders.append(border)
-
+
return tblBorders
- def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any:
+ def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None, rowBgHex: str = None, colAlignments: list = None) -> Any:
"""Create a table row XML element with cells.
Fast-path: builds row XML directly via lxml."""
from docx.oxml.shared import OxmlElement, qn
+ ss = getattr(self, '_styleSet', None)
if headerBgHex is None:
- us = getattr(self, '_unifiedStyle', None)
- headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864'
+ headerBgHex = ss["table_header"]["background"].lstrip('#') if ss else '1F3864'
else:
headerBgHex = headerBgHex.lstrip('#')
if headerFgHex is None:
- us = getattr(self, '_unifiedStyle', None)
- headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF'
+ headerFgHex = ss["table_header"]["text_color"].lstrip('#') if ss else 'FFFFFF'
else:
headerFgHex = headerFgHex.lstrip('#')
+ _ALIGN_MAP = {"left": "left", "center": "center", "right": "right"}
+
tr = OxmlElement('w:tr')
if isHeader:
trPr = OxmlElement('w:trPr')
trPr.append(OxmlElement('w:tblHeader'))
tr.append(trPr)
- for cellText in cells:
+ for colIdx, cellText in enumerate(cells):
tc = OxmlElement('w:tc')
tcPr = OxmlElement('w:tcPr')
tcW = OxmlElement('w:tcW')
@@ -735,9 +635,26 @@ class RendererDocx(BaseRenderer):
shd.set(qn('w:color'), 'auto')
shd.set(qn('w:fill'), headerBgHex)
tcPr.append(shd)
+ elif rowBgHex:
+ shd = OxmlElement('w:shd')
+ shd.set(qn('w:val'), 'clear')
+ shd.set(qn('w:color'), 'auto')
+ shd.set(qn('w:fill'), rowBgHex)
+ tcPr.append(shd)
tc.append(tcPr)
p = OxmlElement('w:p')
+
+ # Apply per-column alignment
+ if colAlignments and colIdx < len(colAlignments):
+ alignment = _ALIGN_MAP.get(colAlignments[colIdx])
+ if alignment:
+ pPr = OxmlElement('w:pPr')
+ pJc = OxmlElement('w:jc')
+ pJc.set(qn('w:val'), alignment)
+ pPr.append(pJc)
+ p.append(pPr)
+
r = OxmlElement('w:r')
if isHeader:
@@ -759,148 +676,42 @@ class RendererDocx(BaseRenderer):
return tr
- def _applyHorizontalBordersOnly(self, table) -> None:
- """Apply only horizontal borders to the table (no vertical borders)."""
- try:
- from docx.oxml.shared import OxmlElement, qn
-
- # Get table properties
- tbl_pr = table._element.find(qn('w:tblPr'))
- if tbl_pr is None:
- tbl_pr = OxmlElement('w:tblPr')
- table._element.insert(0, tbl_pr)
-
- # Remove existing borders
- existing_borders = tbl_pr.find(qn('w:tblBorders'))
- if existing_borders is not None:
- tbl_pr.remove(existing_borders)
-
- # Create new borders element
- tbl_borders = OxmlElement('w:tblBorders')
-
- # Top border
- top_border = OxmlElement('w:top')
- top_border.set(qn('w:val'), 'single')
- top_border.set(qn('w:sz'), '4')
- top_border.set(qn('w:space'), '0')
- top_border.set(qn('w:color'), '000000')
- tbl_borders.append(top_border)
-
- # Bottom border
- bottom_border = OxmlElement('w:bottom')
- bottom_border.set(qn('w:val'), 'single')
- bottom_border.set(qn('w:sz'), '4')
- bottom_border.set(qn('w:space'), '0')
- bottom_border.set(qn('w:color'), '000000')
- tbl_borders.append(bottom_border)
-
- # Left border - none
- left_border = OxmlElement('w:left')
- left_border.set(qn('w:val'), 'none')
- tbl_borders.append(left_border)
-
- # Right border - none
- right_border = OxmlElement('w:right')
- right_border.set(qn('w:val'), 'none')
- tbl_borders.append(right_border)
-
- # Inside horizontal border
- inside_h_border = OxmlElement('w:insideH')
- inside_h_border.set(qn('w:val'), 'single')
- inside_h_border.set(qn('w:sz'), '4')
- inside_h_border.set(qn('w:space'), '0')
- inside_h_border.set(qn('w:color'), '000000')
- tbl_borders.append(inside_h_border)
-
- # Inside vertical border - none
- inside_v_border = OxmlElement('w:insideV')
- inside_v_border.set(qn('w:val'), 'none')
- tbl_borders.append(inside_v_border)
-
- tbl_pr.append(tbl_borders)
-
- except Exception as e:
- self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
-
- def _setCellBackground(self, cell, color: RGBColor) -> None:
- """Set the background color of a table cell."""
- try:
- from docx.oxml.shared import OxmlElement, qn
-
- # Get cell properties
- tc_pr = cell._element.find(qn('w:tcPr'))
- if tc_pr is None:
- tc_pr = OxmlElement('w:tcPr')
- cell._element.insert(0, tc_pr)
-
- # Remove existing shading
- existing_shading = tc_pr.find(qn('w:shd'))
- if existing_shading is not None:
- tc_pr.remove(existing_shading)
-
- # Create new shading element
- shading = OxmlElement('w:shd')
- shading.set(qn('w:val'), 'clear')
- shading.set(qn('w:color'), 'auto')
- # Convert RGBColor to hex string by unpacking RGB components
- red, green, blue = color
- hex_color = f"{red:02x}{green:02x}{blue:02x}"
- shading.set(qn('w:fill'), hex_color)
- tc_pr.append(shading)
-
- except Exception as e:
- self.logger.warning(f"Could not set cell background: {str(e)}")
-
- def _setCellBackgroundFast(self, cell, hex_color: str) -> None:
- """
- Set the background color of a table cell using pre-calculated hex string.
- PERFORMANCE OPTIMIZED: Avoids RGBColor unpacking and string formatting in hot loop.
- """
- try:
- from docx.oxml.shared import OxmlElement, qn
-
- # Get cell properties
- tc_pr = cell._element.find(qn('w:tcPr'))
- if tc_pr is None:
- tc_pr = OxmlElement('w:tcPr')
- cell._element.insert(0, tc_pr)
-
- # Remove existing shading
- existing_shading = tc_pr.find(qn('w:shd'))
- if existing_shading is not None:
- tc_pr.remove(existing_shading)
-
- # Create new shading element with pre-calculated hex color
- shading = OxmlElement('w:shd')
- shading.set(qn('w:val'), 'clear')
- shading.set(qn('w:color'), 'auto')
- shading.set(qn('w:fill'), hex_color)
- tc_pr.append(shading)
-
- except Exception as e:
- self.logger.warning(f"Could not set cell background: {str(e)}")
-
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
try:
+ from docx.oxml.shared import OxmlElement, qn
+
content = list_data.get("content", {})
if not isinstance(content, dict):
return
items = content.get("items", [])
- bullet_style = styles.get("bullet_list", {})
+ bulletStyle = styles.get("bullet_list", {})
- font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None
- text_color_rgb = None
- if bullet_style.get("color"):
- color_hex = bullet_style["color"].lstrip('#')
- text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
+ fontSizePt = Pt(bulletStyle["font_size"]) if bulletStyle.get("font_size") else None
+ textColorRgb = None
+ if bulletStyle.get("color"):
+ colorHex = bulletStyle["color"].lstrip('#')
+ textColorRgb = RGBColor(int(colorHex[0:2], 16), int(colorHex[2:4], 16), int(colorHex[4:6], 16))
+
+ indentTwips = int(bulletStyle.get("indent", 18) * 20)
+ bulletChar = bulletStyle.get("bullet_char", "\u2022")
for item in items:
itemRuns = self._inlineRunsForListItem(item)
if not itemRuns or not any(r.get("value") for r in itemRuns):
continue
para = doc.add_paragraph(style='List Bullet')
+
+ # Apply indent from style
+ pPr = para._p.get_or_add_pPr()
+ ind = pPr.find(qn('w:ind'))
+ if ind is None:
+ ind = OxmlElement('w:ind')
+ pPr.append(ind)
+ ind.set(qn('w:left'), str(indentTwips))
+ ind.set(qn('w:hanging'), '360')
+
isNewRunFormat = isinstance(item, list)
if isNewRunFormat:
self._renderInlineRuns(itemRuns, para, styles)
@@ -908,12 +719,12 @@ class RendererDocx(BaseRenderer):
itemText = "".join(r.get("value", "") for r in itemRuns)
self._addMarkdownInlineRuns(para, itemText)
- if bullet_style and para.runs and len(para.runs) > 0:
- run = para.runs[0]
- if font_size_pt:
- run.font.size = font_size_pt
- if text_color_rgb:
- run.font.color.rgb = text_color_rgb
+ # Apply font size and color to all runs
+ for run in para.runs:
+ if fontSizePt:
+ run.font.size = fontSizePt
+ if textColorRgb:
+ run.font.color.rgb = textColorRgb
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
@@ -1000,43 +811,62 @@ class RendererDocx(BaseRenderer):
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles."""
try:
+ from docx.oxml.shared import OxmlElement, qn
+
content = code_data.get("content", {})
if not isinstance(content, dict):
return
code = content.get("code", "")
language = content.get("language", "")
- code_style = styles.get("code_block", {})
- us = getattr(self, '_unifiedStyle', None)
+ codeStyle = styles.get("code_block", {})
if code:
if language:
- lang_para = doc.add_paragraph(f"Code ({language}):")
- if len(lang_para.runs) > 0:
- lang_para.runs[0].bold = True
+ langPara = doc.add_paragraph(f"Code ({language}):")
+ if len(langPara.runs) > 0:
+ langPara.runs[0].bold = True
- code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New")
- code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9))
- code_text_color_rgb = None
- if "color" in code_style:
- color_hex = code_style["color"].lstrip('#')
- code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
-
- code_para = doc.add_paragraph(code)
- # Use direct access instead of iterating
- if len(code_para.runs) > 0:
- run = code_para.runs[0]
- run.font.name = code_font_name
- run.font.size = code_font_size_pt
- if code_text_color_rgb:
- run.font.color.rgb = code_text_color_rgb
+ codeFontName = codeStyle.get("font", styles["fonts"]["monospace"])
+ codeFontSizePt = Pt(codeStyle.get("font_size", 9))
+ codeTextColorRgb = None
+ if "color" in codeStyle:
+ colorHex = codeStyle["color"].lstrip('#')
+ codeTextColorRgb = RGBColor(int(colorHex[0:2], 16), int(colorHex[2:4], 16), int(colorHex[4:6], 16))
+
+ codePara = doc.add_paragraph(code)
+
+ # Apply background shading and/or a paragraph border via XML
+ bgHex = codeStyle.get("background", "").lstrip('#')
+ borderColorHex = codeStyle.get("border_color", "").lstrip('#')
+ if bgHex or borderColorHex:
+ pPr = codePara._p.get_or_add_pPr()
+ if bgHex:
+ shd = OxmlElement('w:shd')
+ shd.set(qn('w:val'), 'clear')
+ shd.set(qn('w:color'), 'auto')
+ shd.set(qn('w:fill'), bgHex)
+ pPr.append(shd)
+ if borderColorHex:
+ pBdr = OxmlElement('w:pBdr')
+ for side in ['top', 'left', 'bottom', 'right']:
+ bdr = OxmlElement(f'w:{side}')
+ bdr.set(qn('w:val'), 'single')
+ bdr.set(qn('w:sz'), '4')
+ bdr.set(qn('w:space'), '4')
+ bdr.set(qn('w:color'), borderColorHex)
+ pBdr.append(bdr)
+ pPr.append(pBdr)
+
+ # Apply font styling to run
+ if len(codePara.runs) > 0:
+ run = codePara.runs[0]
else:
- # Create run if none exists
- run = code_para.add_run()
- run.font.name = code_font_name
- run.font.size = code_font_size_pt
- if code_text_color_rgb:
- run.font.color.rgb = code_text_color_rgb
-
+ run = codePara.add_run()
+ run.font.name = codeFontName
+ run.font.size = codeFontSizePt
+ if codeTextColorRgb:
+ run.font.color.rgb = codeTextColorRgb
+
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
@@ -1065,6 +895,15 @@ class RendererDocx(BaseRenderer):
content = element.get("content", element) if isinstance(element, dict) else {}
if not isinstance(content, dict):
content = {}
+
+ coverStyle = styles.get("cover_page", {})
+ titleSizePt = coverStyle.get("title_size", 28)
+ subtitleSizePt = coverStyle.get("subtitle_size", 16)
+ authorSizePt = coverStyle.get("author_size", 12)
+ dateSizePt = coverStyle.get("date_size", 12)
+ titleColor = coverStyle.get("title_color", "")
+ subtitleColor = coverStyle.get("subtitle_color", "")
+
for _ in range(6):
doc.add_paragraph("")
logoStream = self._imageStreamFromContent(content)
@@ -1078,14 +917,21 @@ class RendererDocx(BaseRenderer):
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
run = p.add_run(title)
run.bold = True
- run.font.size = Pt(28)
- for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
+ run.font.size = Pt(titleSizePt)
+ if titleColor:
+ cHex = titleColor.lstrip('#')
+ run.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16))
+ for key, sizePt, color in (("subtitle", subtitleSizePt, subtitleColor), ("author", authorSizePt, ""), ("date", dateSizePt, "")):
val = (content.get(key) or "").strip()
if not val:
continue
p = doc.add_paragraph()
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
- p.add_run(val).font.size = Pt(sizePt)
+ run = p.add_run(val)
+ run.font.size = Pt(sizePt)
+ if color:
+ cHex = color.lstrip('#')
+ run.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16))
doc.add_page_break()
except Exception as e:
self.logger.warning(f"Error rendering cover_page: {e}")
@@ -1215,8 +1061,23 @@ class RendererDocx(BaseRenderer):
caption_text = None
if caption_text:
- caption_para = doc.add_paragraph(caption_text)
- caption_para.runs[0].italic = True
+ captionPara = doc.add_paragraph(caption_text)
+ captionStyle = styles.get("caption", {})
+ if captionPara.runs:
+ captionRun = captionPara.runs[0]
+ captionRun.italic = captionStyle.get("italic", True)
+ if captionStyle.get("font_size"):
+ captionRun.font.size = Pt(captionStyle["font_size"])
+ if captionStyle.get("color"):
+ cHex = captionStyle["color"].lstrip('#')
+ captionRun.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16))
+ captionAlign = captionStyle.get("align", "center")
+ if captionAlign == "center":
+ captionPara.alignment = WD_ALIGN_PARAGRAPH.CENTER
+ elif captionAlign == "right":
+ captionPara.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+ else:
+ captionPara.alignment = WD_ALIGN_PARAGRAPH.LEFT
except Exception as embedError:
# Image decoding or embedding failed
raise Exception(f"Failed to decode or embed image: {str(embedError)}")
@@ -1228,152 +1089,6 @@ class RendererDocx(BaseRenderer):
if errorPara.runs:
errorPara.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error
- def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
- """Extract document structure from user prompt."""
- structure = {
- 'title': title,
- 'sections': [],
- 'format': 'standard'
- }
-
- if not userPrompt:
- return structure
-
- # Extract title from prompt if not provided
- if not title or title == "Generated Document":
- # Look for "create a ... document" or "generate a ... report"
- title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
- if title_match:
- structure['title'] = title_match.group(1).strip().title()
-
- # Extract sections from numbered lists in prompt
- section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
- sections = re.findall(section_pattern, userPrompt)
-
- for num, section_text in sections:
- structure['sections'].append({
- 'number': int(num),
- 'title': section_text.strip(),
- 'level': 2 # H2 level
- })
-
- # If no numbered sections found, try to extract from "including:" patterns
- if not structure['sections']:
- including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
- if including_match:
- including_text = including_match.group(1)
- # Split by common separators
- parts = re.split(r'[,;]\s*', including_text)
- for i, part in enumerate(parts, 1):
- part = part.strip()
- if part:
- structure['sections'].append({
- 'number': i,
- 'title': part,
- 'level': 2
- })
-
- # If still no sections, extract from any list-like patterns
- if not structure['sections']:
- # Look for bullet points or dashes
- bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
- bullets = re.findall(bullet_pattern, userPrompt)
- for i, bullet in enumerate(bullets, 1):
- bullet = bullet.strip()
- if bullet and len(bullet) > 3:
- structure['sections'].append({
- 'number': i,
- 'title': bullet,
- 'level': 2
- })
-
- # If still no sections, extract from sentence structure
- if not structure['sections']:
- # Split prompt into sentences and use as sections
- sentences = re.split(r'[.!?]\s+', userPrompt)
- for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
- sentence = sentence.strip()
- if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
- structure['sections'].append({
- 'number': i,
- 'title': sentence[:50] + "..." if len(sentence) > 50 else sentence,
- 'level': 2
- })
-
- # Final fallback: create sections from prompt keywords
- if not structure['sections']:
- # Extract key action words from prompt
- action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
- found_actions = []
- for action in action_words:
- if action in userPrompt.lower():
- found_actions.append(action.title())
-
- if found_actions:
- for i, action in enumerate(found_actions[:3], 1):
- structure['sections'].append({
- 'number': i,
- 'title': f"{action} Document Content",
- 'level': 2
- })
- else:
- # Last resort: generic but meaningful sections
- structure['sections'] = [
- {'number': 1, 'title': 'Document Analysis', 'level': 2},
- {'number': 2, 'title': 'Key Information', 'level': 2},
- {'number': 3, 'title': 'Summary and Conclusions', 'level': 2}
- ]
-
- return structure
-
- def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
- """Generate DOCX content based on extracted structure."""
- # Add sections based on prompt structure
- for section in structure['sections']:
- # Add section heading
- doc.add_heading(f"{section['number']}) {section['title']}", level=section['level'])
-
- # Add AI-generated content for this section
- # Try to extract relevant content for this section from the AI response
- section_content = self._extractSectionContent(content, section['title'])
-
- if section_content:
- doc.add_paragraph(section_content)
- else:
- # If no specific content found, add a note
- doc.add_paragraph(f"Content for {section['title']} based on document analysis.")
-
- # Add some spacing
- doc.add_paragraph()
-
- # Add the complete AI-generated content as additional analysis
- if content and content.strip():
- doc.add_heading("Complete Analysis", level=1)
- doc.add_paragraph(content)
-
- def _extractSectionContent(self, content: str, section_title: str) -> str:
- """Extract relevant content for a specific section from AI response."""
- if not content or not section_title:
- return ""
-
- # Look for content that matches the section title
- section_keywords = section_title.lower().split()
-
- # Split content into paragraphs
- paragraphs = content.split('\n\n')
-
- relevant_paragraphs = []
- for paragraph in paragraphs:
- paragraph_lower = paragraph.lower()
- # Check if paragraph contains keywords from section title
- if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3):
- relevant_paragraphs.append(paragraph.strip())
-
- if relevant_paragraphs:
- return '\n\n'.join(relevant_paragraphs[:2]) # Max 2 paragraphs per section
-
- return ""
-
def _setupDocumentStyles(self, doc: Document, styleSet: Dict[str, Any]) -> None:
"""Create all styles in document from style set.
@@ -1386,11 +1101,15 @@ class RendererDocx(BaseRenderer):
if "title" in styleSet:
self._createStyle(doc, "Title", styleSet["title"], WD_STYLE_TYPE.PARAGRAPH)
- # Create Heading styles (Heading 1, Heading 2)
+ # Create Heading styles (Heading 1 through Heading 4)
if "heading1" in styleSet:
self._createStyle(doc, "Heading 1", styleSet["heading1"], WD_STYLE_TYPE.PARAGRAPH)
if "heading2" in styleSet:
self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH)
+ if "heading3" in styleSet:
+ self._createStyle(doc, "Heading 3", styleSet["heading3"], WD_STYLE_TYPE.PARAGRAPH)
+ if "heading4" in styleSet:
+ self._createStyle(doc, "Heading 4", styleSet["heading4"], WD_STYLE_TYPE.PARAGRAPH)
# Create Paragraph style
if "paragraph" in styleSet:
@@ -1426,6 +1145,10 @@ class RendererDocx(BaseRenderer):
font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
if "font" in styleConfig:
font.name = styleConfig["font"]
+ else:
+ us = getattr(self, '_unifiedStyle', None)
+ if us:
+ font.name = us["fonts"]["primary"]
# Set paragraph formatting for alignment
if "align" in styleConfig:
@@ -1440,328 +1163,3 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not create style '{styleName}': {str(e)}")
-
- def _processSection(self, doc, lines: list):
- """Process a section of content into DOCX elements."""
- for line in lines:
- if not line.strip():
- continue
-
- # Check for tables (lines with |)
- if '|' in line and not line.startswith('|'):
- # This might be part of a table, process as table
- table_data = self._extractTableData(lines)
- if table_data:
- self._addTable(doc, table_data)
- return
-
- # Check for lists
- if line.startswith('- ') or line.startswith('* '):
- # This is a list item
- doc.add_paragraph(line[2:], style='List Bullet')
- elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
- # This is a numbered list item
- doc.add_paragraph(line[3:], style='List Number')
- else:
- # Regular paragraph
- doc.add_paragraph(line)
-
- def _extractTableData(self, lines: list) -> list:
- """Extract table data from lines."""
- table_data = []
- in_table = False
-
- for line in lines:
- if '|' in line:
- if not in_table:
- in_table = True
- # Split by | and clean up
- cells = [cell.strip() for cell in line.split('|') if cell.strip()]
- if cells:
- table_data.append(cells)
- elif in_table and not line.strip():
- # Empty line, might be end of table
- break
-
- return table_data if len(table_data) > 1 else []
-
- def _addTable(self, doc, table_data: list):
- """Add a table to the document."""
- try:
- if not table_data:
- return
-
- # Create table
- table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
- table.alignment = WD_TABLE_ALIGNMENT.LEFT
-
- # Add data to table
- for row_idx, row_data in enumerate(table_data):
- for col_idx, cell_data in enumerate(row_data):
- if col_idx < len(table.rows[row_idx].cells):
- table.rows[row_idx].cells[col_idx].text = cell_data
-
- # Style the table
- self._styleTable(table)
-
- # Add an empty paragraph after the table to prevent Word from merging consecutive tables
- doc.add_paragraph()
-
- except Exception as e:
- self.logger.warning(f"Could not add table: {str(e)}")
-
- def _styleTable(self, table):
- """Apply styling to the table."""
- try:
- # Style header row
- if len(table.rows) > 0:
- header_cells = table.rows[0].cells
- for cell in header_cells:
- for paragraph in cell.paragraphs:
- for run in paragraph.runs:
- run.bold = True
- except Exception as e:
- self.logger.warning(f"Could not style table: {str(e)}")
-
- def _processTableRow(self, doc, line: str):
- """Process a table row and add it to the document."""
- if not line.strip():
- return
-
- # Split by pipe separator
- parts = [part.strip() for part in line.split('|')]
-
- if len(parts) >= 2:
- # This is a table row - create a table if it doesn't exist
- if not hasattr(self, '_current_table') or self._current_table is None:
- # Create new table
- self._current_table = doc.add_table(rows=1, cols=len(parts))
- self._current_table.style = 'Table Grid'
-
- # Add header row
- for i, part in enumerate(parts):
- if i < len(self._current_table.rows[0].cells):
- cell = self._current_table.rows[0].cells[i]
- cell.text = part
- # Make header bold
- for paragraph in cell.paragraphs:
- for run in paragraph.runs:
- run.bold = True
- else:
- # Add data row to existing table
- row = self._current_table.add_row()
- for i, part in enumerate(parts):
- if i < len(row.cells):
- row.cells[i].text = part
- else:
- # Not a table row, treat as regular text
- doc.add_paragraph(line)
-
- def _cleanAiContent(self, content: str) -> str:
- """Clean AI-generated content by removing debug information and duplicates."""
- if not content:
- return ""
-
- # Remove debug information
- lines = content.split('\n')
- clean_lines = []
-
- for line in lines:
- # Skip debug lines and separators
- if (line.startswith('[Skipped ') or
- line.startswith('=== DOCUMENT:') or
- line.startswith('---') or
- line.startswith('FILENAME:') or
- line.strip() == '' or
- line.strip() == '---'):
- continue
- clean_lines.append(line)
-
- # Join lines and remove duplicate content
- clean_content = '\n'.join(clean_lines)
-
- # Remove duplicate sections by keeping only the first occurrence
- sections = clean_content.split('\n\n')
- seen_sections = set()
- unique_sections = []
-
- for section in sections:
- section_key = section.strip()[:50] # Use first 50 chars as key
- if section_key not in seen_sections and section.strip():
- seen_sections.add(section_key)
- unique_sections.append(section)
-
- return '\n\n'.join(unique_sections)
-
- def _processTables(self, doc, content: str) -> str:
- """
- Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
- Returns the content with tables replaced by placeholders.
- """
- # csv is already imported at module level
-
- lines = content.split('\n')
- processed_lines = []
- i = 0
-
- while i < len(lines):
- line = lines[i].strip()
-
- # Check if this line looks like a table (contains pipes or commas with multiple fields)
- is_pipe_table = '|' in line and len(line.split('|')) >= 2
- is_csv_table = ',' in line and len(line.split(',')) >= 2
-
- if is_pipe_table or is_csv_table:
- # Collect consecutive table lines
- table_lines = []
- j = i
-
- # Determine separator and collect lines
- separator = '|' if is_pipe_table else ','
- while j < len(lines):
- current_line = lines[j].strip()
- if separator in current_line and len(current_line.split(separator)) >= 2:
- table_lines.append(current_line)
- j += 1
- else:
- break
-
- if len(table_lines) >= 2: # At least header + 1 data row
- # Create Word table
- try:
- if separator == '|':
- # Process pipe-separated table
- rows = []
- for table_line in table_lines:
- # Split by pipe and clean up
- cells = [cell.strip() for cell in table_line.split('|')]
- rows.append(cells)
- else:
- # Process CSV table
- csv_content = '\n'.join(table_lines)
- csv_reader = csv.reader(io.StringIO(csv_content))
- rows = list(csv_reader)
-
- if rows and len(rows[0]) > 0:
- # Create Word table
- table = doc.add_table(rows=len(rows), cols=len(rows[0]))
- table.style = 'Table Grid'
-
- # Populate table
- for row_idx, row_data in enumerate(rows):
- for col_idx, cell_data in enumerate(row_data):
- if col_idx < len(table.rows[row_idx].cells):
- table.rows[row_idx].cells[col_idx].text = cell_data.strip()
-
- # Make header row bold
- if row_idx == 0:
- for cell in table.rows[row_idx].cells:
- for paragraph in cell.paragraphs:
- for run in paragraph.runs:
- run.bold = True
-
- # Add an empty paragraph after the table to prevent Word from merging consecutive tables
- doc.add_paragraph()
-
- # Add placeholder to mark where table was inserted
- processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]")
-
- # Skip the table lines
- i = j
- continue
- except Exception as e:
- # If table parsing fails, treat as regular text
- pass
-
- processed_lines.append(line)
- i += 1
-
- return '\n'.join(processed_lines)
-
- def _parseAndFormatContent(self, doc, content: str, title: str):
- """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
- if not content:
- return
-
- # Process tables and replace them with placeholders
- content = self._processTables(doc, content)
-
- # Parse content line by line in exact sequence
- lines = content.split('\n')
-
- for line in lines:
- line = line.strip()
- if not line:
- # Empty line - add paragraph break
- doc.add_paragraph()
- continue
-
- # Skip table placeholders (already processed)
- if line.startswith('[TABLE_INSERTED_'):
- continue
-
- # Check if this is a Markdown heading (# ## ###)
- if line.startswith('#'):
- level = len(line) - len(line.lstrip('#'))
- heading_text = line.lstrip('# ').strip()
- doc.add_heading(heading_text, level=min(level, 3))
-
- # Check if this is a numbered heading (1) Title, 2) Title, etc.)
- elif re.match(r'^\d+\)\s+.+', line):
- heading_text = re.sub(r'^\d+\)\s+', '', line)
- doc.add_heading(heading_text, level=1)
-
- # Check if this is a Markdown list item
- elif line.startswith('- ') or re.match(r'^\d+\.\s+', line):
- bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line)
- self._add_bullet_point(doc, bullet_text)
-
- # Check if this is a code block
- elif line.startswith('```'):
- if not line.endswith('```'):
- # Start of code block - collect until end
- code_lines = [line]
- continue
- else:
- # End of code block
- if 'code_lines' in locals():
- code_lines.append(line)
- code_text = '\n'.join(code_lines)
- para = doc.add_paragraph()
- run = para.add_run(code_text)
- run.font.name = 'Courier New'
- del code_lines
-
- # Regular paragraph
- else:
- self._addParagraphToDoc(doc, line)
-
- def _addParagraphToDoc(self, doc, text: str):
- """Add a paragraph to the document with proper formatting."""
- if not text.strip():
- return
-
- # Check for Markdown formatting (**bold**, *italic*)
- para = doc.add_paragraph()
-
- # Split by bold markers
- parts = text.split('**')
- for i, part in enumerate(parts):
- if i % 2 == 0:
- # Regular text - check for italic
- italic_parts = part.split('*')
- for j, italic_part in enumerate(italic_parts):
- if j % 2 == 0:
- # Regular text
- if italic_part:
- para.add_run(italic_part)
- else:
- # Italic text
- if italic_part:
- run = para.add_run(italic_part)
- run.italic = True
- else:
- # Bold text
- if part:
- run = para.add_run(part)
- run.bold = True
\ No newline at end of file
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py
index 33093b8e..16c1cdfd 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py
@@ -115,8 +115,10 @@ class RendererHtml(BaseRenderer):
styles = self._convertUnifiedStyleToInternal(style)
self._unifiedStyle = style
else:
- styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
- self._unifiedStyle = None
+ from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle
+ style = resolveStyle()
+ styles = self._convertUnifiedStyleToInternal(style)
+ self._unifiedStyle = style
# Validate JSON structure
if not self._validateJsonStructure(jsonContent):
@@ -174,107 +176,6 @@ class RendererHtml(BaseRenderer):
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}")
- async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
- """Get style set - use styles from document generation metadata if available,
- otherwise enhance default styles with AI if userPrompt provided.
-
- WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
- not be generated separately by renderers. Only fall back to AI if styles not provided.
-
- Args:
- extractedContent: Document content with metadata (may contain styles)
- userPrompt: User's prompt (AI will detect style instructions in any language)
- aiService: AI service (used only if styles not in metadata and userPrompt provided)
- templateName: Name of template style set (None = default)
-
- Returns:
- Dict with style definitions for all document styles
- """
- # Get default style set
- defaultStyleSet = self._getDefaultStyleSet()
-
- # FIRST: Check if styles are provided in document generation metadata (preferred approach)
- if extractedContent:
- metadata = extractedContent.get("metadata", {})
- if isinstance(metadata, dict):
- styles = metadata.get("styles")
- if styles and isinstance(styles, dict):
- self.logger.debug("Using styles from document generation metadata")
- return self._validateStylesContrast(styles)
-
- # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
- if userPrompt and aiService:
- self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
- enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- return self._validateStylesContrast(enhancedStyleSet)
- else:
- # Use default styles only
- return defaultStyleSet
-
- async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
- """Enhance default styles with AI based on user prompt."""
- try:
- style_template = self._createAiStyleTemplate("html", userPrompt, defaultStyleSet)
- enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet)
- return enhanced_styles
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
- return defaultStyleSet
-
- def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Validate and fix contrast issues in AI-generated styles."""
- try:
- # Fix table header contrast
- if "table_header" in styles:
- header = styles["table_header"]
- bgColor = header.get("background", "#FFFFFF")
- textColor = header.get("color", "#000000")
-
- # If both are white or both are dark, fix it
- if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
- header["background"] = "#4F4F4F"
- header["color"] = "#FFFFFF"
- elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
- header["background"] = "#4F4F4F"
- header["color"] = "#FFFFFF"
-
- # Fix table cell contrast
- if "table_cell" in styles:
- cell = styles["table_cell"]
- bgColor = cell.get("background", "#FFFFFF")
- textColor = cell.get("color", "#000000")
-
- # If both are white or both are dark, fix it
- if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
- cell["background"] = "#FFFFFF"
- cell["color"] = "#2F2F2F"
- elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
- cell["background"] = "#FFFFFF"
- cell["color"] = "#2F2F2F"
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"Style validation failed: {str(e)}")
- return self._getDefaultStyleSet()
-
- def _getDefaultStyleSet(self) -> Dict[str, Any]:
- """Default HTML style set - used when no style instructions present."""
- return {
- "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
- "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
- "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
- "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
- "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
- "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
- "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
- "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
- "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
- "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
- "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
- }
-
-
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
# When unified style is available, generate CSS directly from it
@@ -440,7 +341,9 @@ class RendererHtml(BaseRenderer):
css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;")
css_parts.append(f" color: {h.get('color', primaryColor)};")
css_parts.append(f" font-weight: {h.get('weight', 'bold')};")
- css_parts.append(f" margin: 1.2em 0 0.4em 0;")
+ spBefore = h.get('spaceBeforePt', max(24 - (level - 1) * 4, 12))
+ spAfter = h.get('spaceAfterPt', max(8 - (level - 1) * 2, 3))
+ css_parts.append(f" margin: {spBefore}pt 0 {spAfter}pt 0;")
css_parts.append("}")
# Paragraphs
@@ -453,11 +356,16 @@ class RendererHtml(BaseRenderer):
# Tables
borderColor = tbl.get("borderColor", "#DEE2E6")
+ borderStyle = tbl.get("borderStyle", "grid")
css_parts.append("table {")
- css_parts.append(f" border-collapse: collapse;")
- css_parts.append(f" width: 100%;")
- css_parts.append(f" margin: 1em 0;")
- css_parts.append(f" border: 1px solid {borderColor};")
+ css_parts.append(" border-collapse: collapse;")
+ css_parts.append(" width: 100%;")
+ css_parts.append(" margin: 1em 0;")
+ if borderStyle == "grid":
+ css_parts.append(f" border: 1px solid {borderColor};")
+ elif borderStyle == "horizontal":
+ css_parts.append(f" border-top: 1px solid {borderColor};")
+ css_parts.append(f" border-bottom: 1px solid {borderColor};")
css_parts.append("}")
# Table headers
@@ -466,17 +374,30 @@ class RendererHtml(BaseRenderer):
css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};")
css_parts.append(" font-weight: bold;")
css_parts.append(" text-align: center;")
- css_parts.append(f" padding: 10px;")
- css_parts.append(f" border: 1px solid {borderColor};")
+ css_parts.append(" padding: 10px;")
+ if borderStyle == "grid":
+ css_parts.append(f" border: 1px solid {borderColor};")
+ elif borderStyle == "horizontal":
+ css_parts.append(f" border-bottom: 1px solid {borderColor};")
css_parts.append("}")
# Table cells
css_parts.append("td {")
css_parts.append(f" color: {paraColor};")
css_parts.append(" padding: 8px;")
- css_parts.append(f" border: 1px solid {borderColor};")
+ if borderStyle == "grid":
+ css_parts.append(f" border: 1px solid {borderColor};")
+ elif borderStyle == "horizontal":
+ css_parts.append(f" border-bottom: 1px solid {borderColor};")
css_parts.append("}")
+ # Row banding
+ if tbl.get("bandingEnabled", True):
+ evenBg = tbl.get("rowBandingEven", "#f6f8fa")
+ oddBg = tbl.get("rowBandingOdd", "#FFFFFF")
+ css_parts.append(f"tbody tr:nth-child(even) {{ background: {evenBg}; }}")
+ css_parts.append(f"tbody tr:nth-child(odd) {{ background: {oddBg}; }}")
+
# Lists
css_parts.append("ul {")
css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;")
@@ -499,13 +420,33 @@ class RendererHtml(BaseRenderer):
css_parts.append("}")
# Images
+ imgStyle = style.get("image", {})
+ imgMaxWidth = imgStyle.get("maxWidthPt", 800)
+ imgAlignment = imgStyle.get("alignment", "center")
css_parts.append("img {")
- css_parts.append(" max-width: 100%;")
+ css_parts.append(f" max-width: min({imgMaxWidth}pt, 100%);")
css_parts.append(" height: auto;")
- css_parts.append(" margin: 1em 0;")
+ css_parts.append(" display: block;")
+ if imgAlignment == "center":
+ css_parts.append(" margin: 1em auto;")
+ elif imgAlignment == "right":
+ css_parts.append(" margin: 1em 0 1em auto;")
+ else:
+ css_parts.append(" margin: 1em 0;")
css_parts.append(" border-radius: 4px;")
css_parts.append("}")
+ # Figcaptions
+ captionStyle = style.get("caption", {})
+ css_parts.append("figcaption {")
+ css_parts.append(f" font-size: {captionStyle.get('sizePt', 10)}pt;")
+ css_parts.append(f" color: {captionStyle.get('color', '#586069')};")
+ if captionStyle.get("italic", True):
+ css_parts.append(" font-style: italic;")
+ css_parts.append(f" text-align: {captionStyle.get('align', 'center')};")
+ css_parts.append(" margin-top: 0.5em;")
+ css_parts.append("}")
+
# Generated info
css_parts.append(".generated-info {")
css_parts.append(" font-size: 0.9em;")
@@ -839,9 +780,7 @@ class RendererHtml(BaseRenderer):
# Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
# Include a marker so we can find and replace it
imageMarker = f""
- # Add max-width and max-height to ensure image fits within page dimensions
- # Typical page width is ~800-1200px, height varies but we limit to 600px for readability
- imgTag = f'
'
+ imgTag = f'
'
if captionEscaped:
return f'{imageMarker}{imgTag}{captionEscaped}'
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py
index 9bfe3788..fc6bd51b 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py
@@ -20,7 +20,7 @@ try:
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
- from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY, TA_RIGHT
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
@@ -28,12 +28,71 @@ except ImportError:
import re as _re_pdf
from ._pdfFontFallback import wrapEmojiSpansInXml as _wrapEmojiSpansInXml
+from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge as _deepMergeStyle
# A4 width in pt; margins must match SimpleDocTemplate(leftMargin/rightMargin)
_PDF_MARGIN_LR_PT = 72.0
_PDF_A4_WIDTH_PT = 595.27
_PDF_CONTENT_WIDTH_PT = _PDF_A4_WIDTH_PT - (2 * _PDF_MARGIN_LR_PT)
+# Font resolution: map CSS/system font names to ReportLab built-in equivalents.
+# ReportLab core fonts: Helvetica, Times-Roman, Courier, Symbol, ZapfDingbats.
+_FONT_FALLBACK_MAP = {
+ "calibri": "Helvetica",
+ "arial": "Helvetica",
+ "verdana": "Helvetica",
+ "segoe ui": "Helvetica",
+ "helvetica": "Helvetica",
+ "times new roman": "Times-Roman",
+ "times": "Times-Roman",
+ "georgia": "Times-Roman",
+ "consolas": "Courier",
+ "courier new": "Courier",
+ "courier": "Courier",
+ "monospace": "Courier",
+}
+_BOLD_VARIANT = {
+ "Helvetica": "Helvetica-Bold",
+ "Times-Roman": "Times-Bold",
+ "Courier": "Courier-Bold",
+}
+_registeredTtfFonts: set = set()
+
+
+def _resolveFontFamily(fontName: str, bold: bool = False) -> str:
+    """Resolve a CSS/system font name to a font name ReportLab can render.
+
+    Core-font aliases map via _FONT_FALLBACK_MAP; other names are looked up as
+    <name>.ttf and registered under their lower-cased name. Only the regular
+    TTF face is registered, so bold requests for it return that same face.
+    """
+    if not fontName:
+        return "Helvetica-Bold" if bold else "Helvetica"
+    fontName = fontName.strip()
+    key = fontName.lower()
+    if key in _registeredTtfFonts:
+        return key  # "<name>-Bold" was never registered and would fail at draw time
+    if key and key not in _FONT_FALLBACK_MAP:
+        try:
+            from reportlab.pdfbase import pdfmetrics
+            from reportlab.pdfbase.ttfonts import TTFont
+            import os
+            winFontsDir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
+            candidates = [
+                os.path.join(winFontsDir, f"{fontName}.ttf"),
+                os.path.join(winFontsDir, f"{key}.ttf"),
+                f"/usr/share/fonts/truetype/{key}/{key}.ttf",
+            ]
+            for path in candidates:
+                if os.path.isfile(path):
+                    pdfmetrics.registerFont(TTFont(key, path))
+                    _registeredTtfFonts.add(key)
+                    return key
+        except Exception:
+            pass  # best effort: unreadable or invalid font file -> core-font fallback
+    coreFont = _FONT_FALLBACK_MAP.get(key, "Helvetica")
+    return _BOLD_VARIANT.get(coreFont, f"{coreFont}-Bold") if bold else coreFont
+
def _boxDrawingCharToAscii(ch: str) -> str:
"""Map one box-drawing character to ASCII (Courier has no glyphs for U+2500–U+257F)."""
@@ -170,25 +229,23 @@ class RendererPdf(BaseRenderer):
# memory simultaneously. Collected here, deleted after the build.
self._tempImageFiles = []
try:
- # Get style set from unified style or legacy approach
+ self._unifiedStyle = unifiedStyle
if unifiedStyle:
styles = self._convertUnifiedStyleToInternal(unifiedStyle)
- self._unifiedStyle = unifiedStyle
- for level in range(1, 7):
- hKey = f"heading{level}"
- if hKey not in styles:
- styles[hKey] = self._defaultHeadingStyleDef(level)
- else:
- styles[hKey].setdefault("space_after", 12)
- styles[hKey].setdefault("space_before", 12)
- styles["paragraph"].setdefault("space_after", 6)
- styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
- styles["bullet_list"].setdefault("space_after", 3)
- styles["code_block"].setdefault("space_after", 6)
- styles["code_block"].setdefault("align", "left")
else:
- styles = await self._getStyleSet(json_content, userPrompt, aiService)
- self._unifiedStyle = None
+ styles = self._convertUnifiedStyleToInternal({})
+ for level in range(1, 7):
+ hKey = f"heading{level}"
+ if hKey not in styles:
+ styles[hKey] = self._defaultHeadingStyleDef(level)
+ else:
+ styles[hKey].setdefault("space_after", 12)
+ styles[hKey].setdefault("space_before", 12)
+ styles["paragraph"].setdefault("space_after", 6)
+ styles["paragraph"].setdefault("line_height", (unifiedStyle or {}).get("paragraph", {}).get("lineSpacing", 1.5))
+ styles["bullet_list"].setdefault("space_after", 3)
+ styles["code_block"].setdefault("space_after", 6)
+ styles["code_block"].setdefault("align", "left")
# Validate JSON structure
if not self._validateJsonStructure(json_content):
@@ -307,247 +364,6 @@ class RendererPdf(BaseRenderer):
if not removed:
raise
- async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
- """Get style set - use styles from document generation metadata if available,
- otherwise enhance default styles with AI if userPrompt provided.
-
- WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
- not be generated separately by renderers. Only fall back to AI if styles not provided.
-
- Args:
- extractedContent: Document content with metadata (may contain styles)
- userPrompt: User's prompt (AI will detect style instructions in any language)
- aiService: AI service (used only if styles not in metadata and userPrompt provided)
- templateName: Name of template style set (None = default)
-
- Returns:
- Dict with style definitions for all document styles
- """
- # Get default style set
- defaultStyleSet = self._getDefaultStyleSet()
-
- # FIRST: Check if styles are provided in document generation metadata (preferred approach)
- if extractedContent:
- metadata = extractedContent.get("metadata", {})
- if isinstance(metadata, dict):
- styles = metadata.get("styles")
- if styles and isinstance(styles, dict):
- self.logger.debug("Using styles from document generation metadata")
- enhancedStyleSet = self._convertColorsFormat(styles)
- return self._validateStylesContrast(enhancedStyleSet)
-
- # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
- if userPrompt and aiService:
- self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
- enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- # Convert colors to PDF format after getting styles
- enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
- return self._validateStylesContrast(enhancedStyleSet)
- else:
- # Use default styles only
- return defaultStyleSet
-
- async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
- """Enhance default styles with AI based on user prompt."""
- try:
- style_template = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet)
- enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet)
- return enhanced_styles
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
- return defaultStyleSet
-
- def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Validate and fix contrast issues in AI-generated styles."""
- try:
- # Fix table header contrast
- if "table_header" in styles:
- header = styles["table_header"]
- bg_color = header.get("background", "#FFFFFF")
- text_color = header.get("text_color", "#000000")
-
- # If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
- header["background"] = "#4F4F4F"
- header["text_color"] = "#FFFFFF"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
- header["background"] = "#4F4F4F"
- header["text_color"] = "#FFFFFF"
-
- # Fix table cell contrast
- if "table_cell" in styles:
- cell = styles["table_cell"]
- bg_color = cell.get("background", "#FFFFFF")
- text_color = cell.get("text_color", "#000000")
-
- # If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
- cell["background"] = "#FFFFFF"
- cell["text_color"] = "#2F2F2F"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
- cell["background"] = "#FFFFFF"
- cell["text_color"] = "#2F2F2F"
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"Style validation failed: {str(e)}")
- return self._getDefaultStyleSet()
-
- def _getDefaultStyleSet(self) -> Dict[str, Any]:
- """Default PDF style set - used when no style instructions present."""
- return {
- "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
- # Markdown #..###### — sizes must strictly decrease (H1 largest … H6 smallest).
- "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
- "heading2": {"font_size": 15, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 10, "space_before": 10},
- "heading3": {"font_size": 13, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
- "heading4": {"font_size": 12, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
- "heading5": {"font_size": 11, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6},
- "heading6": {"font_size": 10, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 4, "space_before": 4},
- "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
- "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "left", "font_size": 12},
- "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
- "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
- "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6, "align": "left"}
- }
-
- async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
- """Get AI styles with proper PDF color conversion."""
- if not ai_service:
- return default_styles
-
- try:
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
-
- request_options = AiCallOptions()
- request_options.operationType = OperationTypeEnum.DATA_GENERATE
-
- request = AiCallRequest(prompt=style_template, context="", options=request_options)
-
- # Check if AI service is properly configured
- if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
- self.logger.warning("AI service not properly configured, using defaults")
- return default_styles
-
- response = await ai_service.callAi(request)
-
- # Check if response is valid
- if not response:
- self.logger.warning("AI service returned no response, using defaults")
- return default_styles
-
- import json
- import re
-
- # Clean and parse JSON
- result = response.content.strip() if response and response.content else ""
-
- # Check if result is empty
- if not result:
- self.logger.warning("AI styling returned empty response, using defaults")
- return default_styles
-
- # Log the raw response for debugging
- self.logger.debug(f"AI styling raw response: {result[:200]}...")
-
- # Extract JSON from various formats
- json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
- if json_match:
- result = json_match.group(1).strip()
- elif result.startswith('```json'):
- result = re.sub(r'^```json\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
- elif result.startswith('```'):
- result = re.sub(r'^```\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
-
- # Try to extract JSON from explanatory text
- json_patterns = [
- r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
- r'\{.*?"title".*?\}', # JSON with title field
- r'\{.*?"font_size".*?\}', # JSON with font_size field
- ]
-
- for pattern in json_patterns:
- json_match = re.search(pattern, result, re.DOTALL)
- if json_match:
- result = json_match.group(0)
- break
-
- # Additional cleanup - remove any leading/trailing whitespace and newlines
- result = result.strip()
-
- # Check if result is still empty after cleanup
- if not result:
- self.logger.warning("AI styling returned empty content after cleanup, using defaults")
- return default_styles
-
- # Try to parse JSON
- try:
- styles = json.loads(result)
- self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
- except json.JSONDecodeError as json_error:
- self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
-
- # Use print instead of logger to avoid truncation
- self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
- self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
-
- self.logger.warning(f"Raw content that failed to parse: {result}")
-
- # Try to fix incomplete JSON by adding missing closing braces
- open_braces = result.count('{')
- close_braces = result.count('}')
-
- if open_braces > close_braces:
- # JSON is incomplete, add missing closing braces
- missing_braces = open_braces - close_braces
- result = result + '}' * missing_braces
- self.logger.info(f"Added {missing_braces} missing closing brace(s)")
-
- # Try parsing the fixed JSON
- try:
- styles = json.loads(result)
- self.logger.info("Successfully fixed incomplete JSON")
- except json.JSONDecodeError as fix_error:
- self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
- # Try to extract just the JSON part if it's embedded in text
- json_start = result.find('{')
- json_end = result.rfind('}')
- if json_start != -1 and json_end != -1 and json_end > json_start:
- json_part = result[json_start:json_end+1]
- try:
- styles = json.loads(json_part)
- self.logger.info("Successfully extracted JSON from explanatory text")
- except json.JSONDecodeError:
- self.logger.warning("Could not extract valid JSON from response, using defaults")
- return default_styles
- else:
- return default_styles
- else:
- # Try to extract just the JSON part if it's embedded in text
- json_start = result.find('{')
- json_end = result.rfind('}')
- if json_start != -1 and json_end != -1 and json_end > json_start:
- json_part = result[json_start:json_end+1]
- try:
- styles = json.loads(json_part)
- self.logger.info("Successfully extracted JSON from explanatory text")
- except json.JSONDecodeError:
- self.logger.warning("Could not extract valid JSON from response, using defaults")
- return default_styles
- else:
- return default_styles
-
- # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
- styles = self._convertColorsFormat(styles)
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
- return default_styles
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility."""
@@ -580,9 +396,13 @@ class RendererPdf(BaseRenderer):
sizes = {1: 18, 2: 15, 3: 13, 4: 12, 5: 11, 6: 10}
fs = sizes.get(level, 10)
sb = max(4, 14 - level)
+ us = getattr(self, '_unifiedStyle', None) or {}
+ clrs = us.get("colors", {})
+ primary = clrs.get("primary", "#24292e")
+ secondary = clrs.get("secondary", "#586069")
return {
"font_size": fs,
- "color": "#2F2F2F" if level <= 2 else "#4F4F4F",
+ "color": primary if level <= 2 else secondary,
"bold": True,
"align": "left",
"space_after": sb,
@@ -594,14 +414,19 @@ class RendererPdf(BaseRenderer):
title_style_def = styles.get("title") or {}
fs = title_style_def.get("font_size", 26)
bold = title_style_def.get("bold", True)
+ us = getattr(self, '_unifiedStyle', None)
+ primaryFont = us["fonts"]["primary"] if us else "Calibri"
+ coverTitleColor = styles.get("cover_page", {}).get("title_color")
+ colorsFallback = styles.get("colors", {}).get("primary", "#24292e")
+ titleColor = title_style_def.get("color", coverTitleColor or colorsFallback)
return ParagraphStyle(
"DocumentTitle",
- fontName="Helvetica-Bold" if bold else "Helvetica",
+ fontName=_resolveFontFamily(primaryFont, bold),
fontSize=fs,
spaceAfter=title_style_def.get("space_after", 18),
spaceBefore=title_style_def.get("space_before", 0),
alignment=self._getAlignment(title_style_def.get("align", "center")),
- textColor=self._hexToColor(title_style_def.get("color", "#1F3864")),
+ textColor=self._hexToColor(titleColor),
leading=fs * 1.25,
)
@@ -611,28 +436,32 @@ class RendererPdf(BaseRenderer):
heading_style_def = styles.get(heading_key) or self._defaultHeadingStyleDef(level)
fs = heading_style_def.get("font_size", self._defaultHeadingStyleDef(level)["font_size"])
bold = heading_style_def.get("bold", True)
+ us = getattr(self, '_unifiedStyle', None)
+ primaryFont = us["fonts"]["primary"] if us else "Calibri"
return ParagraphStyle(
f'CustomHeading{level}',
- fontName="Helvetica-Bold" if bold else "Helvetica",
+ fontName=_resolveFontFamily(primaryFont, bold),
fontSize=fs,
spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12),
alignment=self._getAlignment(heading_style_def.get("align", "left")),
- textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F")),
+ textColor=self._hexToColor(heading_style_def.get("color", styles.get("colors", {}).get("primary", "#24292e"))),
leading=fs * 1.35,
)
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {})
-
+ us = getattr(self, '_unifiedStyle', None)
+ primaryFont = us["fonts"]["primary"] if us else "Calibri"
return ParagraphStyle(
'CustomNormal',
+ fontName=_resolveFontFamily(primaryFont, False),
fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6),
alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
- textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
- leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
+ textColor=self._hexToColor(paragraph_style_def.get("color", "#24292e")),
+ leading=paragraph_style_def.get("line_height", 1.5) * paragraph_style_def.get("font_size", 11)
)
def _getAlignment(self, align: str) -> int:
@@ -644,10 +473,10 @@ class RendererPdf(BaseRenderer):
"center": TA_CENTER,
"left": TA_LEFT,
"justify": TA_JUSTIFY,
- "right": TA_LEFT, # ReportLab doesn't have TA_RIGHT, use LEFT as fallback
- "0": TA_LEFT, # Handle numeric strings
+ "right": TA_RIGHT,
+ "0": TA_LEFT,
"1": TA_CENTER,
- "2": TA_JUSTIFY
+ "2": TA_JUSTIFY,
}
return align_map.get(align.lower().strip(), TA_LEFT)
@@ -687,7 +516,7 @@ class RendererPdf(BaseRenderer):
"""Convert inline runs to ReportLab Paragraph XML."""
parts = []
us = getattr(self, '_unifiedStyle', None)
- monoFont = us["fonts"]["monospace"] if us else "Courier"
+ monoFont = _resolveFontFamily(us["fonts"]["monospace"] if us else "Courier")
for run in runs:
runType = run.get("type", "text")
value = self._escapeReportlabXml(run.get("value", ""))
@@ -730,13 +559,15 @@ class RendererPdf(BaseRenderer):
if not text:
return ""
text = _normalizePdfMonospaceText(text)
+ us = getattr(self, '_unifiedStyle', None)
+ monoFont = _resolveFontFamily(us["fonts"]["monospace"] if us else "Courier")
out: List[str] = []
pos = 0
for m in _re_pdf.finditer(r"`([^`]*)`", text):
before = text[pos:m.start()]
out.append(self._applyInlineMarkdownToEscapedPlain(before))
code = m.group(1)
- out.append(f'{self._escapeReportlabXml(code)}')
+ out.append(f'{self._escapeReportlabXml(code)}')
pos = m.end()
out.append(self._applyInlineMarkdownToEscapedPlain(text[pos:]))
return _wrapEmojiSpansInXml("".join(out))
@@ -750,16 +581,75 @@ class RendererPdf(BaseRenderer):
"""Paragraph style for table cells (word wrap within colWidth)."""
tdef = styles.get(tableStyleKey, {})
fs = tdef.get("font_size", 12 if header else 10)
- defaultTc = "#FFFFFF" if header else "#2F2F2F"
+ defaultTc = "#24292e"
+ us = getattr(self, '_unifiedStyle', None)
+ primaryFont = us["fonts"]["primary"] if us else "Calibri"
+ isBold = header and tdef.get("bold", True)
return ParagraphStyle(
f"TblCell{'H' if header else 'B'}{tableStyleKey}",
fontSize=fs,
leading=fs * 1.25,
- alignment=TA_LEFT,
+ alignment=self._getAlignment(tdef.get("align", "left")),
textColor=self._hexToColor(tdef.get("text_color", defaultTc)),
- fontName="Helvetica-Bold" if header and tdef.get("bold", True) else "Helvetica",
+ fontName=_resolveFontFamily(primaryFont, isBold),
)
+ def _createCaptionStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
+ """Paragraph style for image/figure captions driven by styles["caption"]."""
+ captionDef = styles.get("caption", {})
+ us = getattr(self, '_unifiedStyle', None)
+ primaryFont = us["fonts"]["primary"] if us else "Calibri"
+ fs = captionDef.get("font_size", 10)
+ colorFallback = styles.get("colors", {}).get("secondary", "#586069")
+ return ParagraphStyle(
+ "CaptionStyle",
+ fontName=_resolveFontFamily(primaryFont, False),
+ fontSize=fs,
+ leading=fs * 1.25,
+ textColor=self._hexToColor(captionDef.get("color", colorFallback)),
+ alignment=self._getAlignment(captionDef.get("align", "center")),
+ spaceAfter=4,
+ )
+
+ def _inferColumnAlignments(self, headers: List, rows: List, mergedTableStyle: Dict[str, Any]) -> List[str]:
+ """Infer per-column text alignment from explicit style or cell content heuristics.
+
+ Numeric-majority columns (>60 %) get right-aligned; everything else left.
+ An explicit ``columnAlignments`` list in *mergedTableStyle* takes precedence.
+ """
+ numCols = len(headers)
+ explicit = mergedTableStyle.get("columnAlignments", [])
+ if explicit and len(explicit) >= numCols:
+ return list(explicit[:numCols])
+ alignments = list(explicit) if explicit else []
+ for colIdx in range(len(alignments), numCols):
+ numericCount = 0
+ totalCount = 0
+ for row in rows:
+ if colIdx < len(row):
+ cell = row[colIdx]
+ if isinstance(cell, list):
+ val = "".join(
+ r.get("value", "") if isinstance(r, dict) else str(r) for r in cell
+ ).strip()
+ elif cell is not None:
+ val = str(cell).strip()
+ else:
+ val = ""
+ if val:
+ totalCount += 1
+ cleaned = val.replace(",", "").replace("%", "").replace("$", "").replace("\u20ac", "").replace("'", "").strip()
+ try:
+ float(cleaned)
+ numericCount += 1
+ except (ValueError, TypeError):
+ pass
+ if totalCount > 0 and numericCount / totalCount > 0.6:
+ alignments.append("right")
+ else:
+ alignments.append("left")
+ return alignments
+
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
@@ -841,7 +731,12 @@ class RendererPdf(BaseRenderer):
return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
- """Render a JSON table: left-aligned, width capped to printable area, cells wrap."""
+ """Render a JSON table: left-aligned, width capped to printable area, cells wrap.
+
+ Supports per-table style overrides via ``content["tableStyle"]``, border
+ style variants (grid / horizontal / none), banding toggle, configurable
+ cell padding, and auto-inferred column alignments.
+ """
try:
content = table_data.get("content", {})
if not isinstance(content, dict):
@@ -852,12 +747,30 @@ class RendererPdf(BaseRenderer):
if not headers or not rows:
return []
+ # Per-table style override merged onto global table style
+ us = getattr(self, '_unifiedStyle', None) or {}
+ globalTableStyle = us.get("table", {})
+ perTableOverride = content.get("tableStyle", {})
+ mergedTableStyle = _deepMergeStyle(globalTableStyle, perTableOverride) if perTableOverride else dict(globalTableStyle)
+
numCols = len(headers)
colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
colWidths = [colWidth] * numCols
+ colAligns = self._inferColumnAlignments(headers, rows, mergedTableStyle)
+
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
- cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
+
+ cellBasePs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
+ colCellStyles: List[ParagraphStyle] = []
+ for colIdx in range(numCols):
+ colAlign = colAligns[colIdx] if colIdx < len(colAligns) else "left"
+ colPs = ParagraphStyle(
+ f"TblCellB_c{colIdx}",
+ parent=cellBasePs,
+ alignment=self._getAlignment(colAlign),
+ )
+ colCellStyles.append(colPs)
def _cellPara(cell, ps):
runs = self._inlineRunsForCell(cell)
@@ -871,28 +784,45 @@ class RendererPdf(BaseRenderer):
for row in rows:
padded = list(row) + [""] * max(0, numCols - len(row))
padded = padded[:numCols]
- bodyRows.append([_cellPara(c, cellPs) for c in padded])
+ bodyRows.append([_cellPara(padded[i], colCellStyles[i]) for i in range(numCols)])
table_matrix = [headerRow] + bodyRows
table = Table(table_matrix, colWidths=colWidths, repeatRows=1)
table_header_style = styles.get("table_header", {})
- table_cell_style = styles.get("table_cell", {})
+ borderColor = self._hexToColor(mergedTableStyle.get("borderColor", "#e1e4e8"))
+ borderWidth = mergedTableStyle.get("borderWidthPt", 0.5)
+ evenBg = self._hexToColor(mergedTableStyle.get("rowBandingEven", "#f6f8fa"))
+ oddBg = self._hexToColor(mergedTableStyle.get("rowBandingOdd", "#FFFFFF"))
+ cellPad = mergedTableStyle.get("cellPaddingPt", 4)
- table_style = [
- ("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
- ("BACKGROUND", (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))),
+ tableStyleCmds = [
+ ("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#f6f8fa"))),
("ALIGN", (0, 0), (-1, -1), "LEFT"),
("VALIGN", (0, 0), (-1, -1), "TOP"),
- ("LEFTPADDING", (0, 0), (-1, -1), 4),
- ("RIGHTPADDING", (0, 0), (-1, -1), 4),
- ("TOPPADDING", (0, 0), (-1, 0), 6),
- ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
- ("TOPPADDING", (0, 1), (-1, -1), 4),
- ("BOTTOMPADDING", (0, 1), (-1, -1), 4),
- ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
+ ("LEFTPADDING", (0, 0), (-1, -1), cellPad),
+ ("RIGHTPADDING", (0, 0), (-1, -1), cellPad),
+ ("TOPPADDING", (0, 0), (-1, 0), cellPad + 2),
+ ("BOTTOMPADDING", (0, 0), (-1, 0), cellPad + 4),
+ ("TOPPADDING", (0, 1), (-1, -1), cellPad),
+ ("BOTTOMPADDING", (0, 1), (-1, -1), cellPad),
]
- table.setStyle(TableStyle(table_style))
+
+ borderStyleName = mergedTableStyle.get("borderStyle", "grid")
+ if borderStyleName == "grid":
+ tableStyleCmds.append(("GRID", (0, 0), (-1, -1), borderWidth, borderColor))
+ elif borderStyleName == "horizontal":
+ tableStyleCmds.append(("LINEABOVE", (0, 0), (-1, 0), borderWidth, borderColor))
+ for rowIdx in range(len(table_matrix)):
+ tableStyleCmds.append(("LINEBELOW", (0, rowIdx), (-1, rowIdx), borderWidth, borderColor))
+
+ bandingEnabled = mergedTableStyle.get("bandingEnabled", True)
+ if bandingEnabled:
+ for rowIdx in range(1, len(table_matrix)):
+ bg = evenBg if rowIdx % 2 == 0 else oddBg
+ tableStyleCmds.append(("BACKGROUND", (0, rowIdx), (-1, rowIdx), bg))
+
+ table.setStyle(TableStyle(tableStyleCmds))
return [table, Spacer(1, 12)]
except Exception as e:
@@ -911,7 +841,7 @@ class RendererPdf(BaseRenderer):
bulletStyle = ParagraphStyle(
"BulletItem",
fontSize=bulletStyleDef.get("font_size", 11),
- textColor=self._hexToColor(bulletStyleDef.get("color", "#333333")),
+ textColor=self._hexToColor(bulletStyleDef.get("color", styles.get("colors", {}).get("primary", "#24292e"))),
leftIndent=indent,
firstLineIndent=-indent,
spaceAfter=2,
@@ -1006,11 +936,13 @@ class RendererPdf(BaseRenderer):
fs = code_style_def.get("font_size", 9)
mono = code_style_def.get("font", "Courier")
+ textColorFallback = styles.get("colors", {}).get("primary", "#24292e")
+
if language:
lang_style = ParagraphStyle(
"CodeLanguage",
fontSize=fs,
- textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
+ textColor=self._hexToColor(code_style_def.get("color", textColorFallback)),
fontName="Helvetica-Bold",
alignment=TA_LEFT,
)
@@ -1024,7 +956,7 @@ class RendererPdf(BaseRenderer):
approxCharWPt = max(fs * 0.52, 4.5)
usableWidth = _PDF_CONTENT_WIDTH_PT - 16 # left+right padding
maxLineChars = max(48, int(usableWidth / approxCharWPt))
- bg_col = self._hexToColor(code_style_def.get("background", "#F5F5F5"))
+ bg_col = self._hexToColor(code_style_def.get("background", "#f6f8fa"))
leading = fs * 1.2
spaceAfter = code_style_def.get("space_after", 6)
@@ -1054,17 +986,19 @@ class RendererPdf(BaseRenderer):
fontName=mono,
fontSize=fs,
leading=leading,
- textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
+ textColor=self._hexToColor(code_style_def.get("color", textColorFallback)),
alignment=TA_LEFT,
leftIndent=0,
rightIndent=0,
)
pf = Preformatted(chunkText, codePrStyle, dedent=0, maxLineLength=maxLineChars)
+ borderCol = self._hexToColor(code_style_def.get("border_color", "#e1e4e8"))
tbl = Table([[pf]], colWidths=[_PDF_CONTENT_WIDTH_PT])
tbl.setStyle(
TableStyle(
[
("BACKGROUND", (0, 0), (-1, -1), bg_col),
+ ("BOX", (0, 0), (-1, -1), 0.5, borderCol),
("VALIGN", (0, 0), (-1, -1), "TOP"),
("LEFTPADDING", (0, 0), (-1, -1), 8),
("RIGHTPADDING", (0, 0), (-1, -1), 8),
@@ -1103,11 +1037,26 @@ class RendererPdf(BaseRenderer):
if title:
out.append(self._paragraphFromInlineMarkdown(title, self._createDocumentTitleStyle(styles)))
out.append(Spacer(1, 18))
- for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)):
+ coverDef = styles.get("cover_page", {})
+ coverSizes = {
+ "subtitle": coverDef.get("subtitle_size", 16),
+ "author": coverDef.get("author_size", 12),
+ "date": coverDef.get("date_size", 12),
+ }
+ coverColors = {
+ "subtitle": coverDef.get("subtitle_color"),
+ "author": None,
+ "date": None,
+ }
+ for key in ("subtitle", "author", "date"):
val = (content.get(key) or "").strip()
if not val:
continue
- st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), alignment=1, fontSize=sizePt)
+ sizePt = coverSizes[key]
+ kwargs: Dict[str, Any] = {"alignment": 1, "fontSize": sizePt}
+ if coverColors[key]:
+ kwargs["textColor"] = self._hexToColor(coverColors[key])
+ st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), **kwargs)
out.append(Paragraph(self._escapeReportlabXml(val), st))
out.append(Spacer(1, 8))
out.append(PageBreak())
@@ -1310,26 +1259,27 @@ class RendererPdf(BaseRenderer):
# Add caption if available
if caption:
- captionStyle = self._createNormalStyle(styles)
- captionStyle.fontSize = 10
- captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
- elements.append(Paragraph(f"{caption}", captionStyle))
+ captionDef = styles.get("caption", {})
+ capStyle = self._createCaptionStyle(styles)
+ capXml = self._escapeReportlabXml(caption)
+ if captionDef.get("italic", True):
+ capXml = f"{capXml}"
+ elements.append(Paragraph(capXml, capStyle))
elif alt_text and alt_text != "Image":
- # Use alt text as caption if no caption provided, but avoid usageHint format
if "Render as visual element:" in alt_text:
- # Extract filename from usageHint if possible
parts = alt_text.split("Render as visual element:")
if len(parts) > 1:
- filename = parts[1].strip()
- caption_text = f"Figure: {filename}"
+ caption_text = f"Figure: {parts[1].strip()}"
else:
caption_text = alt_text
else:
caption_text = f"Figure: {alt_text}"
- captionStyle = self._createNormalStyle(styles)
- captionStyle.fontSize = 10
- captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
- elements.append(Paragraph(f"{caption_text}", captionStyle))
+ captionDef = styles.get("caption", {})
+ capStyle = self._createCaptionStyle(styles)
+ capXml = self._escapeReportlabXml(caption_text)
+ if captionDef.get("italic", True):
+ capXml = f"{capXml}"
+ elements.append(Paragraph(capXml, capStyle))
return elements
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py
index 49ee8048..7064ea1e 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py
@@ -9,6 +9,7 @@ from datetime import datetime, UTC
from typing import Dict, Any, Optional, List
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
+from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge
logger = logging.getLogger(__name__)
@@ -22,6 +23,16 @@ _PPTX_MD_INLINE_RE = re.compile(
)
+def _parseHexColor(hexStr: str) -> tuple:
+ """Parse a hex color string like '#1F3864' into an RGB tuple."""
+ hexStr = hexStr.lstrip('#')
+ if len(hexStr) == 6:
+ return (int(hexStr[0:2], 16), int(hexStr[2:4], 16), int(hexStr[4:6], 16))
+ elif len(hexStr) == 8:
+ return (int(hexStr[2:4], 16), int(hexStr[4:6], 16), int(hexStr[6:8], 16))
+ return (0, 0, 0)
+
+
class RendererPptx(BaseRenderer):
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
@@ -74,26 +85,18 @@ class RendererPptx(BaseRenderer):
Base64-encoded PowerPoint presentation as string
"""
try:
- # Import python-pptx
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
import re
- # Get style set: prefer unified style, then metadata, then AI-enhanced
- if style:
- internalStyle = self._convertUnifiedStyleToInternal(style)
- defaultPptx = self._getDefaultStyleSet()
- for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"):
- internalStyle[key] = defaultPptx.get(key)
- internalStyle["heading"] = internalStyle["heading1"]
- internalStyle["subheading"] = internalStyle["heading2"]
- styles = internalStyle
- self._unifiedStyle = style
- else:
- styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
- self._unifiedStyle = None
+ if not style:
+ style = {}
+ internalStyle = self._convertUnifiedStyleToInternal(style)
+ styles = internalStyle
+ self._styles = styles
+ self._unifiedStyle = style
# Create new presentation
prs = Presentation()
@@ -140,8 +143,9 @@ class RendererPptx(BaseRenderer):
p = tf.paragraphs[0]
p.font.size = Pt(titleStyle.get("font_size", 36))
p.font.bold = titleStyle.get("bold", True)
- tColor = self._getSafeColor(titleStyle.get("color", (31, 78, 121)))
+ tColor = self._getSafeColor(titleStyle.get("color", self._primaryColor()))
p.font.color.rgb = RGBColor(*tColor)
+ self._applyPrimaryFont(p.font)
except Exception as titleErr:
logger.warning(f"Could not style title slide: {titleErr}")
# Clear subtitle placeholder
@@ -207,23 +211,20 @@ class RendererPptx(BaseRenderer):
title_font_size = max(10, min(title_font_size, 32))
titleFrame.paragraphs[0].font.size = Pt(title_font_size)
titleFrame.paragraphs[0].font.bold = title_style.get("bold", True)
- title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
+ title_color = self._getSafeColor(title_style.get("color", self._primaryColor()))
titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT
titleFrame.word_wrap = True
+ self._applyPrimaryFont(titleFrame.paragraphs[0].font)
# Render sections with proper PowerPoint objects (tables, lists, etc.)
- # Organize content into frames for better layout
if hasSections:
- # Organize sections into content groups for frame-based layout
- # Images are handled within the frame rendering method
self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs)
# Fallback: if no sections but has content text, render in textbox
elif slide_content and not hasImages:
- # Create textbox for content (no placeholders in blank layout)
from pptx.util import Inches
- title_height_used = Inches(1.0) # Title height for blank slides
+ title_height_used = Inches(1.0)
content_left = Inches(0.5)
content_top = title_height_used + Inches(0.3)
content_width = prs.slide_width - Inches(1)
@@ -233,7 +234,6 @@ class RendererPptx(BaseRenderer):
text_frame.word_wrap = True
text_frame.auto_size = None
- # Split content into paragraphs
paragraphs = slide_content.split('\n\n')
for paragraph in paragraphs:
@@ -241,10 +241,8 @@ class RendererPptx(BaseRenderer):
p = text_frame.add_paragraph()
p.text = paragraph.strip()
- # Apply AI-generated styling with adaptive sizing
paragraph_style = styles.get("paragraph", {})
base_font_size = paragraph_style.get("font_size", 18)
- # Calculate adaptive font size based on content length
try:
total_chars = len(slide_content)
chars_per_line = max(1, int(content_width / Pt(10)))
@@ -253,16 +251,16 @@ class RendererPptx(BaseRenderer):
font_multiplier = 1.0
if available_lines > 0 and lines_needed > available_lines:
font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1))
- calculated_size = max(6, int(base_font_size * font_multiplier)) # Minimum 6pt
+ calculated_size = max(6, int(base_font_size * font_multiplier))
except (ZeroDivisionError, ValueError, TypeError):
- calculated_size = max(6, base_font_size) # Fallback to base size with minimum
+ calculated_size = max(6, base_font_size)
p.font.size = Pt(calculated_size)
p.font.bold = paragraph_style.get("bold", False)
- paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))
+ paragraph_color = self._getSafeColor(paragraph_style.get("color", self._paragraphColor()))
p.font.color.rgb = RGBColor(*paragraph_color)
+ self._applyPrimaryFont(p.font)
- # Apply alignment
align = paragraph_style.get("align", "left")
if align == "center":
p.alignment = PP_ALIGN.CENTER
@@ -283,8 +281,9 @@ class RendererPptx(BaseRenderer):
if title_shape.text_frame.paragraphs[0].font:
title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48))
title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
- title_color = self._getSafeColor(title_style.get("color", (31, 78, 121)))
+ title_color = self._getSafeColor(title_style.get("color", self._primaryColor()))
title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
+ self._applyPrimaryFont(title_shape.text_frame.paragraphs[0].font)
# Clear subtitle placeholder instead of adding filler text
try:
@@ -356,242 +355,38 @@ class RendererPptx(BaseRenderer):
)
]
- def _parseContentToSlides(self, content: str, title: str) -> list:
- """
- Parse content into slide data structure.
-
- Args:
- content: Content to parse
- title: Presentation title
-
- Returns:
- List of slide data dictionaries
- """
- slides = []
-
- # Split content by slide markers or headers
- slide_sections = self._splitContentIntoSlides(content)
-
- for i, section in enumerate(slide_sections):
- if section.strip():
- slide_data = {
- "title": f"Slide {i + 1}",
- "content": section.strip()
- }
-
- # Extract title from content if it starts with #
- lines = section.strip().split('\n')
- if lines and lines[0].startswith('#'):
- # Remove # symbols and clean up title
- slide_title = lines[0].lstrip('#').strip()
- slide_data["title"] = slide_title
- slide_data["content"] = '\n'.join(lines[1:]).strip()
- elif lines and lines[0].strip():
- # Use first line as title if it looks like a title
- first_line = lines[0].strip()
- if len(first_line) < 100 and not first_line.endswith('.'):
- slide_data["title"] = first_line
- slide_data["content"] = '\n'.join(lines[1:]).strip()
-
- slides.append(slide_data)
-
- return slides
-
- def _splitContentIntoSlides(self, content: str) -> list:
- """
- Split content into individual slides based on headers and structure.
-
- Args:
- content: Content to split
-
- Returns:
- List of slide content strings
- """
- # re is already imported at module level
-
- # First, try to split by major headers (# or ##)
- # This is the most common case for AI-generated content
- header_pattern = r'^(#{1,2})\s+(.+)$'
- lines = content.split('\n')
- slides = []
- current_slide = []
-
- for line in lines:
- # Check if this line is a header
- header_match = re.match(header_pattern, line.strip())
- if header_match:
- # If we have content in current slide, save it
- if current_slide:
- slide_content = '\n'.join(current_slide).strip()
- if slide_content:
- slides.append(slide_content)
- current_slide = []
-
- # Start new slide with this header
- current_slide.append(line)
- else:
- # Add line to current slide
- current_slide.append(line)
-
- # Add the last slide
- if current_slide:
- slide_content = '\n'.join(current_slide).strip()
- if slide_content:
- slides.append(slide_content)
-
- # If we found slides with headers, return them
- if len(slides) > 1:
- return slides
-
- # Fallback: Split by double newlines
- sections = content.split('\n\n\n')
- if len(sections) > 1:
- return [s.strip() for s in sections if s.strip()]
-
- # Another fallback: Split by double newlines
- sections = content.split('\n\n')
- if len(sections) > 1:
- return [s.strip() for s in sections if s.strip()]
-
- # Last resort: return as single slide
- return [content.strip()]
-
-
def getOutputMimeType(self) -> str:
"""Get MIME type for rendered output."""
return self.outputMimeType
-
- async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
- """Get style set - use styles from document generation metadata if available,
- otherwise enhance default styles with AI if userPrompt provided.
-
- WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
- not be generated separately by renderers. Only fall back to AI if styles not provided.
-
- Args:
- extractedContent: Document content with metadata (may contain styles)
- userPrompt: User's prompt (AI will detect style instructions in any language)
- aiService: AI service (used only if styles not in metadata and userPrompt provided)
- templateName: Name of template style set (None = default)
-
- Returns:
- Dict with style definitions for all document styles
- """
- # Get default style set
- defaultStyleSet = self._getDefaultStyleSet()
-
- # FIRST: Check if styles are provided in document generation metadata (preferred approach)
- if extractedContent:
- metadata = extractedContent.get("metadata", {})
- if isinstance(metadata, dict):
- styles = metadata.get("styles")
- if styles and isinstance(styles, dict):
- self.logger.debug("Using styles from document generation metadata")
- enhancedStyleSet = self._convertColorsFormat(styles)
- return self._validateStylesReadability(enhancedStyleSet)
-
- # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
- if userPrompt and aiService:
- self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
- enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- # Colors already converted in _getAiStylesWithPptxColors
- return self._validateStylesReadability(enhancedStyleSet)
- else:
- # Use default styles only
- return defaultStyleSet
-
- async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
- """Enhance default styles with AI based on user prompt."""
- try:
- style_template = self._createProfessionalPptxTemplate(userPrompt, defaultStyleSet)
- enhanced_styles = await self._getAiStylesWithPptxColors(aiService, style_template, defaultStyleSet)
- return enhanced_styles
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
- return defaultStyleSet
-
- def _validateStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Validate and fix readability issues in AI-generated styles."""
- try:
- # Ensure minimum font sizes for PowerPoint readability
- min_font_sizes = {
- "title": 36,
- "heading": 24,
- "subheading": 20,
- "paragraph": 14,
- "bullet_list": 14,
- "table_header": 12,
- "table_cell": 12
- }
-
- for style_name, min_size in min_font_sizes.items():
- if style_name in styles:
- current_size = styles[style_name].get("font_size", 12)
- if current_size < min_size:
- styles[style_name]["font_size"] = min_size
-
- return styles
-
- except Exception as e:
- logger.warning(f"Style validation failed: {str(e)}")
- return self._getDefaultStyleSet()
-
- def _getDefaultStyleSet(self) -> Dict[str, Any]:
- """Default PowerPoint style set - used when no style instructions present."""
- return {
- "title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"},
- "heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"},
- "subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"},
- "paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"},
- "bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20},
- "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"},
- "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"},
- "slide_size": "16:9",
- "content_per_slide": "concise",
- "design_theme": "corporate",
- "color_scheme": "professional",
- "background_style": "clean",
- "accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"],
- "professional_grade": True,
- "executive_ready": True
- }
-
- def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
- """Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
- # json is already imported at module level
- schema_json = json.dumps(style_schema, indent=4)
-
- return f"""Customize the JSON below for professional PowerPoint slides.
-User Request: {userPrompt or "Create professional corporate slides"}
+ def _primaryColor(self) -> tuple:
+ """Return the primary color from the internal styles as an RGB tuple."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return self._getSafeColor(s.get("colors", {}).get("primary", "#24292e"))
+ return self._getSafeColor("#24292e")
-Rules:
-- Use professional colors (blues, grays, deep greens)
-- Large, readable font sizes
-- High contrast
-- Sophisticated color palettes
+ def _paragraphColor(self) -> tuple:
+ """Return the paragraph text color from the internal styles as an RGB tuple."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return self._getSafeColor(s.get("paragraph", {}).get("color", "#24292e"))
+ return self._getSafeColor("#24292e")
-Return ONLY this JSON with your changes:
+ def _applyPrimaryFont(self, font) -> None:
+ """Set font.name to the primary font from unified style when available."""
+ us = getattr(self, '_unifiedStyle', None)
+ if us and isinstance(us.get("fonts"), dict):
+ primaryFont = us["fonts"].get("primary")
+ if primaryFont:
+ font.name = primaryFont
-{schema_json}
-
-JSON ONLY. NO OTHER TEXT."""
-
- async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
- """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing."""
- if not aiService:
- return default_styles
-
- try:
- # Use base template method which handles debug file writing
- enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles)
-
- # Convert colors to PPTX format (RGB tuples)
- return self._convertColorsFormat(enhanced_styles)
-
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults")
- return default_styles
+ def _monoFont(self) -> str:
+ """Return the monospace font name from the internal styles."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return s.get("fonts", {}).get("monospace", "Consolas")
+ return "Consolas"
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to RGB format for PowerPoint compatibility."""
@@ -600,14 +395,13 @@ JSON ONLY. NO OTHER TEXT."""
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#'):
- # Convert hex to RGB tuple for PowerPoint
hex_color = value.lstrip('#')
if len(hex_color) == 6:
r = int(hex_color[0:2], 16)
g = int(hex_color[2:4], 16)
b = int(hex_color[4:6], 16)
styles[style_name][prop] = (r, g, b)
- elif len(hex_color) == 8: # aRGB format
+ elif len(hex_color) == 8:
r = int(hex_color[2:4], 16)
g = int(hex_color[4:6], 16)
b = int(hex_color[6:8], 16)
@@ -628,7 +422,7 @@ JSON ONLY. NO OTHER TEXT."""
g = int(hex_color[2:4], 16)
b = int(hex_color[4:6], 16)
return (r, g, b)
- elif len(hex_color) == 8: # aRGB format
+ elif len(hex_color) == 8:
r = int(hex_color[2:4], 16)
g = int(hex_color[4:6], 16)
b = int(hex_color[6:8], 16)
@@ -661,7 +455,7 @@ JSON ONLY. NO OTHER TEXT."""
document_title = title if title else metadata.get("title", "Generated Document")
- # Title slide (clean — just the document title, no filler text)
+ # Title slide (clean - just the document title, no filler text)
slides.append({
"title": document_title,
"content": "",
@@ -682,7 +476,6 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.error(f"Error parsing JSON to slides: {str(e)}")
- # Return minimal fallback slides
return [
{
"title": title,
@@ -690,319 +483,6 @@ JSON ONLY. NO OTHER TEXT."""
}
]
- def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
- """Create a slide from a JSON section."""
- try:
- # Get section title from data or use default
- section_title = "Untitled Section"
- if section.get("content_type") == "heading":
- # Extract text from elements array - use nested content structure
- for element in section.get("elements", []):
- if isinstance(element, dict):
- content = element.get("content", {})
- if isinstance(content, dict):
- text = content.get("text", "")
- if text:
- section_title = text
- break
- elif section.get("title"):
- section_title = section.get("title")
-
- content_type = section.get("content_type", "paragraph")
- elements = section.get("elements", [])
-
- # Check for three content formats from Phase 5D in elements
- content_parts = []
- for element in elements:
- if not isinstance(element, dict):
- continue
-
- element_type = element.get("type", "")
-
- # Support three content formats from Phase 5D
- if element_type == "reference":
- # Document reference format
- doc_ref = element.get("documentReference", "")
- label = element.get("label", "Reference")
- content_parts.append(f"[Reference: {label}]")
- continue
- elif element_type == "extracted_text":
- # Extracted text format
- content = element.get("content", "")
- source = element.get("source", "")
- if content:
- source_text = f" (Source: {source})" if source else ""
- content_parts.append(f"{content}{source_text}")
- continue
-
- # Handle image sections specially
- if content_type == "image":
- # Extract image data from nested content structure
- images = []
- for element in elements:
- if isinstance(element, dict):
- # Extract from nested content structure
- content = element.get("content", {})
- if isinstance(content, dict):
- base64Data = content.get("base64Data")
- altText = content.get("altText", "Image")
- caption = content.get("caption", "")
- else:
- # Fallback to direct element fields
- base64Data = element.get("base64Data")
- altText = element.get("altText", "Image")
- caption = element.get("caption", "")
-
- if base64Data:
- images.append({
- "base64Data": base64Data,
- "altText": altText,
- "caption": caption
- })
-
- return {
- "title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"),
- "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present
- "images": images
- }
-
- # Build slide content based on section type - iterate over elements and format each
- if not content_parts: # Only if we didn't process reference/extracted_text above
- for element in elements:
- if not isinstance(element, dict):
- continue
-
- element_type = element.get("type", "")
- # Use element type if available, otherwise fall back to section content_type
- if not element_type:
- element_type = content_type
-
- if element_type == "table":
- formatted = self._formatTableForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "bullet_list" or element_type == "list":
- formatted = self._formatListForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "heading":
- formatted = self._formatHeadingForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "paragraph":
- formatted = self._formatParagraphForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "code_block" or element_type == "code":
- formatted = self._formatCodeForSlide(element)
- if formatted:
- content_parts.append(formatted)
- else:
- # Fallback to paragraph formatting
- formatted = self._formatParagraphForSlide(element)
- if formatted:
- content_parts.append(formatted)
-
- # Combine content parts
- slide_content = "\n\n".join(filter(None, content_parts))
-
- return {
- "title": section_title,
- "content": slide_content,
- "images": [] # No images for non-image sections
- }
-
- except Exception as e:
- logger.warning(f"Error creating slide from section: {str(e)}")
- return None
-
- def _formatTableForSlide(self, element: Dict[str, Any]) -> str:
- """Format table data for slide presentation."""
- try:
- # Extract table data from element - handle nested content structure
- if not isinstance(element, dict):
- return ""
-
- # Extract from nested content structure
- content = element.get("content", {})
- if not isinstance(content, dict):
- return ""
-
- headers = content.get("headers", [])
- rows = content.get("rows", [])
-
- if not headers:
- return ""
-
- # Create table representation
- table_lines = []
-
- # Add headers
- header_line = " | ".join(str(h) for h in headers)
- table_lines.append(header_line)
-
- # Add separator
- separator = "-" * len(header_line)
- table_lines.append(separator)
-
- # Add data rows (limit based on content density)
- max_rows = 5 # Default limit
- for row in rows[:max_rows]:
- row_line = " | ".join(str(cell) for cell in row)
- table_lines.append(row_line)
-
- if len(rows) > max_rows:
- table_lines.append(f"... and {len(rows) - max_rows} more rows")
-
- return "\n".join(table_lines)
-
- except Exception as e:
- logger.warning(f"Error formatting table for slide: {str(e)}")
- return ""
-
- def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
- """Format list data for slide presentation."""
- try:
- # Extract from nested content structure
- content = list_data.get("content", {})
- if not isinstance(content, dict):
- return ""
- items = content.get("items", [])
-
- if not items:
- return ""
-
- # Create list representation
- list_lines = []
-
- for item in items:
- if isinstance(item, dict):
- text = item.get("text", "")
- list_lines.append(f"• {text}")
-
- # Add subitems (limit to 3 for readability)
- subitems = item.get("subitems", [])[:3]
- for subitem in subitems:
- if isinstance(subitem, dict):
- list_lines.append(f" - {subitem.get('text', '')}")
- else:
- list_lines.append(f" - {subitem}")
- else:
- list_lines.append(f"• {str(item)}")
-
- return "\n".join(list_lines)
-
- except Exception as e:
- logger.warning(f"Error formatting list for slide: {str(e)}")
- return ""
-
- def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
- """Format heading data for slide presentation."""
- try:
- # Extract from nested content structure
- content = heading_data.get("content", {})
- if not isinstance(content, dict):
- return ""
- text = content.get("text", "")
- level = content.get("level", 1)
-
- if text:
- return f"{'#' * level} {text}"
-
- return ""
-
- except Exception as e:
- logger.warning(f"Error formatting heading for slide: {str(e)}")
- return ""
-
- def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
- """Format paragraph data for slide presentation."""
- try:
- # Extract from nested content structure
- content = paragraph_data.get("content", {})
- if isinstance(content, dict):
- if content.get("inlineRuns"):
- text = "".join(r.get("value", "") for r in content["inlineRuns"])
- else:
- text = content.get("text", "")
- elif isinstance(content, str):
- text = content
- else:
- text = ""
-
- if text:
- max_length = 200
- if len(text) > max_length:
- text = text[:max_length] + "..."
-
- return text
-
- return ""
-
- except Exception as e:
- logger.warning(f"Error formatting paragraph for slide: {str(e)}")
- return ""
-
- def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
- """Format code data for slide presentation."""
- try:
- # Extract from nested content structure
- content = code_data.get("content", {})
- if not isinstance(content, dict):
- return ""
- code = content.get("code", "")
- language = content.get("language", "")
-
- if code:
- # Limit code length based on content density
- max_length = 100 # Default limit
- if len(code) > max_length:
- code = code[:max_length] + "..."
-
- if language:
- return f"Code ({language}):\n{code}"
- else:
- return f"Code:\n{code}"
-
- return ""
-
- except Exception as e:
- logger.warning(f"Error formatting code for slide: {str(e)}")
- return ""
-
- def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
- """Determine the best professional slide layout based on content."""
- try:
- content = slide_data.get("content", "")
- title = slide_data.get("title", "")
-
- if not content:
- return 0
-
- # Professional layout selection based on content
- if "|" in content and "-" in content:
- # Has both tables and lists - use content with caption for professional look
- return 2
- elif "|" in content:
- # Has tables - use content layout for clean table presentation
- return 1
- elif content.count("•") > 2:
- # Has many bullet points - use content layout for better readability
- return 1
- elif len(content) > 200:
- # Long content - use content layout for better text flow
- return 1
- elif title and len(title) > 20:
- # Long title - use title and content layout
- return 1
- else:
- # Default to title and content layout for professional appearance
- return 1
-
- except Exception as e:
- logger.warning(f"Error determining slide layout: {str(e)}")
- return 1 # Default to title and content layout
-
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections: each top-level heading creates a new slide.
@@ -1078,191 +558,6 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
- def _formatSectionContent(self, section: Dict[str, Any]) -> str:
- """Format section content for slide presentation."""
- try:
- content_type = section.get("content_type", "paragraph")
- elements = section.get("elements", [])
-
- # Image sections return empty content (handled separately)
- if content_type == "image":
- return ""
-
- # Process each element in the section - use element type, not section type
- content_parts = []
- for element in elements:
- if not isinstance(element, dict):
- continue
-
- element_type = element.get("type", "")
- # Use element type if available, otherwise fall back to section content_type
- if not element_type:
- element_type = content_type
-
- if element_type == "table":
- formatted = self._formatTableForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "bullet_list" or element_type == "list":
- formatted = self._formatListForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "heading":
- formatted = self._formatHeadingForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "paragraph":
- formatted = self._formatParagraphForSlide(element)
- if formatted:
- content_parts.append(formatted)
- elif element_type == "code_block" or element_type == "code":
- formatted = self._formatCodeForSlide(element)
- if formatted:
- content_parts.append(formatted)
- else:
- # Fallback to paragraph formatting
- formatted = self._formatParagraphForSlide(element)
- if formatted:
- content_parts.append(formatted)
-
- return "\n\n".join(filter(None, content_parts))
-
- except Exception as e:
- logger.warning(f"Error formatting section content: {str(e)}")
- return ""
-
- def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None:
- """Add images to a PowerPoint slide."""
- try:
- from pptx.util import Inches, Pt
- from pptx.enum.text import PP_ALIGN
- from pptx.dml.color import RGBColor
- import base64
- import io
-
- if not images:
- return
-
- # Get slide dimensions from presentation
- if hasattr(self, '_currentPresentation'):
- prs = self._currentPresentation
- else:
- prs = slide.presentation
- slideWidth = prs.slide_width
- slideHeight = prs.slide_height
- titleHeight = Inches(1.5) # Approximate title height
-
- # Available area for images
- availableWidth = slideWidth - Inches(1) # Margins
- availableHeight = slideHeight - titleHeight - Inches(1) # Title + margins
-
- # Position images
- if len(images) == 1:
- # Single image: center it
- img = images[0]
- base64Data = img.get("base64Data")
- # Validate base64Data is present and not empty
- if not base64Data or not isinstance(base64Data, str) or len(base64Data.strip()) == 0:
- logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}")
- return
-
- try:
- imageBytes = base64.b64decode(base64Data)
- if len(imageBytes) == 0:
- logger.error("Decoded image bytes are empty")
- return
- imageStream = io.BytesIO(imageBytes)
- except Exception as decode_error:
- logger.error(f"Failed to decode base64 image data: {str(decode_error)}")
- return
-
- # Get image dimensions
- try:
- from PIL import Image as PILImage
- pilImage = PILImage.open(imageStream)
- imgWidth, imgHeight = pilImage.size
-
- # Scale to fit available space (max 90% of slide for better visibility)
- # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96)
- # Conversion: pixels * (72/96) = points
- imgWidthPoints = imgWidth * (72.0 / 96.0)
- imgHeightPoints = imgHeight * (72.0 / 96.0)
-
- maxWidth = availableWidth * 0.9
- maxHeight = availableHeight * 0.9
-
- scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0)
- finalWidth = imgWidthPoints * scale
- finalHeight = imgHeightPoints * scale
-
- # Center image
- left = (slideWidth - finalWidth) / 2
- top = titleHeight + (availableHeight - finalHeight) / 2
-
- imageStream.seek(0)
- except Exception:
- # Fallback: use default size
- finalWidth = Inches(6)
- finalHeight = Inches(4.5)
- left = (slideWidth - finalWidth) / 2
- top = titleHeight + Inches(1)
- imageStream.seek(0)
-
- # Add image to slide
- try:
- slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)
- except Exception as add_error:
- # If add_picture fails, try with explicit format
- imageStream.seek(0)
- # Ensure we have valid image data
- if len(imageBytes) > 0:
- slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight)
- else:
- raise Exception(f"Empty image data: {add_error}")
-
- # Add caption if available
- caption = img.get("caption") or img.get("altText")
- if caption and caption != "Image":
- # Add text box below image
- captionTop = top + finalHeight + Inches(0.2)
- captionBox = slide.shapes.add_textbox(
- Inches(1),
- captionTop,
- slideWidth - Inches(2),
- Inches(0.5)
- )
- captionFrame = captionBox.text_frame
- captionFrame.text = caption
- captionFrame.paragraphs[0].font.size = Pt(12)
- captionFrame.paragraphs[0].font.italic = True
- captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER
- else:
- # Multiple images: arrange in grid
- cols = 2 if len(images) <= 4 else 3
- rows = (len(images) + cols - 1) // cols
-
- imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols
- imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows
-
- for idx, img in enumerate(images):
- base64Data = img.get("base64Data")
- if base64Data:
- row = idx // cols
- col = idx % cols
-
- imageBytes = base64.b64decode(base64Data)
- imageStream = io.BytesIO(imageBytes)
-
- left = Inches(0.5) + col * (imgWidth + Inches(0.5))
- top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5))
-
- slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight)
-
- except Exception as e:
- logger.error(f"Error embedding images in PPTX slide: {str(e)}")
- import traceback
- logger.error(f"Traceback: {traceback.format_exc()}")
-
def _addMarkdownInlineRuns(self, paragraph, text: str, fontSize=None, fontColor=None, fontBold=None) -> None:
"""Parse markdown inline formatting and add Runs to a pptx paragraph.
@@ -1282,6 +577,7 @@ JSON ONLY. NO OTHER TEXT."""
run.font.bold = bold
elif fontBold is not None:
run.font.bold = fontBold
+ self._applyPrimaryFont(run.font)
pos = 0
for m in _PPTX_MD_INLINE_RE.finditer(text):
@@ -1301,7 +597,7 @@ JSON ONLY. NO OTHER TEXT."""
elif m.group(7):
r = paragraph.add_run()
r.text = m.group(7)
- r.font.name = "Courier New"
+ r.font.name = self._monoFont()
if fontSize and hasattr(fontSize, 'pt'):
r.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
elif fontSize:
@@ -1320,8 +616,7 @@ JSON ONLY. NO OTHER TEXT."""
"""Process InlineRun dicts into pptx text runs."""
from pptx.util import Pt
paragraph.text = ""
- us = getattr(self, '_unifiedStyle', None)
- monoFont = us["fonts"]["monospace"] if us else "Courier New"
+ monoFont = self._monoFont()
for run in runs:
runType = run.get("type", "text")
value = run.get("value", "")
@@ -1331,6 +626,7 @@ JSON ONLY. NO OTHER TEXT."""
r.font.size = fontSize
if fontColor:
r.font.color.rgb = fontColor
+ self._applyPrimaryFont(r.font)
if runType == "bold":
r.font.bold = True
elif runType == "italic":
@@ -1359,6 +655,17 @@ JSON ONLY. NO OTHER TEXT."""
if not headers:
return
+ # Per-table style override (deep-merge with global table styles)
+ perTableOverride = content.get("tableStyle", {})
+ globalTableStyle = {
+ "table_header": styles.get("table_header", {}),
+ "table_cell": styles.get("table_cell", {}),
+ "table_border": styles.get("table_border", {}),
+ "table_banding": styles.get("table_banding", {}),
+ "table_padding": styles.get("table_padding", 4),
+ }
+ mergedTableStyle = deepMerge(globalTableStyle, perTableOverride) if perTableOverride else globalTableStyle
+
num_cols = int(len(headers))
num_rows = int(len(rows) + 1)
left = Inches(0.5)
@@ -1382,107 +689,142 @@ JSON ONLY. NO OTHER TEXT."""
table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height)
table = table_shape.table
- # Set column widths - width is in EMU, divide evenly
- # python-pptx expects EMU values (914400 EMU = 1 inch)
- col_width_emu = int(width) // num_cols # Ensure integer division for EMU
+ # Set column widths evenly (EMU values)
+ col_width_emu = int(width) // num_cols
for col_idx in range(num_cols):
table.columns[col_idx].width = col_width_emu
- # Add headers with styling - OPTIMIZED: pre-calculate color/style objects
- header_style = styles.get("table_header", {})
- header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121)))
- header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255)))
- header_font_size = header_style.get("font_size", 18)
+ # Infer per-column alignments from data or explicit tableStyle
+ columnAlignments = self._inferColumnAlignments(headers, rows, mergedTableStyle)
- # Pre-calculate and cache RGB color objects
- header_bg_rgb = RGBColor(*header_bg_color)
- header_text_rgb = RGBColor(*header_text_color)
- header_font_size_pt = Pt(header_font_size)
- header_bold = header_style.get("bold", True)
+ # Add headers with styling
+ headerStyle = mergedTableStyle.get("table_header", {})
+ headerBgColor = self._getSafeColor(headerStyle.get("background", self._tableHeaderBg()))
+ headerTextColor = self._getSafeColor(headerStyle.get("text_color", (255, 255, 255)))
+ headerFontSize = headerStyle.get("font_size", 10)
- # Determine alignment once
- align = header_style.get("align", "center")
- if align == "left":
- header_alignment = PP_ALIGN.LEFT
- elif align == "right":
- header_alignment = PP_ALIGN.RIGHT
+ headerBgRgb = RGBColor(*headerBgColor)
+ headerTextRgb = RGBColor(*headerTextColor)
+ headerFontSizePt = Pt(headerFontSize)
+ headerBold = headerStyle.get("bold", True)
+
+ headerAlign = headerStyle.get("align", "center")
+ if headerAlign == "left":
+ headerAlignment = PP_ALIGN.LEFT
+ elif headerAlign == "right":
+ headerAlignment = PP_ALIGN.RIGHT
else:
- header_alignment = PP_ALIGN.CENTER
+ headerAlignment = PP_ALIGN.CENTER
for col_idx, header in enumerate(headers):
cell = table.cell(0, col_idx)
- # Clear existing text and set new text
cell.text_frame.clear()
cellRuns = self._inlineRunsForCell(header)
- header_text = "".join(r.get("value", "") for r in cellRuns)
- cell.text = header_text
+ headerText = "".join(r.get("value", "") for r in cellRuns)
+ cell.text = headerText
- # Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
- # Apply styling - use cached objects
cell.fill.solid()
- cell.fill.fore_color.rgb = header_bg_rgb
+ cell.fill.fore_color.rgb = headerBgRgb
para = cell.text_frame.paragraphs[0]
- para.font.bold = header_bold
- para.font.size = header_font_size_pt
- para.font.color.rgb = header_text_rgb
- para.alignment = header_alignment
+ para.font.bold = headerBold
+ para.font.size = headerFontSizePt
+ para.font.color.rgb = headerTextRgb
+ para.alignment = headerAlignment
+ self._applyPrimaryFont(para.font)
- # Ensure text is set on paragraph
if not para.text:
- para.text = header_text
+ para.text = headerText
- # Add data rows with styling - OPTIMIZED: pre-calculate color/style objects
- cell_style = styles.get("table_cell", {})
- cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255)))
- cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47)))
- cell_font_size = cell_style.get("font_size", 16)
+ # Add data rows with optional row banding
+ cellStyle = mergedTableStyle.get("table_cell", {})
+ cellTextColor = self._getSafeColor(cellStyle.get("text_color", self._paragraphColor()))
+ cellFontSize = cellStyle.get("font_size", 10)
- # Pre-calculate and cache RGB color objects
- cell_bg_rgb = RGBColor(*cell_bg_color)
- cell_text_rgb = RGBColor(*cell_text_color)
- cell_font_size_pt = Pt(cell_font_size)
- cell_bold = cell_style.get("bold", False)
+ bandingConfig = mergedTableStyle.get("table_banding", {})
+ bandingEnabled = bandingConfig.get("enabled", True)
+ bandingEven = self._getSafeColor(bandingConfig.get("even", self._tableRowBandingEven()))
+ bandingOdd = self._getSafeColor(bandingConfig.get("odd", self._tableRowBandingOdd()))
- # Determine alignment once
- align = cell_style.get("align", "left")
- if align == "center":
- cell_alignment = PP_ALIGN.CENTER
- elif align == "right":
- cell_alignment = PP_ALIGN.RIGHT
+ cellTextRgb = RGBColor(*cellTextColor)
+ cellFontSizePt = Pt(cellFontSize)
+ cellBold = cellStyle.get("bold", False)
+
+ cellAlign = cellStyle.get("align", "left")
+ if cellAlign == "center":
+ cellAlignment = PP_ALIGN.CENTER
+ elif cellAlign == "right":
+ cellAlignment = PP_ALIGN.RIGHT
else:
- cell_alignment = PP_ALIGN.LEFT
+ cellAlignment = PP_ALIGN.LEFT
for row_idx, row_data in enumerate(rows, 1):
+ dataRowIdx = row_idx - 1
+
+ if bandingEnabled:
+ bandColor = bandingEven if dataRowIdx % 2 == 0 else bandingOdd
+ else:
+ bandColor = self._getSafeColor("#FFFFFF")
+ cellBgRgb = RGBColor(*bandColor)
+
for col_idx, cell_data in enumerate(row_data[:num_cols]):
cell = table.cell(row_idx, col_idx)
- # Clear existing text and set new text
cell.text_frame.clear()
cellRuns = self._inlineRunsForCell(cell_data)
- cell_text = "".join(r.get("value", "") for r in cellRuns)
- cell.text = cell_text
+ cellText = "".join(r.get("value", "") for r in cellRuns)
+ cell.text = cellText
- # Ensure paragraph exists
if len(cell.text_frame.paragraphs) == 0:
cell.text_frame.add_paragraph()
- # Apply styling - use cached objects
cell.fill.solid()
- cell.fill.fore_color.rgb = cell_bg_rgb
+ cell.fill.fore_color.rgb = cellBgRgb
para = cell.text_frame.paragraphs[0]
- para.font.size = cell_font_size_pt
- para.font.bold = cell_bold
- para.font.color.rgb = cell_text_rgb
- para.alignment = cell_alignment
+ para.font.size = cellFontSizePt
+ para.font.bold = cellBold
+ para.font.color.rgb = cellTextRgb
+ self._applyPrimaryFont(para.font)
+
+ # Apply per-column alignment (inferred or explicit)
+ if col_idx < len(columnAlignments):
+ colAlign = columnAlignments[col_idx]
+ if colAlign == "center":
+ para.alignment = PP_ALIGN.CENTER
+ elif colAlign == "right":
+ para.alignment = PP_ALIGN.RIGHT
+ else:
+ para.alignment = PP_ALIGN.LEFT
+ else:
+ para.alignment = cellAlignment
- # Ensure text is set on paragraph
if not para.text:
- para.text = cell_text
+ para.text = cellText
except Exception as e:
logger.warning(f"Error adding table to slide: {str(e)}")
+
+ def _tableHeaderBg(self) -> tuple:
+ """Return table header background color from the internal styles."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return self._getSafeColor(s.get("table_header", {}).get("background", "#f6f8fa"))
+ return self._getSafeColor("#f6f8fa")
+
+ def _tableRowBandingEven(self) -> tuple:
+ """Return even row banding color from the internal styles."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return self._getSafeColor(s.get("table_banding", {}).get("even", "#f6f8fa"))
+ return self._getSafeColor("#f6f8fa")
+
+ def _tableRowBandingOdd(self) -> tuple:
+ """Return odd row banding color from the internal styles."""
+ s = getattr(self, '_styles', None)
+ if s:
+ return self._getSafeColor(s.get("table_banding", {}).get("odd", "#FFFFFF"))
+ return self._getSafeColor("#FFFFFF")
def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
"""Add bullet list to slide text frame with consistent formatting."""
@@ -1498,9 +840,12 @@ JSON ONLY. NO OTHER TEXT."""
if not items:
return
- listStyle = styles.get("paragraph", {})
- fontSize = Pt(max(10, int(listStyle.get("font_size", 14) * font_size_multiplier)))
- fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))
+ bulletStyle = styles.get("bullet_list", {})
+ bulletChar = bulletStyle.get("bullet_char", "\u2022")
+ baseFontSize = bulletStyle.get("font_size", 14)
+ fontSize = Pt(max(10, int(baseFontSize * font_size_multiplier)))
+ fontColor = RGBColor(*self._getSafeColor(bulletStyle.get("color", styles.get("paragraph", {}).get("color", self._paragraphColor()))))
+ indentPt = bulletStyle.get("indent", 18)
for item in items:
runs = self._inlineRunsForListItem(item)
@@ -1511,15 +856,18 @@ JSON ONLY. NO OTHER TEXT."""
p.alignment = PP_ALIGN.LEFT
p.space_before = Pt(2)
p.space_after = Pt(2)
+ if hasattr(p, 'paragraph_format'):
+ p.paragraph_format.left_indent = Pt(indentPt)
+ bulletPrefix = f"{bulletChar} "
if isNewFormat:
- bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs
+ bulletRuns = [{"type": "text", "value": bulletPrefix}] + runs
self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor)
else:
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
if not itemText or not itemText.strip():
continue
- self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
+ self._addMarkdownInlineRuns(p, f"{bulletPrefix}{itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
# Subitems (only for dict-style items)
if isinstance(item, dict):
@@ -1531,14 +879,16 @@ JSON ONLY. NO OTHER TEXT."""
sp.alignment = PP_ALIGN.LEFT
sp.space_before = Pt(1)
sp.space_after = Pt(1)
+ if hasattr(sp, 'paragraph_format'):
+ sp.paragraph_format.left_indent = Pt(indentPt * 2)
if isSubNew:
- subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns
+ subBulletRuns = [{"type": "text", "value": "\u2013 "}] + subRuns
self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor)
else:
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
if not subText:
continue
- self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
+ self._addMarkdownInlineRuns(sp, f"\u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
except Exception as e:
logger.warning(f"Error adding bullet list to slide: {str(e)}")
@@ -1549,7 +899,6 @@ JSON ONLY. NO OTHER TEXT."""
from pptx.util import Pt
from pptx.dml.color import RGBColor
- # Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return
@@ -1561,23 +910,17 @@ JSON ONLY. NO OTHER TEXT."""
p = text_frame.add_paragraph()
p.level = 0
- heading_style = styles.get("heading", {})
- if level == 1:
- base_font_size = heading_style.get("font_size", 28)
- elif level == 2:
- base_font_size = heading_style.get("font_size", 22)
- elif level == 3:
- base_font_size = heading_style.get("font_size", 18)
- else:
- base_font_size = heading_style.get("font_size", 16)
+ headingKey = f"heading{min(level, 4)}"
+ headingStyle = styles.get(headingKey, styles.get("heading1", {}))
+ baseFontSize = headingStyle.get("font_size", 22)
- calculated_size = max(12, int(base_font_size * font_size_multiplier))
- fSize = Pt(calculated_size)
- fColor = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121))))
- self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=True)
- # Add spacing before and after headings
- p.space_before = Pt(12 if level == 1 else 8) # More space before H1
- p.space_after = Pt(6) # Space after heading
+ calculatedSize = max(12, int(baseFontSize * font_size_multiplier))
+ fSize = Pt(calculatedSize)
+ fColor = RGBColor(*self._getSafeColor(headingStyle.get("color", styles.get("colors", {}).get("primary", self._primaryColor()))))
+ fBold = headingStyle.get("bold", True)
+ self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)
+ p.space_before = Pt(headingStyle.get("space_before", 12))
+ p.space_after = Pt(headingStyle.get("space_after", 6))
except Exception as e:
logger.warning(f"Error adding heading to slide: {str(e)}")
@@ -1589,7 +932,6 @@ JSON ONLY. NO OTHER TEXT."""
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
- # Extract from nested content structure
content = element.get("content", {})
if isinstance(content, dict):
inlineRuns = self._inlineRunsFromContent(content)
@@ -1620,7 +962,7 @@ JSON ONLY. NO OTHER TEXT."""
base_font_size = paragraph_style.get("font_size", 14)
calculated_size = max(10, int(base_font_size * font_size_multiplier))
fSize = Pt(calculated_size)
- fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
+ fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", self._paragraphColor())))
fBold = paragraph_style.get("bold", False)
if hasInlineRuns:
@@ -1643,13 +985,12 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.warning(f"Error adding paragraph to slide: {str(e)}")
- def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None:
- """Add code block to slide text frame."""
+ def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame=None, font_size_multiplier: float = 1.0, top=None) -> None:
+ """Add code block to slide as a separate shape with background fill and border."""
try:
- from pptx.util import Pt
+ from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
- # Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return
@@ -1657,135 +998,184 @@ JSON ONLY. NO OTHER TEXT."""
code = content.get("code", "")
language = content.get("language", "")
- if code:
- code_style = styles.get("code_block", {})
- code_font = code_style.get("font", "Courier New")
- base_code_font_size = code_style.get("font_size", 9)
- code_font_size = max(6, int(base_code_font_size * font_size_multiplier)) # Minimum 6pt for code
- code_color = self._getSafeColor(code_style.get("color", (47, 47, 47)))
-
- p = text_frame.add_paragraph()
- if language:
- p.text = f"Code ({language}):"
- p.font.bold = True
- p.font.size = Pt(code_font_size)
- p = text_frame.add_paragraph()
-
- p.text = code
- p.font.name = code_font
- p.font.size = Pt(code_font_size)
- p.font.color.rgb = RGBColor(*code_color)
+ if not code:
+ return
+
+ codeStyle = styles.get("code_block", {})
+ codeFont = codeStyle.get("font", self._monoFont())
+ baseCodeFontSize = codeStyle.get("font_size", 9)
+ codeFontSize = max(6, int(baseCodeFontSize * font_size_multiplier))
+ codeColor = self._getSafeColor(codeStyle.get("color", styles.get("paragraph", {}).get("color", self._paragraphColor())))
+ bgColor = self._getSafeColor(codeStyle.get("background", "#f6f8fa"))
+ borderColor = self._getSafeColor(codeStyle.get("border_color", "#e1e4e8"))
+
+ if hasattr(self, '_currentPresentation'):
+ prs = self._currentPresentation
+ else:
+ prs = slide.presentation
+
+ if top is not None:
+ codeTop = top
+ else:
+ maxBottom = Inches(1.5)
+ for shape in slide.shapes:
+ shapeBottom = shape.top + shape.height
+ if shapeBottom > maxBottom:
+ maxBottom = shapeBottom
+ codeTop = maxBottom + Inches(0.15)
+ codeLeft = Inches(0.5)
+ codeWidth = prs.slide_width - Inches(1)
+
+ lineCount = len(code.split('\n'))
+ estimatedHeight = Pt(codeFontSize * 1.4) * lineCount + Pt(16)
+ if language:
+ estimatedHeight += Pt(codeFontSize * 1.6)
+ maxHeight = prs.slide_height - codeTop - Inches(0.3)
+ codeHeight = min(estimatedHeight, maxHeight)
+
+ codeBox = slide.shapes.add_textbox(codeLeft, codeTop, codeWidth, codeHeight)
+ codeTf = codeBox.text_frame
+ codeTf.word_wrap = True
+ codeTf.auto_size = None
+
+ codeBox.fill.solid()
+ codeBox.fill.fore_color.rgb = RGBColor(*bgColor)
+ codeBox.line.color.rgb = RGBColor(*borderColor)
+ codeBox.line.width = Pt(0.75)
+
+ if language:
+ p = codeTf.paragraphs[0]
+ p.text = f"Code ({language}):"
+ p.font.bold = True
+ p.font.size = Pt(codeFontSize)
+ self._applyPrimaryFont(p.font)
+ p = codeTf.add_paragraph()
+ else:
+ p = codeTf.paragraphs[0]
+
+ p.text = code
+ p.font.name = codeFont
+ p.font.size = Pt(codeFontSize)
+ p.font.color.rgb = RGBColor(*codeColor)
except Exception as e:
logger.warning(f"Error adding code block to slide: {str(e)}")
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
- # datetime and UTC are already imported at module level
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
+ def _estimateElementHeightPt(self, element: Dict[str, Any], section: Dict[str, Any], styles: Dict[str, Any]) -> float:
+ """Estimate the rendered height of a text element in points."""
+ elementType = element.get("type", "") or section.get("content_type", "paragraph")
+ content = element.get("content", {})
+ charsPerLine = 65
+
+ if elementType == "heading":
+ level = content.get("level", 1) if isinstance(content, dict) else 1
+ headingKey = f"heading{min(level, 4)}"
+ hs = styles.get(headingKey, styles.get("heading1", {}))
+ return hs.get("font_size", 22) * 1.3 + hs.get("space_before", 12) + hs.get("space_after", 6)
+
+ if elementType in ("bullet_list", "list"):
+ items = content.get("items", []) if isinstance(content, dict) else []
+ total = len(items)
+ for item in items:
+ if isinstance(item, dict):
+ total += len(item.get("subitems", []))
+ fontSize = styles.get("bullet_list", {}).get("font_size", 14)
+ return max(total, 1) * fontSize * 1.5 + 4
+
+ text = ""
+ if isinstance(content, dict):
+ text = content.get("text", "")
+ elif isinstance(content, str):
+ text = content
+ pFontSize = styles.get("paragraph", {}).get("font_size", 14)
+ lines = max(1, len(text) / charsPerLine) if text else 1
+ return lines * pFontSize * 1.5 + 12
+
def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None:
- """Render all sections sequentially: text/bullets/headings into a shared
- textbox, tables and images as separate shapes placed below."""
+ """Render slide content with sequential Y-tracking.
+ Text elements are batched into sized textboxes; tables and code blocks
+ are placed as separate shapes at the tracked Y position."""
try:
from pptx.util import Inches, Pt
margin = Inches(0.5)
- contentTop = Inches(1.3)
+ currentY = Inches(1.3)
availableWidth = prs.slide_width - Inches(1)
- availableHeight = prs.slide_height - contentTop - Inches(0.3)
+ slideBottom = prs.slide_height - Inches(0.3)
+ spacing = Inches(0.1)
- # Create a single textbox for all non-table, non-image content
- textbox = slide.shapes.add_textbox(margin, contentTop, availableWidth, availableHeight)
- textFrame = textbox.text_frame
- textFrame.word_wrap = True
- textFrame.auto_size = None
+ pendingTextElements: List[tuple] = []
+
+ def _flushText():
+ nonlocal currentY, pendingTextElements
+ if not pendingTextElements:
+ return
+ totalPt = sum(self._estimateElementHeightPt(el, sec, styles) for sec, el in pendingTextElements)
+ estimatedHeight = Pt(totalPt)
+ remaining = slideBottom - currentY
+ boxHeight = min(estimatedHeight, remaining) if remaining > Inches(0.2) else estimatedHeight
+
+ textbox = slide.shapes.add_textbox(margin, currentY, availableWidth, boxHeight)
+ tf = textbox.text_frame
+ tf.word_wrap = True
+ tf.auto_size = None
+
+ for sec, el in pendingTextElements:
+ syntheticSection = {"content_type": sec.get("content_type", "paragraph"), "elements": [el]}
+ self._renderSectionToTextFrame(slide, syntheticSection, styles, tf, font_size_multiplier=1.0)
+
+ currentY += boxHeight + spacing
+ pendingTextElements = []
for section in slide_sections:
- self._renderSectionToTextFrame(slide, section, styles, textFrame, font_size_multiplier=1.0)
+ sectionType = section.get("content_type", "paragraph")
+ elements = section.get("elements", [])
+ if not elements:
+ continue
+
+ for element in elements:
+ if not isinstance(element, dict):
+ continue
+ elementType = element.get("type", "") or sectionType
+
+ if elementType == "image":
+ continue
+ if elementType == "paragraph" and self._isHorizontalRule(element):
+ continue
+
+ if elementType == "table":
+ _flushText()
+ self._addTableToSlide(slide, element, styles, top=currentY, max_width=availableWidth)
+ content = element.get("content", {})
+ numRows = (len(content.get("rows", [])) + 1) if isinstance(content, dict) else 1
+ currentY += Inches(0.4) * numRows + spacing
+
+ elif elementType in ("code_block", "code"):
+ _flushText()
+ self._addCodeBlockToSlide(slide, element, styles, top=currentY, font_size_multiplier=1.0)
+ content = element.get("content", {})
+ code = content.get("code", "") if isinstance(content, dict) else ""
+ lineCount = max(1, len(code.split('\n')))
+ codeFontSize = styles.get("code_block", {}).get("font_size", 9)
+ currentY += Pt(codeFontSize * 1.4) * lineCount + Pt(16) + spacing
+
+ else:
+ pendingTextElements.append((section, element))
+
+ _flushText()
- # Render standalone images that were passed alongside sections
if slide_images:
- self._addImagesToSlideInFrame(slide, slide_images, styles, margin, contentTop, availableWidth, availableHeight)
+ remainingHeight = slideBottom - currentY
+ if remainingHeight > Inches(0.5):
+ self._addImagesToSlideInFrame(slide, slide_images, styles, margin, currentY, availableWidth, remainingHeight)
except Exception as e:
logger.error(f"Error rendering slide content: {str(e)}")
- def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None:
- """Render text sections (paragraphs, lists, headings) in a text frame."""
- try:
- from pptx.util import Inches, Pt
- from pptx.enum.text import PP_ALIGN
- from pptx.dml.color import RGBColor
-
- # Calculate total text length for adaptive font sizing
- total_text_length = 0
- if adaptiveFontSize:
- for section in text_sections:
- elements = section.get("elements", [])
- for element in elements:
- if isinstance(element, dict):
- element_type = element.get("type", "")
- if element_type in ["paragraph", "bullet_list", "list", "heading"]:
- content = element.get("content", "")
- if isinstance(content, dict):
- if "text" in content:
- total_text_length += len(str(content["text"]))
- elif "items" in content:
- for item in content.get("items", []):
- total_text_length += len(str(item))
- elif isinstance(content, str):
- total_text_length += len(content)
-
- # Calculate adaptive font size multiplier based on text length and frame size
- font_size_multiplier = 1.0
- if adaptiveFontSize and total_text_length > 0:
- try:
- # More accurate calculation: estimate characters per line based on average character width
- # Average character width is approximately 0.6 * font_size in points
- # For 14pt font, average char width ≈ 8.4pt
- avg_char_width_pt = 8.4 # Approximate for 14pt font
- chars_per_line = max(1, int(float(width) / avg_char_width_pt))
-
- # Estimate lines needed
- lines_needed = total_text_length / max(chars_per_line, 1)
-
- # Available lines based on height (line height ≈ 1.2 * font_size)
- line_height_pt = 16.8 # Approximate for 14pt font with 1.2 spacing
- available_lines = max(1, int(float(height) / line_height_pt))
-
- if available_lines > 0 and lines_needed > available_lines:
- # More aggressive scaling for long texts
- # Calculate exact scale needed, then add 10% buffer
- scale_needed = available_lines / lines_needed
- font_size_multiplier = scale_needed * 0.9 # 10% buffer
- # Allow scaling down to 50% for very long texts (minimum readable)
- font_size_multiplier = max(0.5, min(1.0, font_size_multiplier))
- elif lines_needed <= available_lines * 0.7:
- # If text is much shorter than available space, can use slightly larger font
- font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8)
- except (ZeroDivisionError, ValueError, TypeError) as calc_error:
- logger.debug(f"Font size calculation error: {str(calc_error)}")
- # Fallback to default if calculation fails
- font_size_multiplier = 1.0
-
- textbox = slide.shapes.add_textbox(left, top, width, height)
- text_frame = textbox.text_frame
- text_frame.word_wrap = True
- text_frame.auto_size = None # Disable auto-size for fixed frame
- # Ensure text frame can display bullets
- text_frame.margin_left = Pt(0)
- text_frame.margin_right = Pt(0)
- text_frame.margin_top = Pt(0)
- text_frame.margin_bottom = Pt(0)
-
- # Pass font size multiplier to rendering methods
- for section in text_sections:
- self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier)
-
- except Exception as e:
- logger.warning(f"Error rendering text sections in frame: {str(e)}")
-
@staticmethod
def _isHorizontalRule(element: Dict[str, Any]) -> bool:
"""Detect markdown horizontal rules (---, ***, ___) that should be skipped on slides."""
@@ -1829,14 +1219,14 @@ JSON ONLY. NO OTHER TEXT."""
elif elementType == "paragraph":
self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)
elif elementType in ("code_block", "code"):
- self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier)
+ self._addCodeBlockToSlide(slide, element, styles, font_size_multiplier=font_size_multiplier)
elif elementType == "extracted_text":
content = element.get("content", "")
if content:
p = text_frame.add_paragraph()
pStyle = styles.get("paragraph", {})
fSize = Pt(max(10, int(pStyle.get("font_size", 14) * font_size_multiplier)))
- fColor = RGBColor(*self._getSafeColor(pStyle.get("color", (47, 47, 47))))
+ fColor = RGBColor(*self._getSafeColor(pStyle.get("color", self._paragraphColor())))
self._addMarkdownInlineRuns(p, content, fontSize=fSize, fontColor=fColor)
p.alignment = PP_ALIGN.LEFT
elif elementType == "reference":
@@ -1845,6 +1235,7 @@ JSON ONLY. NO OTHER TEXT."""
p.text = f"[Reference: {label}]"
p.font.italic = True
p.alignment = PP_ALIGN.LEFT
+ self._applyPrimaryFont(p.font)
else:
self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier)
@@ -1865,9 +1256,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})")
- # Calculate image dimensions within frame
if len(images) == 1:
- # Single image: fit to frame
img = images[0]
base64Data = img.get("base64Data")
@@ -1878,13 +1267,10 @@ JSON ONLY. NO OTHER TEXT."""
# Clean base64 data (remove data URI prefix if present)
if isinstance(base64Data, str):
if base64Data.startswith("data:image/"):
- # Extract base64 from data URI
base64Data = base64Data.split(",", 1)[1]
- # Remove any whitespace
base64Data = base64Data.strip()
try:
- # Decode base64
imageBytes = base64.b64decode(base64Data, validate=True)
if len(imageBytes) == 0:
logger.error("Decoded image bytes are empty")
@@ -1892,15 +1278,13 @@ JSON ONLY. NO OTHER TEXT."""
imageStream = io.BytesIO(imageBytes)
- # Get image dimensions using PIL
imgWidth, imgHeight = None, None
try:
from PIL import Image as PILImage
pilImage = PILImage.open(imageStream)
imgWidth, imgHeight = pilImage.size
- imageStream.seek(0) # Reset stream for PowerPoint
+ imageStream.seek(0)
- # Validate image dimensions - ensure they're reasonable
if imgWidth <= 1 or imgHeight <= 1:
logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size")
imgWidth, imgHeight = 800, 600
@@ -1909,43 +1293,34 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny")
except ImportError:
logger.warning("PIL not available, using default image size")
- imgWidth, imgHeight = 800, 600 # Default dimensions
+ imgWidth, imgHeight = 800, 600
except Exception as pil_error:
logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size")
imgWidth, imgHeight = 800, 600
imageStream.seek(0)
- # Ensure we have valid dimensions
if not imgWidth or not imgHeight or imgWidth <= 1 or imgHeight <= 1:
logger.warning("Invalid image dimensions, using default 800x600")
imgWidth, imgHeight = 800, 600
- # Scale to fit frame while maintaining aspect ratio
- # width and height parameters are already in Inches (from pptx.util.Inches)
- # Convert PIL pixel dimensions to Inches (assuming 96 DPI for PIL images)
imgWidthInches = Inches(imgWidth / 96.0)
imgHeightInches = Inches(imgHeight / 96.0)
- # Calculate scale to fit within frame
- # Inches objects support division, result is a float
try:
scale_width = width / imgWidthInches if imgWidthInches > 0 else 1.0
scale_height = height / imgHeightInches if imgHeightInches > 0 else 1.0
- scale = min(scale_width, scale_height, 1.0) # Don't scale up, only down
+ scale = min(scale_width, scale_height, 1.0)
finalWidth = imgWidthInches * scale
finalHeight = imgHeightInches * scale
- # Ensure minimum size (at least 1 inch) to prevent tiny rendering
minSize = Inches(1)
if finalWidth < minSize or finalHeight < minSize:
- # Use minimum size while maintaining aspect ratio
min_scale = max(minSize / imgWidthInches if imgWidthInches > 0 else 1.0,
minSize / imgHeightInches if imgHeightInches > 0 else 1.0)
finalWidth = max(minSize, imgWidthInches * min_scale)
finalHeight = max(minSize, imgHeightInches * min_scale)
- # Ensure we don't exceed frame bounds
if finalWidth > width:
finalWidth = width
finalHeight = imgHeightInches * (width / imgWidthInches) if imgWidthInches > 0 else finalHeight
@@ -1954,14 +1329,13 @@ JSON ONLY. NO OTHER TEXT."""
finalWidth = imgWidthInches * (height / imgHeightInches) if imgHeightInches > 0 else finalWidth
except (ZeroDivisionError, TypeError, AttributeError) as calc_error:
logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size")
- finalWidth = width * 0.9 # Use 90% of frame width
- finalHeight = height * 0.9 # Use 90% of frame height
+ finalWidth = width * 0.9
+ finalHeight = height * 0.9
# Center in frame
frame_left = left + (width - finalWidth) / 2
frame_top = top + (height - finalHeight) / 2
- # Add image to slide
imageStream.seek(0)
slide.shapes.add_picture(imageStream, frame_left, frame_top, width=finalWidth, height=finalHeight)
logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})")
@@ -1969,13 +1343,23 @@ JSON ONLY. NO OTHER TEXT."""
# Add caption if available
caption = img.get("caption") or img.get("altText")
if caption and caption != "Image":
+ captionStyle = styles.get("caption", {})
captionTop = frame_top + finalHeight + Inches(0.1)
captionBox = slide.shapes.add_textbox(left, captionTop, width, Inches(0.4))
captionFrame = captionBox.text_frame
captionFrame.text = caption
- captionFrame.paragraphs[0].font.size = Pt(10)
- captionFrame.paragraphs[0].font.italic = True
- captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER
+ captionFrame.paragraphs[0].font.size = Pt(captionStyle.get("font_size", 10))
+ captionFrame.paragraphs[0].font.italic = captionStyle.get("italic", True)
+ captionColor = self._getSafeColor(captionStyle.get("color", self._paragraphColor()))
+ captionFrame.paragraphs[0].font.color.rgb = RGBColor(*captionColor)
+ captionAlignVal = captionStyle.get("align", "center")
+ if captionAlignVal == "left":
+ captionFrame.paragraphs[0].alignment = PP_ALIGN.LEFT
+ elif captionAlignVal == "right":
+ captionFrame.paragraphs[0].alignment = PP_ALIGN.RIGHT
+ else:
+ captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER
+ self._applyPrimaryFont(captionFrame.paragraphs[0].font)
except base64.binascii.Error as b64_error:
logger.error(f"Invalid base64 data: {str(b64_error)}")
except Exception as img_error:
@@ -1993,7 +1377,6 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Image {idx} has no base64Data")
continue
- # Clean base64 data
if isinstance(base64Data, str):
if base64Data.startswith("data:image/"):
base64Data = base64Data.split(",", 1)[1]
@@ -2012,22 +1395,18 @@ JSON ONLY. NO OTHER TEXT."""
imageStream = io.BytesIO(imageBytes)
- # Try to get dimensions for better scaling
try:
from PIL import Image as PILImage
pilImage = PILImage.open(imageStream)
imgW, imgH = pilImage.size
- # Scale to fit grid cell while maintaining aspect ratio
scale = min(imgWidth / (imgW * (72.0 / 96.0)), imgHeight / (imgH * (72.0 / 96.0)), 1.0)
finalW = (imgW * (72.0 / 96.0)) * scale
finalH = (imgH * (72.0 / 96.0)) * scale
- # Center in grid cell
cell_left = img_left + (imgWidth - finalW) / 2
cell_top = img_top + (imgHeight - finalH) / 2
imageStream.seek(0)
slide.shapes.add_picture(imageStream, cell_left, cell_top, width=finalW, height=finalH)
except (ImportError, Exception):
- # Fallback: use grid cell size directly
imageStream.seek(0)
slide.shapes.add_picture(imageStream, img_left, img_top, width=imgWidth, height=imgHeight)
diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py
index 3c6fdd5e..0b544dd8 100644
--- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py
@@ -6,6 +6,7 @@ Excel renderer for report generation using openpyxl.
from .documentRendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
+from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge
from typing import Dict, Any, List, Optional
import io
import base64
@@ -128,176 +129,6 @@ class RendererXlsx(BaseRenderer):
)
]
- def _generateExcel(self, content: str, title: str) -> str:
- """Generate Excel content using openpyxl."""
- try:
- # Create workbook
- wb = Workbook()
-
- # Remove default sheet
- wb.remove(wb.active)
-
- # Create sheets
- summarySheet = wb.create_sheet("Summary", 0)
- dataSheet = wb.create_sheet("Data", 1)
- analysisSheet = wb.create_sheet("Analysis", 2)
-
- # Add content to sheets
- self._populateSummarySheet(summarySheet, title, wb)
- self._populateDataSheet(dataSheet, content)
- self._populateAnalysisSheet(analysisSheet, content)
-
- # Ensure workbook has at least one sheet (Excel requirement)
- if len(wb.worksheets) == 0:
- wb.create_sheet("Sheet1")
-
- # Save to buffer with error handling
- buffer = io.BytesIO()
- try:
- wb.save(buffer)
- buffer.seek(0)
- except Exception as save_error:
- self.logger.error(f"Error saving Excel workbook: {str(save_error)}")
- # Try to fix common issues and retry
- try:
- # Remove any invalid sheet names or empty sheets
- for sheet in list(wb.worksheets):
- if not sheet.title or len(sheet.title.strip()) == 0:
- wb.remove(sheet)
- # Ensure at least one sheet exists
- if len(wb.worksheets) == 0:
- wb.create_sheet("Sheet1")
- # Retry save
- buffer = io.BytesIO()
- wb.save(buffer)
- buffer.seek(0)
- except Exception as retry_error:
- self.logger.error(f"Retry save also failed: {str(retry_error)}")
- raise Exception(f"Failed to save Excel workbook: {str(save_error)}")
-
- # Convert to base64
- excelBytes = buffer.getvalue()
- excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
-
- return excelBase64
-
- except Exception as e:
- self.logger.error(f"Error generating Excel: {str(e)}")
- raise
-
- def _populateSummarySheet(self, sheet, title: str, wb: Workbook = None):
- """Populate the summary sheet."""
- try:
- # Title
- sheet['A1'] = title
- sheet['A1'].font = Font(size=16, bold=True)
- sheet['A1'].alignment = Alignment(horizontal='left')
-
- # Generation info
- sheet['A3'] = "Generated:"
- sheet['B3'] = self._formatTimestamp()
- sheet['A4'] = "Status:"
- sheet['B4'] = "Generated Successfully"
-
- # Key metrics placeholder
- sheet['A6'] = "Key Metrics:"
- sheet['A6'].font = Font(bold=True)
- sheet['A7'] = "Total Items:"
- # Only add formula if Data sheet exists (check workbook sheets)
- if wb and "Data" in [s.title for s in wb.worksheets]:
- sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
- else:
- sheet['B7'] = "N/A" # Data sheet not available
-
- # Auto-adjust column widths
- sheet.column_dimensions['A'].width = 20
- sheet.column_dimensions['B'].width = 30
-
- except Exception as e:
- self.logger.warning(f"Could not populate summary sheet: {str(e)}")
-
- def _populateDataSheet(self, sheet, content: str):
- """Populate the data sheet."""
- try:
- # Headers
- headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
- for col, header in enumerate(headers, 1):
- cell = sheet.cell(row=1, column=col, value=header)
- cell.font = Font(bold=True)
- cell.fill = PatternFill(start_color="FFCCCCCC", end_color="FFCCCCCC", fill_type="solid")
-
- # Process content
- lines = content.split('\n')
- row = 2
-
- for line in lines:
- line = line.strip()
- if not line:
- continue
-
- # Check for table data (lines with |)
- if '|' in line:
- cells = [cell.strip() for cell in line.split('|') if cell.strip()]
- for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
- sheet.cell(row=row, column=col, value=cellData)
- row += 1
- else:
- # Regular content
- sheet.cell(row=row, column=1, value=line)
- row += 1
-
- # Auto-adjust column widths
- for col in range(1, 6):
- sheet.column_dimensions[get_column_letter(col)].width = 20
-
- except Exception as e:
- self.logger.warning(f"Could not populate data sheet: {str(e)}")
-
- def _populateAnalysisSheet(self, sheet, content: str):
- """Populate the analysis sheet."""
- try:
- # Title
- sheet['A1'] = "Analysis & Insights"
- sheet['A1'].font = Font(size=14, bold=True)
-
- # Content analysis
- lines = content.split('\n')
- row = 3
-
- sheet['A3'] = "Content Analysis:"
- sheet['A3'].font = Font(bold=True)
- row += 1
-
- # Count different types of content
- tableLines = sum(1 for line in lines if '|' in line)
- listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
- textLines = len(lines) - tableLines - listLines
-
- sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
- row += 1
- sheet[f'A{row}'] = f"Table Rows: {tableLines}"
- row += 1
- sheet[f'A{row}'] = f"List Items: {listLines}"
- row += 1
- sheet[f'A{row}'] = f"Text Lines: {textLines}"
- row += 2
-
- # Recommendations
- sheet[f'A{row}'] = "Recommendations:"
- sheet[f'A{row}'].font = Font(bold=True)
- row += 1
- sheet[f'A{row}'] = "1. Review data accuracy"
- row += 1
- sheet[f'A{row}'] = "2. Consider additional analysis"
- row += 1
- sheet[f'A{row}'] = "3. Update regularly"
-
- # Auto-adjust column width
- sheet.column_dimensions['A'].width = 30
-
- except Exception as e:
- self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
-
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
@@ -308,12 +139,9 @@ class RendererXlsx(BaseRenderer):
# Store unified style for use by inline-run helpers
self._unifiedStyle = style
- # Get style set: prefer unified style, fall back to legacy approach
- if style:
- styles = self._convertUnifiedStyleToInternal(style)
- styles = self._convertColorsFormat(styles)
- else:
- styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
+ # Convert unified style to internal format
+ styles = self._convertUnifiedStyleToInternal(style)
+ styles = self._convertColorsFormat(styles)
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(jsonContent):
@@ -380,109 +208,6 @@ class RendererXlsx(BaseRenderer):
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}")
- async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]:
- """Get style set - use styles from document generation metadata if available,
- otherwise enhance default styles with AI if userPrompt provided.
-
- WICHTIG: In a dynamic scalable AI system, styling should come from document generation,
- not be generated separately by renderers. Only fall back to AI if styles not provided.
-
- Args:
- extractedContent: Document content with metadata (may contain styles)
- userPrompt: User's prompt (AI will detect style instructions in any language)
- aiService: AI service (used only if styles not in metadata and userPrompt provided)
- templateName: Name of template style set (None = default)
-
- Returns:
- Dict with style definitions for all document styles
- """
- # Get default style set
- defaultStyleSet = self._getDefaultStyleSet()
-
- # FIRST: Check if styles are provided in document generation metadata (preferred approach)
- if extractedContent:
- metadata = extractedContent.get("metadata", {})
- if isinstance(metadata, dict):
- styles = metadata.get("styles")
- if styles and isinstance(styles, dict):
- self.logger.debug("Using styles from document generation metadata")
- enhancedStyleSet = self._convertColorsFormat(styles)
- return self._validateStylesContrast(enhancedStyleSet)
-
- # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata)
- if userPrompt and aiService:
- self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...")
- enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService)
- # Convert colors to Excel format after getting styles
- enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet)
- return self._validateStylesContrast(enhancedStyleSet)
- else:
- # Use default styles only
- return defaultStyleSet
-
- async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]:
- """Enhance default styles with AI based on user prompt."""
- try:
- style_template = self._createAiStyleTemplate("xlsx", userPrompt, defaultStyleSet)
- enhanced_styles = await self._getAiStylesWithExcelColors(aiService, style_template, defaultStyleSet)
- return enhanced_styles
- except Exception as e:
- self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles")
- return defaultStyleSet
-
- def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Validate and fix contrast issues in AI-generated styles."""
- try:
- # Fix table header contrast
- if "table_header" in styles:
- header = styles["table_header"]
- bgColor = header.get("background", "FFFFFFFF")
- textColor = header.get("text_color", "FF000000")
-
- # Normalize colors (remove # if present, ensure aRGB format)
- bgColor = self._normalizeColor(bgColor)
- textColor = self._normalizeColor(textColor)
-
- # If both are white or both are dark, fix it
- if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
- header["background"] = "FF4F4F4F"
- header["text_color"] = "FFFFFFFF"
- elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
- header["background"] = "FF4F4F4F"
- header["text_color"] = "FFFFFFFF"
- else:
- # Ensure colors are in correct format
- header["background"] = bgColor
- header["text_color"] = textColor
-
- # Fix table cell contrast
- if "table_cell" in styles:
- cell = styles["table_cell"]
- bgColor = cell.get("background", "FFFFFFFF")
- textColor = cell.get("text_color", "FF000000")
-
- # Normalize colors (remove # if present, ensure aRGB format)
- bgColor = self._normalizeColor(bgColor)
- textColor = self._normalizeColor(textColor)
-
- # If both are white or both are dark, fix it
- if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF":
- cell["background"] = "FFFFFFFF"
- cell["text_color"] = "FF2F2F2F"
- elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000":
- cell["background"] = "FFFFFFFF"
- cell["text_color"] = "FF2F2F2F"
- else:
- # Ensure colors are in correct format
- cell["background"] = bgColor
- cell["text_color"] = textColor
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"Style validation failed: {str(e)}")
- return self._getDefaultStyleSet()
-
def _normalizeColor(self, colorValue: str) -> str:
"""Normalize color to aRGB format without # prefix."""
if not isinstance(colorValue, str):
@@ -506,77 +231,10 @@ class RendererXlsx(BaseRenderer):
# Unexpected format, return default black
return "FF000000"
- def _getDefaultStyleSet(self) -> Dict[str, Any]:
- """Default Excel style set - used when no style instructions present."""
- return {
- "title": {"font_size": 16, "color": "FF1F4E79", "bold": True, "align": "left"},
- "heading": {"font_size": 14, "color": "FF2F2F2F", "bold": True, "align": "left"},
- "table_header": {"background": "FF4F4F4F", "text_color": "FFFFFFFF", "bold": True, "align": "center"},
- "table_cell": {"background": "FFFFFFFF", "text_color": "FF2F2F2F", "bold": False, "align": "left"},
- "bullet_list": {"font_size": 11, "color": "FF2F2F2F", "indent": 2},
- "paragraph": {"font_size": 11, "color": "FF2F2F2F", "bold": False, "align": "left"},
- "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
- }
-
def _renderInlineRuns(self, runs: list) -> str:
"""Flatten inline runs to plain text for Excel cells."""
return "".join(r.get("value", "") for r in runs)
- async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
- """Get AI styles with proper Excel color conversion."""
- if not aiService:
- return defaultStyles
-
- try:
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
-
- requestOptions = AiCallOptions()
- requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
-
- request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
- response = await aiService.callAi(request)
-
- import json
- import re
-
- # Clean and parse JSON
- result = response.content.strip() if response and response.content else ""
-
- # Check if result is empty
- if not result:
- self.logger.warning("AI styling returned empty response, using defaults")
- return defaultStyles
-
- # Extract JSON from markdown if present
- json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
- if json_match:
- result = json_match.group(1).strip()
- self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
- elif result.startswith('```json'):
- result = re.sub(r'^```json\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
- self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
- elif result.startswith('```'):
- result = re.sub(r'^```\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
- self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
-
- # Try to parse JSON
- try:
- styles = json.loads(result)
- except json.JSONDecodeError as json_error:
- self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
- return defaultStyles
-
- # Convert colors to Excel aRGB format
- styles = self._convertColorsFormat(styles)
-
- return styles
-
- except Exception as e:
- self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
- return defaultStyles
-
def _getSafeAlignment(self, alignValue: Any) -> str:
"""Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'."""
if not alignValue:
@@ -627,15 +285,27 @@ class RendererXlsx(BaseRenderer):
except Exception:
return default
+ @staticmethod
+ def _looksLikeColor(value: str) -> bool:
+ """Return True if *value* looks like a hex color (e.g. ``#e1e4e8`` or ``FF24292E``)."""
+ raw = value.lstrip('#')
+ if len(raw) not in (3, 6, 8):
+ return False
+ return all(c in '0123456789abcdefABCDEF' for c in raw)
+
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
- """Convert hex colors to aRGB format for Excel compatibility (without # prefix)."""
+ """Convert hex colors to aRGB format for Excel compatibility (without # prefix).
+
+ Only touches values that actually look like hex colors so that non-color
+ strings (font names, border style keywords, bullet chars, etc.) are
+ preserved intact.
+ """
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for styleName, styleConfig in styles.items():
if isinstance(styleConfig, dict):
for prop, value in styleConfig.items():
- if isinstance(value, str):
- # Normalize color to aRGB format without # prefix
+ if isinstance(value, str) and self._looksLikeColor(value):
styles[styleName][prop] = self._normalizeColor(value)
return styles
except Exception as e:
@@ -789,199 +459,6 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
- def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
- """Populate a sheet with a single table section."""
- try:
- # Sheet title
- sheet['A1'] = sheetTitle
- title_style = styles.get("title", {})
- sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
- sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left")))
-
- # Get table data from elements (canonical JSON format)
- elements = section.get("elements", [])
- if elements and isinstance(elements, list) and len(elements) > 0:
- table_element = elements[0]
- # Extract from nested content structure
- content = table_element.get("content", {})
- if not isinstance(content, dict):
- headers = []
- rows = []
- else:
- headers = content.get("headers") or []
- rows = content.get("rows") or []
- # Ensure headers and rows are lists
- if not isinstance(headers, list):
- headers = []
- if not isinstance(rows, list):
- rows = []
- else:
- headers = []
- rows = []
-
- if not headers and not rows:
- sheet['A3'] = "No table data available"
- return
-
- # Add headers
- header_style = styles.get("table_header", {})
- for col, header in enumerate(headers, 1):
- cell = sheet.cell(row=3, column=col, value=header)
- if header_style.get("bold"):
- cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
- if header_style.get("background"):
- cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
-
- # Add rows - handle both array format and cells object format
- cell_style = styles.get("table_cell", {})
- header_count = len(headers)
- for row_idx, row_data in enumerate(rows, 4):
- # Handle different row formats
- if isinstance(row_data, list):
- # Array format: [value1, value2, ...]
- cell_values = row_data
- elif isinstance(row_data, dict) and "cells" in row_data:
- # Cells object format: {"cells": [{"value": ...}, ...]}
- cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
- else:
- # Unknown format, skip
- continue
-
- # Validate row column count matches headers - pad or truncate if needed
- if len(cell_values) < header_count:
- # Pad with empty strings if row has fewer columns
- cell_values.extend([""] * (header_count - len(cell_values)))
- elif len(cell_values) > header_count:
- # Truncate if row has more columns than headers
- cell_values = cell_values[:header_count]
-
- for col_idx, cell_value in enumerate(cell_values, 1):
- # Extract value if it's a dict with "value" key
- if isinstance(cell_value, dict):
- actual_value = cell_value.get("value", "")
- else:
- actual_value = cell_value
-
- cell = sheet.cell(row=row_idx, column=col_idx, value=actual_value)
- if cell_style.get("text_color"):
- cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
-
- # Auto-adjust column widths
- for col in range(1, len(headers) + 1):
- sheet.column_dimensions[get_column_letter(col)].width = 20
-
- except Exception as e:
- self.logger.warning(f"Could not populate table sheet: {str(e)}")
-
- def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
- """Populate the main sheet with document overview and all content."""
- try:
- # Document title - use documents[].title as primary source, fallback to metadata.title
- documents = jsonContent.get("documents", [])
- if documents and isinstance(documents[0], dict) and documents[0].get("title"):
- documentTitle = documents[0].get("title")
- else:
- documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
- sheet['A1'] = documentTitle
-
- # Safety check for title style
- title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "left"})
- try:
- safe_color = self._getSafeColor(title_style["color"])
- sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
- sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
- except Exception as font_error:
- # Try with a safe color
- sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
- sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"]))
-
- # Generation info
- sheet['A3'] = "Generated:"
- sheet['B3'] = self._formatTimestamp()
- sheet['A4'] = "Status:"
- sheet['B4'] = "Generated Successfully"
-
- # Document metadata
- metadata = jsonContent.get("metadata", {})
- if metadata:
- sheet['A6'] = "Document Information:"
- sheet['A6'].font = Font(bold=True)
-
- row = 7
- for key, value in metadata.items():
- if key != "title":
- sheet[f'A{row}'] = f"{key.title()}:"
- sheet[f'B{row}'] = str(value)
- row += 1
-
- # Content overview
- sections = self._extractSections(jsonContent)
- sheet[f'A{row + 1}'] = "Content Overview:"
- sheet[f'A{row + 1}'].font = Font(bold=True)
-
- row += 2
- sheet[f'A{row}'] = f"Total Sections: {len(sections)}"
-
- # Count different content types
- content_types = {}
- for section in sections:
- content_type = section.get("content_type", "unknown")
- content_types[content_type] = content_types.get(content_type, 0) + 1
-
- for content_type, count in content_types.items():
- row += 1
- sheet[f'A{row}'] = f"{content_type.title()} Sections: {count}"
-
- # Add all content to this sheet
- row += 2
- for section in sections:
- row = self._addSectionToSheet(sheet, section, styles, row)
- row += 1 # Empty row between sections
-
- # Auto-adjust column widths
- sheet.column_dimensions['A'].width = 20
- sheet.column_dimensions['B'].width = 30
-
- except Exception as e:
- self.logger.warning(f"Could not populate main sheet: {str(e)}")
-
- def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
- """Populate additional sheets based on content types."""
- try:
- sections = self._extractSections(jsonContent)
-
- for sheetName in sheetNames:
- if sheetName not in sheets:
- continue
-
- sheet = sheets[sheetName]
- sheetTitle = sheetName.title()
- sheet['A1'] = sheetTitle
- sheet['A1'].font = Font(size=16, bold=True)
-
- row = 3
-
- # Filter sections by content type
- if sheetName == "tables":
- filtered_sections = [s for s in sections if s.get("content_type") == "table"]
- elif sheetName == "lists":
- filtered_sections = [s for s in sections if s.get("content_type") == "list"]
- elif sheetName == "text":
- filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
- else:
- filtered_sections = sections
-
- for section in filtered_sections:
- row = self._addSectionToSheet(sheet, section, styles, row)
- row += 1 # Empty row between sections
-
- # Auto-adjust column widths
- for col in range(1, 6):
- sheet.column_dimensions[get_column_letter(col)].width = 20
-
- except Exception as e:
- self.logger.warning(f"Could not populate content type sheets: {str(e)}")
-
def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
@@ -1161,20 +638,21 @@ class RendererXlsx(BaseRenderer):
text = text[:32764] + "..."
return text
+ def _buildTableBorder(self, borderStyle: str, borderColor: str) -> Border:
+ """Build an openpyxl ``Border`` matching the requested *borderStyle*."""
+ if borderStyle == "none":
+ noSide = Side(style=None)
+ return Border(left=noSide, right=noSide, top=noSide, bottom=noSide)
+ if borderStyle == "horizontal":
+ hSide = Side(style="thin", color=borderColor)
+ noSide = Side(style=None)
+ return Border(left=noSide, right=noSide, top=hSide, bottom=hSide)
+ thinSide = Side(style="thin", color=borderColor)
+ return Border(left=thinSide, right=thinSide, top=thinSide, bottom=thinSide)
+
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
- """
- Add a table element to Excel sheet with proper formatting and borders.
-
- PERFORMANCE OPTIMIZATIONS:
- 1. Pre-calculated style objects (Font, PatternFill, Alignment) to avoid repeated creation
- 2. Optimized _sanitizeCellValue() with regex pre-checks for numbers and dates
- 3. Batch cell operations where possible
- 4. Reduced exception handling overhead
-
- Expected performance: 10-30x faster for large tables compared to unoptimized version.
- """
+ """Add a table element to Excel sheet with styling, borders, banding and alignment."""
try:
- # Extract from nested content structure
content = element.get("content", {})
if not isinstance(content, dict):
return startRow
@@ -1182,7 +660,6 @@ class RendererXlsx(BaseRenderer):
headers = content.get("headers", [])
rows = content.get("rows", [])
- # Ensure headers and rows are lists
if not isinstance(headers, list):
headers = []
if not isinstance(rows, list):
@@ -1191,131 +668,102 @@ class RendererXlsx(BaseRenderer):
if not headers and not rows:
return startRow
- # Define border style
- thin_border = Border(
- left=Side(style='thin'),
- right=Side(style='thin'),
- top=Side(style='thin'),
- bottom=Side(style='thin')
+ # --- per-table style override merge ---
+ tableStyleOverride = content.get("tableStyle", {})
+ mergedStyles = deepMerge(styles, tableStyleOverride) if tableStyleOverride else styles
+
+ # --- border ---
+ tableBorderCfg = mergedStyles.get("table_border", {})
+ borderColor = self._getSafeColor(tableBorderCfg.get("color", "FF000000"))
+ borderStyleName = tableBorderCfg.get("style", "grid")
+ tableBorder = self._buildTableBorder(borderStyleName, borderColor)
+
+ # --- header style ---
+ headerStyle = mergedStyles.get("table_header", {})
+ primaryFont = mergedStyles.get("fonts", {}).get("primary")
+ headerFontColor = self._getSafeColor(headerStyle.get("text_color", "FF000000"))
+ headerFontSize = headerStyle.get("font_size")
+ headerFont = Font(name=primaryFont, bold=headerStyle.get("bold", True),
+ size=headerFontSize, color=headerFontColor)
+ headerFill = None
+ if headerStyle.get("background"):
+ hdrBg = self._getSafeColor(headerStyle["background"])
+ headerFill = PatternFill(start_color=hdrBg, end_color=hdrBg, fill_type="solid")
+ headerAlignment = Alignment(
+ horizontal=self._getSafeAlignment(headerStyle.get("align", "left")),
+ vertical="center"
)
headerRow = startRow
- header_style = styles.get("table_header", {})
-
- # Pre-calculate and cache style objects to avoid repeated parsing
- header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000"))
- header_font = Font(bold=header_style.get("bold", True), color=header_font_color)
- header_bg_color = None
- header_fill = None
- if header_style.get("background"):
- header_bg_color = self._getSafeColor(header_style["background"])
- header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid")
- header_alignment = Alignment(
- horizontal=self._getSafeAlignment(header_style.get("align", "left")),
- vertical="center"
- )
-
- # Add headers with formatting - OPTIMIZED: use cached style objects
for col, header in enumerate(headers, 1):
runs = self._inlineRunsForCell(header)
headerText = self._renderInlineRuns(runs)
- sanitized_header = self._sanitizeCellValue(headerText)
- cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
-
- # Apply styling with fallbacks - use pre-calculated objects
- try:
- cell.font = header_font
- except Exception:
- try:
- cell.font = Font(bold=True, color=self._getSafeColor("FF000000"))
- except Exception:
- pass
-
- try:
- if header_fill:
- cell.fill = header_fill
- except Exception:
- pass
-
- try:
- cell.alignment = header_alignment
- except Exception:
- try:
- cell.alignment = Alignment(horizontal="left", vertical="center")
- except Exception:
- pass
-
- try:
- cell.border = thin_border
- except Exception:
- pass
+ cell = sheet.cell(row=headerRow, column=col, value=self._sanitizeCellValue(headerText))
+ cell.font = headerFont
+ if headerFill:
+ cell.fill = headerFill
+ cell.alignment = headerAlignment
+ cell.border = tableBorder
startRow += 1
- # Add rows with formatting - OPTIMIZED: pre-calculate style objects
- cell_style = styles.get("table_cell", {})
- header_count = len(headers)
+ # --- cell style ---
+ cellStyle = mergedStyles.get("table_cell", {})
+ headerCount = len(headers)
+ cellTextColor = self._getSafeColor(cellStyle.get("text_color")) if cellStyle.get("text_color") else None
+ cellFontSize = cellStyle.get("font_size")
+ cellFont = Font(name=primaryFont, size=cellFontSize,
+ color=cellTextColor) if cellTextColor else None
- # Pre-calculate and cache style objects to avoid repeated parsing
- cell_text_color = None
- cell_font = None
- if cell_style.get("text_color"):
- cell_text_color = self._getSafeColor(cell_style["text_color"])
- cell_font = Font(color=cell_text_color)
- cell_alignment = Alignment(
- horizontal=self._getSafeAlignment(cell_style.get("align", "left")),
- vertical="center"
- )
+ # --- banding ---
+ bandingCfg = mergedStyles.get("table_banding", {})
+ bandingEnabled = bandingCfg.get("enabled", True)
+ evenFill = None
+ oddFill = None
+ if bandingEnabled:
+ evenColor = self._getSafeColor(bandingCfg.get("even", "FFF6F8FA"))
+ oddColor = self._getSafeColor(bandingCfg.get("odd", "FFFFFFFF"))
+ evenFill = PatternFill(start_color=evenColor, end_color=evenColor, fill_type="solid")
+ oddFill = PatternFill(start_color=oddColor, end_color=oddColor, fill_type="solid")
- for row_data in rows:
- # Handle different row formats
- if isinstance(row_data, list):
- cell_values = row_data
- elif isinstance(row_data, dict) and "cells" in row_data:
- cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])]
- else:
- continue
-
- # Validate row column count matches headers - pad or truncate if needed
- if len(cell_values) < header_count:
- # Pad with empty strings if row has fewer columns
- cell_values.extend([""] * (header_count - len(cell_values)))
- elif len(cell_values) > header_count:
- # Truncate if row has more columns than headers
- cell_values = cell_values[:header_count]
-
- for col, cell_value in enumerate(cell_values, 1):
- runs = self._inlineRunsForCell(cell_value)
- cellText = self._renderInlineRuns(runs)
- sanitized_value = self._sanitizeCellValue(cellText)
- cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
-
- # Apply styling with fallbacks - use pre-calculated objects
- try:
- if cell_font:
- cell.font = cell_font
- except Exception:
- pass
-
- try:
- cell.alignment = cell_alignment
- except Exception:
- try:
- cell.alignment = Alignment(horizontal="left", vertical="center")
- except Exception:
- pass
-
- try:
- cell.border = thin_border
- except Exception:
- pass
-
- startRow += 1
+ # --- column alignments ---
+ colAlignments = self._inferColumnAlignments(headers, rows, mergedStyles)
+
+ for dataRowIdx, rowData in enumerate(rows):
+ if isinstance(rowData, list):
+ cellValues = rowData
+ elif isinstance(rowData, dict) and "cells" in rowData:
+ cellValues = [cellObj.get("value", "") for cellObj in rowData.get("cells", [])]
+ else:
+ continue
+
+ if len(cellValues) < headerCount:
+ cellValues.extend([""] * (headerCount - len(cellValues)))
+ elif len(cellValues) > headerCount:
+ cellValues = cellValues[:headerCount]
+
+ for col, cellValue in enumerate(cellValues, 1):
+ runs = self._inlineRunsForCell(cellValue)
+ cellText = self._renderInlineRuns(runs)
+ cell = sheet.cell(row=startRow, column=col, value=self._sanitizeCellValue(cellText))
+
+ if cellFont:
+ cell.font = cellFont
+
+ if bandingEnabled:
+ rowFill = evenFill if dataRowIdx % 2 == 0 else oddFill
+ if rowFill:
+ cell.fill = rowFill
+
+ colAlign = colAlignments[col - 1] if col - 1 < len(colAlignments) else "left"
+ cell.alignment = Alignment(horizontal=colAlign, vertical="center")
+ cell.border = tableBorder
+
+ startRow += 1
- # Auto-adjust column widths
for col in range(1, len(headers) + 1):
- column_letter = get_column_letter(col)
- sheet.column_dimensions[column_letter].width = 20
+ columnLetter = get_column_letter(col)
+ sheet.column_dimensions[columnLetter].width = 20
return startRow
@@ -1334,12 +782,19 @@ class RendererXlsx(BaseRenderer):
listItems = []
listStyle = styles.get("bullet_list", {})
+ bulletChar = listStyle.get("bullet_char", "\u2022")
+ fontSize = listStyle.get("font_size", 11)
+ primaryFont = styles.get("fonts", {}).get("primary")
+ cellFont = Font(
+ name=primaryFont,
+ size=fontSize,
+ color=self._getSafeColor(listStyle.get("color"))
+ )
for item in listItems:
runs = self._inlineRunsForListItem(item)
text = self._renderInlineRuns(runs)
- sheet.cell(row=startRow, column=1, value=f"\u2022 {text}")
- if listStyle.get("color"):
- sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"]))
+ cell = sheet.cell(row=startRow, column=1, value=f"{bulletChar} {text}")
+ cell.font = cellFont
startRow += 1
return startRow
@@ -1362,9 +817,10 @@ class RendererXlsx(BaseRenderer):
if text:
sheet.cell(row=startRow, column=1, value=text)
- paragraph_style = styles.get("paragraph", {})
- if paragraph_style.get("color"):
- sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
+ paragraphStyle = styles.get("paragraph", {})
+ primaryFont = styles.get("fonts", {}).get("primary")
+ if paragraphStyle.get("color"):
+ sheet.cell(row=startRow, column=1).font = Font(name=primaryFont, color=self._getSafeColor(paragraphStyle["color"]))
startRow += 1
@@ -1387,15 +843,13 @@ class RendererXlsx(BaseRenderer):
if text:
sheet.cell(row=startRow, column=1, value=text)
- heading_style = styles.get("heading", {})
- font_size = heading_style.get("font_size", 14)
- if level > 1:
- font_size = max(10, font_size - (level - 1) * 2)
-
+ headingStyle = styles.get(f"heading{level}", styles.get("heading1", {}))
+ primaryFont = styles.get("fonts", {}).get("primary")
sheet.cell(row=startRow, column=1).font = Font(
- size=font_size,
- bold=True,
- color=self._getSafeColor(heading_style.get("color", "FF000000"))
+ name=primaryFont,
+ size=headingStyle.get("font_size", 14),
+ bold=headingStyle.get("bold", True),
+ color=self._getSafeColor(headingStyle.get("color"))
)
startRow += 1
@@ -1506,37 +960,39 @@ class RendererXlsx(BaseRenderer):
language = content.get("language", "")
if code:
- code_style = styles.get("code_block", {})
+ codeStyle = styles.get("code_block", {})
- # Pre-calculate and cache style objects to avoid repeated parsing
- code_font_name = code_style.get("font", "Courier New")
- code_font_size = code_style.get("font_size", 10)
- code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F"))
- code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color)
+ codeFontName = codeStyle.get("font", styles.get("fonts", {}).get("monospace", "Consolas"))
+ codeFontSize = codeStyle.get("font_size", 10)
+ codeTextColor = self._getSafeColor(codeStyle.get("color", "FF2F2F2F"))
+ codeFont = Font(name=codeFontName, size=codeFontSize, color=codeTextColor)
- code_bg_color = None
- code_fill = None
- if code_style.get("background"):
- code_bg_color = self._getSafeColor(code_style["background"])
- code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid")
+ codeFill = None
+ if codeStyle.get("background"):
+ codeBgColor = self._getSafeColor(codeStyle["background"])
+ codeFill = PatternFill(start_color=codeBgColor, end_color=codeBgColor, fill_type="solid")
+
+ codeBorder = None
+ if codeStyle.get("border_color"):
+ codeBorderColor = self._getSafeColor(codeStyle["border_color"])
+ codeSide = Side(style="thin", color=codeBorderColor)
+ codeBorder = Border(left=codeSide, right=codeSide, top=codeSide, bottom=codeSide)
- # Add language label if present
if language:
langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):")
- langCell.font = Font(bold=True, color=code_text_color)
+ langCell.font = Font(bold=True, color=codeTextColor)
startRow += 1
- # Split code into lines and add each line - use cached style objects
- code_lines = code.split('\n')
- for line in code_lines:
+ codeLines = code.split('\n')
+ for line in codeLines:
codeCell = sheet.cell(row=startRow, column=1, value=line)
- codeCell.font = code_font
- # Set background color if specified
- if code_fill:
- codeCell.fill = code_fill
+ codeCell.font = codeFont
+ if codeFill:
+ codeCell.fill = codeFill
+ if codeBorder:
+ codeCell.border = codeBorder
startRow += 1
- # Add spacing after code block
startRow += 1
return startRow
diff --git a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py
index e6447c73..8d60c282 100644
--- a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py
+++ b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py
@@ -11,39 +11,56 @@ DEFAULT_STYLE: Dict[str, Any] = {
"monospace": "Consolas",
},
"colors": {
- "primary": "#1F3864",
- "secondary": "#2C3E50",
- "accent": "#2980B9",
+ "primary": "#24292e",
+ "secondary": "#586069",
+ "accent": "#0366d6",
"background": "#FFFFFF",
},
"documentTitle": {
"sizePt": 28,
"weight": "bold",
- "color": "#1F3864",
+ "color": "#24292e",
"spaceBeforePt": 0,
"spaceAfterPt": 18,
"align": "center",
},
"headings": {
- "h1": {"sizePt": 22, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 22, "spaceAfterPt": 8},
- "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 20, "spaceAfterPt": 6},
- "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 16, "spaceAfterPt": 4},
- "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 12, "spaceAfterPt": 3},
+ "h1": {"sizePt": 22, "weight": "bold", "color": "#24292e", "spaceBeforePt": 24, "spaceAfterPt": 8},
+ "h2": {"sizePt": 18, "weight": "bold", "color": "#24292e", "spaceBeforePt": 20, "spaceAfterPt": 6},
+ "h3": {"sizePt": 14, "weight": "bold", "color": "#586069", "spaceBeforePt": 16, "spaceAfterPt": 4},
+ "h4": {"sizePt": 12, "weight": "bold", "color": "#586069", "spaceBeforePt": 12, "spaceAfterPt": 3},
},
- "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"},
+ "paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#24292e", "align": "left"},
"table": {
- "headerBg": "#1F3864",
- "headerFg": "#FFFFFF",
+ "headerBg": "#f6f8fa",
+ "headerFg": "#24292e",
"headerSizePt": 10,
"bodySizePt": 10,
- "rowBandingEven": "#F2F6FC",
+ "rowBandingEven": "#f6f8fa",
"rowBandingOdd": "#FFFFFF",
- "borderColor": "#CBD5E1",
+ "borderColor": "#e1e4e8",
"borderWidthPt": 0.5,
+ "borderStyle": "grid",
+ "bandingEnabled": True,
+ "cellPaddingPt": 4,
},
"list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11},
"image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"},
- "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"},
+ "codeBlock": {"fontSizePt": 9, "background": "#f6f8fa", "borderColor": "#e1e4e8"},
+ "coverPage": {
+ "titleSizePt": 28,
+ "subtitleSizePt": 16,
+ "authorSizePt": 12,
+ "dateSizePt": 12,
+ "titleColor": "#24292e",
+ "subtitleColor": "#586069",
+ },
+ "caption": {
+ "sizePt": 10,
+ "color": "#586069",
+ "italic": True,
+ "align": "center",
+ },
"page": {
"format": "A4",
"marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60},
@@ -57,98 +74,9 @@ DEFAULT_STYLE: Dict[str, Any] = {
}
-# ------------------------------------------------------------------
-# Theme presets (A3): named, purpose-specific style overrides that are
-# deep-merged onto DEFAULT_STYLE. A preset only declares the keys it changes;
-# everything else inherits the default. Explicit per-call `style` overrides
-# always win over the preset.
-# ------------------------------------------------------------------
-THEME_PRESETS: Dict[str, Dict[str, Any]] = {
- # "general" intentionally empty -> identical to DEFAULT_STYLE.
- "general": {},
- "finance": {
- "fonts": {"primary": "Calibri"},
- "colors": {"primary": "#0B3D2E", "secondary": "#14532D", "accent": "#047857"},
- "documentTitle": {"color": "#0B3D2E", "align": "left"},
- "headings": {
- "h1": {"color": "#0B3D2E"},
- "h2": {"color": "#0B3D2E"},
- "h3": {"color": "#14532D"},
- "h4": {"color": "#14532D"},
- },
- "table": {"headerBg": "#0B3D2E", "rowBandingEven": "#ECFDF5"},
- },
- "legal": {
- # Serif, sober, single-column, justified body, no logo banner.
- "fonts": {"primary": "Times New Roman"},
- "colors": {"primary": "#1A1A1A", "secondary": "#333333", "accent": "#5A5A5A"},
- "documentTitle": {"color": "#1A1A1A", "align": "center", "sizePt": 20},
- "headings": {
- "h1": {"color": "#1A1A1A", "sizePt": 16},
- "h2": {"color": "#1A1A1A", "sizePt": 14},
- "h3": {"color": "#333333", "sizePt": 12},
- "h4": {"color": "#333333", "sizePt": 11},
- },
- "paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#1A1A1A", "align": "justify"},
- "table": {"headerBg": "#333333", "rowBandingEven": "#F5F5F5", "borderColor": "#999999"},
- "page": {"showPageNumbers": True},
- },
- "technical": {
- "fonts": {"primary": "Arial", "monospace": "Consolas"},
- "colors": {"primary": "#0F172A", "secondary": "#1E293B", "accent": "#2563EB"},
- "documentTitle": {"color": "#0F172A", "align": "left"},
- "headings": {
- "h1": {"color": "#0F172A"},
- "h2": {"color": "#1E293B"},
- "h3": {"color": "#1E293B"},
- "h4": {"color": "#334155"},
- },
- "paragraph": {"sizePt": 10, "lineSpacing": 1.2},
- "codeBlock": {"fontSizePt": 9, "background": "#0F172A"},
- "table": {"headerBg": "#1E293B", "rowBandingEven": "#EEF2FF"},
- },
- "hr": {
- "fonts": {"primary": "Calibri"},
- "colors": {"primary": "#5B21B6", "secondary": "#6D28D9", "accent": "#9333EA"},
- "documentTitle": {"color": "#5B21B6", "align": "center"},
- "headings": {
- "h1": {"color": "#5B21B6"},
- "h2": {"color": "#6D28D9"},
- "h3": {"color": "#7C3AED"},
- "h4": {"color": "#7C3AED"},
- },
- "table": {"headerBg": "#5B21B6", "rowBandingEven": "#F5F3FF"},
- },
- "marketing": {
- # Bold, image-friendly, generous spacing, larger title.
- "fonts": {"primary": "Verdana"},
- "colors": {"primary": "#BE123C", "secondary": "#E11D48", "accent": "#F59E0B"},
- "documentTitle": {"color": "#BE123C", "sizePt": 34, "align": "center", "spaceAfterPt": 24},
- "headings": {
- "h1": {"color": "#BE123C", "sizePt": 24},
- "h2": {"color": "#E11D48", "sizePt": 19},
- "h3": {"color": "#E11D48", "sizePt": 15},
- "h4": {"color": "#9F1239", "sizePt": 13},
- },
- "paragraph": {"sizePt": 12, "lineSpacing": 1.3},
- "image": {"defaultWidthPt": 540, "maxWidthPt": 900, "alignment": "center"},
- "table": {"headerBg": "#BE123C", "rowBandingEven": "#FFF1F2"},
- },
-}
-def resolveTheme(themeName: str | None) -> Dict[str, Any]:
- """Return the partial style override for a named theme preset.
-
- Unknown / empty names fall back to ``{}`` (i.e. plain DEFAULT_STYLE).
- The lookup is case-insensitive.
- """
- if not themeName:
- return {}
- return dict(THEME_PRESETS.get(str(themeName).strip().lower(), {}))
-
-
-def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
+def deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
result = {}
for key in base:
@@ -156,7 +84,7 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]
baseVal = base[key]
overVal = override[key]
if isinstance(baseVal, dict) and isinstance(overVal, dict):
- result[key] = _deepMerge(baseVal, overVal)
+ result[key] = deepMerge(baseVal, overVal)
else:
result[key] = overVal
else:
@@ -167,17 +95,15 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]
return result
-def resolveStyle(agentStyle: dict | None, documentTheme: str | None = None) -> Dict[str, Any]:
- """Resolve the effective style: ``DEFAULT_STYLE <- themePreset <- agentStyle``.
+def resolveStyle(agentStyle: dict | None = None) -> Dict[str, Any]:
+ """Resolve the effective style: ``DEFAULT_STYLE <- agentStyle``.
- Precedence (lowest to highest): platform defaults, the named ``documentTheme``
- preset, then any explicit per-call ``agentStyle`` override. With no theme and
- no override this returns plain :data:`DEFAULT_STYLE`.
+ Precedence (lowest to highest): platform defaults, then any explicit
+ per-call ``agentStyle`` override. With no override this returns plain
+ :data:`DEFAULT_STYLE`. Context-aware styling is handled by the AI
+ enhancement step in ``mainServiceGeneration.renderReport``.
"""
resolved = dict(DEFAULT_STYLE)
- themeOverride = resolveTheme(documentTheme)
- if themeOverride:
- resolved = _deepMerge(resolved, themeOverride)
if agentStyle:
- resolved = _deepMerge(resolved, agentStyle)
+ resolved = deepMerge(resolved, agentStyle)
return resolved
diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py
index 42962ad7..5a1ff0eb 100644
--- a/modules/workflows/methods/methodAi/actions/generateDocument.py
+++ b/modules/workflows/methods/methodAi/actions/generateDocument.py
@@ -22,7 +22,6 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isFailure(error="prompt is required")
documentType = parameters.get("documentType")
- documentTheme = parameters.get("documentTheme") or None
# Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers.
resultType = parameters.get("outputFormat") or parameters.get("resultType")
if isinstance(resultType, str):
@@ -83,8 +82,7 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
outputFormat=resultType, # Can be None - AI determines from prompt
title=title,
parentOperationId=parentOperationId,
- generationIntent="document", # NEW: Explicit intent, skips detection
- documentTheme=documentTheme # Named style preset for the renderer
+ generationIntent="document" # NEW: Explicit intent, skips detection
)
# Convert AiResponse to ActionResult
diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py
index 46aac70d..62955b12 100644
--- a/modules/workflows/methods/methodAi/actions/process.py
+++ b/modules/workflows/methods/methodAi/actions/process.py
@@ -252,32 +252,37 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
output_format_for_call = output_extension.replace('.', '') if output_extension else (output_format or 'txt')
# Simple mode: fast path without document generation pipeline
+ # Uses the same extraction + chunking pipeline as full mode to avoid oversized prompts
if simpleMode:
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI (simple mode)")
- context_parts = []
- paramContext = parameters.get("context") # already serialized above
+ simpleParts: Optional[List[ContentPart]] = contentParts
+ if not simpleParts and documentList and len(documentList.references) > 0:
+ from modules.datamodels.datamodelDocref import DocumentItemReference
+ fileIdRefs = [r for r in documentList.references if isinstance(r, DocumentItemReference)]
+ if fileIdRefs:
+ simpleParts = _resolve_file_refs_to_content_parts(self.services, fileIdRefs)
+ if not simpleParts:
+ try:
+ documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
+ simpleParts = _action_docs_to_content_parts(self.services, [
+ {"documentData": self.services.interfaceDbComponent.getFileData(doc.fileId),
+ "documentName": getattr(doc, 'fileName', ''),
+ "mimeType": getattr(doc, 'mimeType', 'application/octet-stream')}
+ for doc in documents if hasattr(doc, 'fileId') and doc.fileId
+ ])
+ except Exception as e:
+ logger.warning(f"Error extracting content parts in simple mode: {e}")
+
+ paramContext = parameters.get("context")
+ simplePrompt = aiPrompt
if paramContext and isinstance(paramContext, str) and paramContext.strip():
- context_parts.append(paramContext.strip())
- if documentList and len(documentList.references) > 0:
- try:
- documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
- for doc in documents:
- if hasattr(doc, 'fileId') and doc.fileId:
- fileData = self.services.interfaceDbComponent.getFileData(doc.fileId)
- if fileData:
- if isinstance(fileData, bytes):
- doc_text = fileData.decode('utf-8', errors='ignore')
- else:
- doc_text = str(fileData)
- context_parts.append(doc_text)
- except Exception as e:
- logger.warning(f"Error extracting context from documents in simple mode: {e}")
- context_text = "\n\n".join(context_parts) if context_parts else ""
-
+ simplePrompt = f"{aiPrompt}\n\n--- DATA CONTEXT ---\n{paramContext.strip()}"
+
request = AiCallRequest(
- prompt=aiPrompt,
- context=context_text if context_text else None,
+ prompt=simplePrompt,
+ contentParts=simpleParts if simpleParts else None,
+ context=None,
options=AiCallOptions(
resultFormat=output_format_for_call,
operationType=OperationTypeEnum.DATA_ANALYSE,
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
index aacfacaa..55c9a40a 100644
--- a/modules/workflows/methods/methodAi/methodAi.py
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -85,15 +85,6 @@ class MethodAi(MethodBase):
default="",
description="Additional context data (string or upstream-bound dict/list, e.g. accounting data) appended to the prompt. Non-string values are JSON-serialized."
),
- "documentTheme": WorkflowActionParameter(
- name="documentTheme",
- type="str",
- frontendType=FrontendType.SELECT,
- frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
- required=False,
- default="general",
- description="Named style preset for the document renderer (general/finance/legal/technical/hr/marketing). The agent forwards it to the renderDocument tool's documentTheme."
- ),
"resultType": WorkflowActionParameter(
name="resultType",
type="str",
@@ -385,15 +376,6 @@ class MethodAi(MethodBase):
required=False,
description="Type of document (content hint for the model); used as title fallback when title is empty."
),
- "documentTheme": WorkflowActionParameter(
- name="documentTheme",
- type="str",
- frontendType=FrontendType.SELECT,
- frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"],
- required=False,
- default="general",
- description="Named style preset applied by the renderer (colors, fonts, spacing): general, finance, legal, technical, hr, marketing."
- ),
"resultType": WorkflowActionParameter(
name="resultType",
type="str",