From 2eb1a5589d58e74fcf2dda5551a081127301dccb Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 3 Jun 2026 16:45:17 +0200 Subject: [PATCH 1/3] fixes doc generation and renderers --- app.py | 23 +- modules/aicore/aicoreModelSelector.py | 5 +- modules/aicore/aicorePluginAnthropic.py | 4 + modules/connectors/_httpResilience.py | 241 +++ .../providerGoogle/connectorGoogle.py | 65 +- .../providerInfomaniak/connectorInfomaniak.py | 31 +- .../connectors/providerMsft/connectorMsft.py | 48 +- modules/features/trustee/mainTrustee.py | 3 +- modules/interfaces/interfaceAiObjects.py | 12 + .../core/serviceStreaming/eventManager.py | 13 + .../serviceAgent/coreTools/_mediaTools.py | 12 - .../services/serviceAgent/sandboxExecutor.py | 46 + .../services/serviceAi/mainServiceAi.py | 20 +- .../services/serviceAi/subStructureFilling.py | 23 +- .../mainServiceExtraction.py | 31 + .../mainServiceGeneration.py | 84 +- .../serviceGeneration/paths/documentPath.py | 6 +- .../renderers/documentRendererBaseTemplate.py | 139 +- .../renderers/rendererDocx.py | 1116 ++++---------- .../renderers/rendererHtml.py | 171 +-- .../renderers/rendererPdf.py | 564 ++++--- .../renderers/rendererPptx.py | 1365 +++++------------ .../renderers/rendererXlsx.py | 860 ++--------- .../serviceGeneration/styleDefaults.py | 154 +- .../methodAi/actions/generateDocument.py | 4 +- .../methods/methodAi/actions/process.py | 47 +- .../workflows/methods/methodAi/methodAi.py | 18 - 27 files changed, 1812 insertions(+), 3293 deletions(-) create mode 100644 modules/connectors/_httpResilience.py diff --git a/app.py b/app.py index 0ffa9824..ec35a2cf 100644 --- a/app.py +++ b/app.py @@ -61,6 +61,13 @@ class DailyRotatingFileHandler(RotatingFileHandler): return True return False + def doRollover(self): + """Size-based rollover that tolerates Windows file locks.""" + try: + super().doRollover() + except PermissionError: + pass + def emit(self, record): """Emit a log record, switching files if date has changed""" # Check if we need to switch to a new file @@ -454,6 +461,20 @@ async def lifespan(app: FastAPI): except Exception as e: logger.warning(f"Could not shutdown feature containers: {e}") + # 4. Cancel all pending streaming EventManager tasks (cleanup sleeps, agent tasks) + try: + from modules.serviceCenter.core.serviceStreaming.eventManager import get_event_manager as _getStreamingEM + _getStreamingEM().shutdown() + except Exception as e: + logger.warning(f"Streaming EventManager shutdown failed: {e}") + + # 5. Close shared HTTP sessions (ResilientHttp) to avoid TCP keepalive hang + try: + from modules.connectors._httpResilience import closeAllResilientHttp + await closeAllResilientHttp() + except Exception as e: + logger.warning(f"Closing HTTP sessions failed: {e}") + logger.info("Application has been shut down") except asyncio.CancelledError: @@ -734,7 +755,7 @@ if __name__ == "__main__": port = int(os.environ.get("PORT", 8000)) try: - from gunicorn.app.wsgiapp import WSGIApplication # noqa: F401 + import gunicorn.app.wsgiapp # type: ignore[import-untyped] # noqa: F401 import subprocess import sys subprocess.run([ diff --git a/modules/aicore/aicoreModelSelector.py b/modules/aicore/aicoreModelSelector.py index d04472cd..f51d6cec 100644 --- a/modules/aicore/aicoreModelSelector.py +++ b/modules/aicore/aicoreModelSelector.py @@ -140,11 +140,10 @@ class ModelSelector: promptFiltered.append(model) else: maxAllowedTokens = model.contextLength * 0.8 - # Compare prompt tokens (not bytes) with model's token limit - if promptTokens <= maxAllowedTokens: + if totalTokens <= maxAllowedTokens: promptFiltered.append(model) else: - logger.debug(f"Model {model.name} filtered out: promptSize={promptTokens:.0f} tokens > maxAllowed={maxAllowedTokens:.0f} tokens (80% of {model.contextLength} tokens)") + logger.debug(f"Model {model.name} filtered out: totalTokens={totalTokens:.0f} > maxAllowed={maxAllowedTokens:.0f} tokens (80% of {model.contextLength} tokens)") logger.debug(f"After prompt size filtering: {len(promptFiltered)} models") diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index 5c1e87b5..ce6349f0 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -654,6 +654,7 @@ class AiAnthropic(BaseConnectorAi): mimeType = parts[0].replace("data:", "") base64Data = parts[1] + _SUPPORTED = {"image/jpeg", "image/png", "image/gif", "image/webp"} import base64 as _b64 try: rawHead = _b64.b64decode(base64Data[:32]) @@ -667,6 +668,9 @@ class AiAnthropic(BaseConnectorAi): mimeType = "image/webp" except Exception: pass + + if mimeType not in _SUPPORTED: + raise ValueError(f"Unsupported image media_type '{mimeType}' for Anthropic (supported: {', '.join(sorted(_SUPPORTED))})") # Convert to Anthropic's vision format anthropicMessages = [{ diff --git a/modules/connectors/_httpResilience.py b/modules/connectors/_httpResilience.py new file mode 100644 index 00000000..504686c8 --- /dev/null +++ b/modules/connectors/_httpResilience.py @@ -0,0 +1,241 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Shared HTTP resilience helpers for provider connectors. + +Provides a reusable session pool with concurrency limiter and retry-with-backoff +so that Google, MSFT and Infomaniak connectors do not each re-implement +per-request sessions, unbounded parallelism, and missing retry logic. +""" + +import asyncio +import logging +import time +from typing import Any, Dict, Optional, Union + +import aiohttp + +logger = logging.getLogger(__name__) + +_DEFAULT_MAX_CONCURRENT = 8 +_DEFAULT_MAX_RETRIES = 3 +_DEFAULT_TIMEOUT_S = 30 +_RETRYABLE_STATUS = {429, 502, 503, 504} + + +_instances: list["ResilientHttp"] = [] + + +class ResilientHttp: + """Managed aiohttp.ClientSession with semaphore + retry. + + Typical usage inside a connector module-level function:: + + _http = ResilientHttp("Google", maxConcurrent=8) + + async def _googleGet(token, url): + return await _http.getJson(url, headers={"Authorization": f"Bearer {token}"}) + + The session is created lazily on first call, reused across requests, + and closed via ``closeAllResilientHttp()`` at app shutdown. + """ + + def __init__( + self, + providerLabel: str = "HTTP", + maxConcurrent: int = _DEFAULT_MAX_CONCURRENT, + maxRetries: int = _DEFAULT_MAX_RETRIES, + defaultTimeoutS: float = _DEFAULT_TIMEOUT_S, + ): + self._label = providerLabel + self._maxConcurrent = maxConcurrent + self._maxRetries = maxRetries + self._defaultTimeout = aiohttp.ClientTimeout(total=defaultTimeoutS) + self._semaphore: Optional[asyncio.Semaphore] = None + self._session: Optional[aiohttp.ClientSession] = None + _instances.append(self) + + def _ensureReady(self) -> aiohttp.ClientSession: + if self._semaphore is None: + self._semaphore = asyncio.Semaphore(self._maxConcurrent) + if self._session is None or self._session.closed: + self._session = aiohttp.ClientSession(timeout=self._defaultTimeout) + return self._session + + async def close(self): + if self._session and not self._session.closed: + await self._session.close() + await asyncio.sleep(0.25) + self._session = None + + async def getJson( + self, + url: str, + headers: Dict[str, str], + timeout: Optional[aiohttp.ClientTimeout] = None, + allowRedirects: bool = True, + ) -> Dict[str, Any]: + """GET request returning parsed JSON with retry + throttle.""" + session = self._ensureReady() + assert self._semaphore is not None + lastError: Optional[str] = None + for attempt in range(1, self._maxRetries + 1): + async with self._semaphore: + try: + async with session.get( + url, + headers=headers, + timeout=timeout or self._defaultTimeout, + allow_redirects=allowRedirects, + ) as resp: + if resp.status in (200, 201): + return await resp.json() + if resp.status in _RETRYABLE_STATUS: + retryAfter = _parseRetryAfter(resp.headers.get("Retry-After")) + waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30) + logger.warning( + f"{self._label} GET {resp.status} (attempt {attempt}/{self._maxRetries}), " + f"retry in {waitS:.1f}s: {url[:120]}" + ) + await asyncio.sleep(waitS) + continue + errorText = await resp.text() + lastError = f"{resp.status}: {errorText[:200]}" + logger.warning(f"{self._label} GET {url[:120]} -> {lastError[:300]}") + return {"error": lastError} + except asyncio.TimeoutError: + lastError = f"timeout after {self._defaultTimeout.total}s" + if attempt < self._maxRetries: + logger.warning(f"{self._label} GET timeout (attempt {attempt}): {url[:120]}") + await asyncio.sleep(min(2 ** attempt, 10)) + continue + except aiohttp.ClientError as e: + lastError = str(e) + if attempt < self._maxRetries: + logger.warning(f"{self._label} GET client error (attempt {attempt}): {e}") + await asyncio.sleep(min(2 ** attempt, 10)) + continue + return {"error": lastError or "unknown error"} + + async def getBytes( + self, + url: str, + headers: Dict[str, str], + timeout: Optional[aiohttp.ClientTimeout] = None, + allowRedirects: bool = True, + ) -> Optional[bytes]: + """GET request returning raw bytes (for file downloads).""" + session = self._ensureReady() + assert self._semaphore is not None + for attempt in range(1, self._maxRetries + 1): + async with self._semaphore: + try: + async with session.get( + url, + headers=headers, + timeout=timeout or self._defaultTimeout, + allow_redirects=allowRedirects, + ) as resp: + if resp.status == 200: + return await resp.read() + if resp.status in _RETRYABLE_STATUS: + retryAfter = _parseRetryAfter(resp.headers.get("Retry-After")) + waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30) + logger.warning( + f"{self._label} download {resp.status} (attempt {attempt}), " + f"retry in {waitS:.1f}s: {url[:120]}" + ) + await asyncio.sleep(waitS) + continue + errorText = await resp.text() + logger.warning(f"{self._label} download {url[:120]} -> {resp.status}: {errorText[:200]}") + return None + except asyncio.TimeoutError: + if attempt < self._maxRetries: + logger.warning(f"{self._label} download timeout (attempt {attempt}): {url[:120]}") + await asyncio.sleep(min(2 ** attempt, 10)) + continue + except aiohttp.ClientError as e: + if attempt < self._maxRetries: + logger.warning(f"{self._label} download client error (attempt {attempt}): {e}") + await asyncio.sleep(min(2 ** attempt, 10)) + continue + return None + + async def request( + self, + method: str, + url: str, + headers: Dict[str, str], + data: Any = None, + timeout: Optional[aiohttp.ClientTimeout] = None, + ) -> Dict[str, Any]: + """Generic HTTP request with retry for retryable status codes.""" + session = self._ensureReady() + assert self._semaphore is not None + lastError: Optional[str] = None + for attempt in range(1, self._maxRetries + 1): + async with self._semaphore: + try: + kwargs: Dict[str, Any] = {"headers": headers} + if data is not None: + kwargs["data"] = data + async with session.request( + method, url, + timeout=timeout or self._defaultTimeout, + **kwargs, + ) as resp: + if resp.status in (200, 201, 202, 204): + if resp.status == 204: + return {} + return await resp.json() + if resp.status in _RETRYABLE_STATUS: + retryAfter = _parseRetryAfter(resp.headers.get("Retry-After")) + waitS = retryAfter if retryAfter > 0 else min(2 ** attempt, 30) + logger.warning( + f"{self._label} {method} {resp.status} (attempt {attempt}), " + f"retry in {waitS:.1f}s: {url[:120]}" + ) + await asyncio.sleep(waitS) + continue + errorText = await resp.text() + lastError = f"{resp.status}: {errorText[:200]}" + logger.warning(f"{self._label} {method} {url[:120]} -> {lastError[:300]}") + return {"error": lastError} + except asyncio.TimeoutError: + lastError = f"timeout after {(timeout or self._defaultTimeout).total}s" + if attempt < self._maxRetries: + await asyncio.sleep(min(2 ** attempt, 10)) + continue + except aiohttp.ClientError as e: + lastError = str(e) + if attempt < self._maxRetries: + await asyncio.sleep(min(2 ** attempt, 10)) + continue + return {"error": lastError or "unknown error"} + + +async def closeAllResilientHttp() -> None: + """Close all ResilientHttp sessions. Call at app shutdown.""" + for inst in _instances: + try: + await inst.close() + except Exception as e: + logger.debug(f"Error closing {inst._label} session: {e}") + logger.info(f"Closed {len(_instances)} ResilientHttp session(s)") + + +def _parseRetryAfter(value: Optional[str]) -> float: + """Parse Retry-After header (seconds or HTTP-date). Returns 0 if absent/unparseable.""" + if not value: + return 0.0 + try: + return float(value) + except ValueError: + pass + try: + from email.utils import parsedate_to_datetime + dt = parsedate_to_datetime(value) + delta = (dt.timestamp() - time.time()) + return max(delta, 0.5) + except Exception: + return 0.0 diff --git a/modules/connectors/providerGoogle/connectorGoogle.py b/modules/connectors/providerGoogle/connectorGoogle.py index 1c5826de..a1f02a03 100644 --- a/modules/connectors/providerGoogle/connectorGoogle.py +++ b/modules/connectors/providerGoogle/connectorGoogle.py @@ -10,10 +10,13 @@ from typing import Any, Dict, List, Optional import aiohttp from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult +from modules.connectors._httpResilience import ResilientHttp from modules.datamodels.datamodelDataSource import ExternalEntry logger = logging.getLogger(__name__) +_http = ResilientHttp("Google", maxConcurrent=8, defaultTimeoutS=20) + _DRIVE_BASE = "https://www.googleapis.com/drive/v3" _GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1" _CALENDAR_BASE = "https://www.googleapis.com/calendar/v3" @@ -57,17 +60,7 @@ def _parseGoogleDateRange(text: Optional[str]) -> tuple: async def _googleGet(token: str, url: str) -> Dict[str, Any]: headers = {"Authorization": f"Bearer {token}"} - timeout = aiohttp.ClientTimeout(total=20) - try: - async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(url, headers=headers) as resp: - if resp.status in (200, 201): - return await resp.json() - errorText = await resp.text() - logger.warning(f"Google API {resp.status}: {errorText[:300]}") - return {"error": f"{resp.status}: {errorText[:200]}"} - except Exception as e: - return {"error": str(e)} + return await _http.getJson(url, headers=headers) def _raiseGoogleError(result: Dict[str, Any], ctx: str) -> None: @@ -128,37 +121,33 @@ class DriveAdapter(ServiceAdapter): if not fileId: return b"" headers = {"Authorization": f"Bearer {self._token}"} - timeout = aiohttp.ClientTimeout(total=60) + dlTimeout = aiohttp.ClientTimeout(total=60) try: - async with aiohttp.ClientSession(timeout=timeout) as session: - # Try direct download first - url = f"{_DRIVE_BASE}/files/{fileId}?alt=media" - async with session.get(url, headers=headers) as resp: - if resp.status == 200: - return await resp.read() - logger.debug(f"Google Drive direct download returned {resp.status} for {fileId}") + url = f"{_DRIVE_BASE}/files/{fileId}?alt=media" + data = await _http.getBytes(url, headers=headers, timeout=dlTimeout) + if data is not None: + return data + logger.debug(f"Google Drive direct download returned None for {fileId}") - # If 403/404, check if it's a native Google file that needs export - metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name" - async with session.get(metaUrl, headers=headers) as metaResp: - if metaResp.status != 200: - logger.warning(f"Google Drive metadata fetch failed ({metaResp.status}) for {fileId}") - return b"" - meta = await metaResp.json() - fileMime = meta.get("mimeType", "") - fileName = meta.get("name", fileId) + metaUrl = f"{_DRIVE_BASE}/files/{fileId}?fields=mimeType,name" + meta = await _http.getJson(metaUrl, headers=headers) + if "error" in meta: + logger.warning(f"Google Drive metadata fetch failed for {fileId}: {meta['error']}") + return b"" + fileMime = meta.get("mimeType", "") + fileName = meta.get("name", fileId) - exportMime = self._EXPORT_MIME_MAP.get(fileMime) - if not exportMime: - logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})") - return b"" + exportMime = self._EXPORT_MIME_MAP.get(fileMime) + if not exportMime: + logger.warning(f"Google Drive: unsupported mimeType '{fileMime}' for file '{fileName}' ({fileId})") + return b"" - exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}" - logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}") - async with session.get(exportUrl, headers=headers) as exportResp: - if exportResp.status == 200: - return await exportResp.read() - logger.warning(f"Google Drive export failed ({exportResp.status}) for '{fileName}'") + exportUrl = f"{_DRIVE_BASE}/files/{fileId}/export?mimeType={exportMime}" + logger.info(f"Google Drive: exporting '{fileName}' as {exportMime}") + exported = await _http.getBytes(exportUrl, headers=headers, timeout=dlTimeout) + if exported is not None: + return exported + logger.warning(f"Google Drive export failed for '{fileName}'") except Exception as e: logger.error(f"Google Drive download failed for {fileId}: {e}") return b"" diff --git a/modules/connectors/providerInfomaniak/connectorInfomaniak.py b/modules/connectors/providerInfomaniak/connectorInfomaniak.py index 94a54860..dfdc8bab 100644 --- a/modules/connectors/providerInfomaniak/connectorInfomaniak.py +++ b/modules/connectors/providerInfomaniak/connectorInfomaniak.py @@ -44,10 +44,13 @@ from modules.connectors.connectorProviderBase import ( ServiceAdapter, DownloadResult, ) +from modules.connectors._httpResilience import ResilientHttp from modules.datamodels.datamodelDataSource import ExternalEntry logger = logging.getLogger(__name__) +_http = ResilientHttp("Infomaniak", maxConcurrent=6, defaultTimeoutS=20) + _API_BASE = "https://api.infomaniak.com" _CALENDAR_BASE = "https://calendar.infomaniak.com" _CONTACTS_BASE = "https://contacts.infomaniak.com" @@ -82,18 +85,7 @@ async def _infomaniakGet( """ url = f"{baseUrl.rstrip('/')}/{endpoint.lstrip('/')}" headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"} - timeout = aiohttp.ClientTimeout(total=20) - try: - async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(url, headers=headers, allow_redirects=False) as resp: - if resp.status in (200, 201): - return await resp.json() - errorText = await resp.text() - logger.warning(f"Infomaniak GET {url} -> {resp.status}: {errorText[:300]}") - return {"error": f"{resp.status}: {errorText[:200]}"} - except Exception as e: - logger.error(f"Infomaniak GET {url} crashed: {e}") - return {"error": str(e)} + return await _http.getJson(url, headers=headers, allowRedirects=False) def _raiseInfomaniakError(result: Dict[str, Any], ctx: str) -> None: @@ -124,20 +116,7 @@ async def _infomaniakDownload( """ url = f"{baseUrl.rstrip('/')}/{endpoint.lstrip('/')}" headers = {"Authorization": f"Bearer {token}"} - timeout = aiohttp.ClientTimeout(total=120) - try: - async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(url, headers=headers, allow_redirects=True) as resp: - if resp.status == 200: - return await resp.read() - logger.warning( - f"Infomaniak download {url} -> {resp.status}: " - f"{(await resp.text())[:300]}" - ) - return None - except Exception as e: - logger.error(f"Infomaniak download {url} crashed: {e}") - return None + return await _http.getBytes(url, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) def _unwrapData(payload: Any) -> Any: diff --git a/modules/connectors/providerMsft/connectorMsft.py b/modules/connectors/providerMsft/connectorMsft.py index 24f6e25c..0830e6ed 100644 --- a/modules/connectors/providerMsft/connectorMsft.py +++ b/modules/connectors/providerMsft/connectorMsft.py @@ -13,11 +13,13 @@ import urllib.parse from typing import Dict, Any, List, Optional from modules.connectors.connectorProviderBase import ProviderConnector, ServiceAdapter, DownloadResult +from modules.connectors._httpResilience import ResilientHttp from modules.datamodels.datamodelDataSource import ExternalEntry logger = logging.getLogger(__name__) _GRAPH_BASE = "https://graph.microsoft.com/v1.0" +_http = ResilientHttp("Graph", maxConcurrent=10, defaultTimeoutS=30) class _GraphApiMixin: @@ -44,24 +46,14 @@ class _GraphApiMixin: async def _graphDownload(self, endpoint: str) -> Optional[bytes]: """Download binary content from Graph API.""" headers = {"Authorization": f"Bearer {self._accessToken}"} - timeout = aiohttp.ClientTimeout(total=60) url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}" - try: - async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(url, headers=headers) as resp: - if resp.status == 200: - return await resp.read() - logger.error(f"Download failed {resp.status}: {await resp.text()}") - return None - except Exception as e: - logger.error(f"Graph download error: {e}") - return None + return await _http.getBytes(url, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) async def _makeGraphCall( token: str, endpoint: str, method: str = "GET", data: Any = None ) -> Dict[str, Any]: - """Execute a single Microsoft Graph API call.""" + """Execute a single Microsoft Graph API call via shared resilient HTTP client.""" url = f"{_GRAPH_BASE}/{endpoint.lstrip('/')}" contentType = "application/json; charset=utf-8" if method == "PUT" and isinstance(data, bytes): @@ -72,37 +64,7 @@ async def _makeGraphCall( } if "$count=true" in endpoint: headers["ConsistencyLevel"] = "eventual" - timeout = aiohttp.ClientTimeout(total=30) - try: - async with aiohttp.ClientSession(timeout=timeout) as session: - kwargs: Dict[str, Any] = {"headers": headers} - if data is not None: - kwargs["data"] = data - - if method == "GET": - async with session.get(url, **kwargs) as resp: - return await _handleResponse(resp) - elif method == "POST": - async with session.post(url, **kwargs) as resp: - return await _handleResponse(resp) - elif method == "PUT": - async with session.put(url, **kwargs) as resp: - return await _handleResponse(resp) - elif method == "PATCH": - async with session.patch(url, **kwargs) as resp: - return await _handleResponse(resp) - elif method == "DELETE": - async with session.delete(url, **kwargs) as resp: - if resp.status in (200, 204): - return {} - return await _handleResponse(resp) - - except asyncio.TimeoutError: - return {"error": f"Graph API timeout: {endpoint}"} - except Exception as e: - return {"error": f"Graph API error: {e}"} - - return {"error": f"Unsupported method: {method}"} + return await _http.request(method, url, headers=headers, data=data) async def _handleResponse(resp: aiohttp.ClientResponse) -> Dict[str, Any]: diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index 41903211..b33aaf74 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -494,7 +494,7 @@ TEMPLATE_WORKFLOWS = [ "closingBalance in accountSummary ist bereits der korrekte Ist-Wert.\n\n" "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN " "Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n" - "Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, " + "Hinweis: Wenn du ein Dokument erstellst, " "verwende einen professionellen Finanz-Stil:\n" "- Schriftart: Calibri\n" "- Primaerfarbe: #1F3864 (Dunkelblau)\n" @@ -504,7 +504,6 @@ TEMPLATE_WORKFLOWS = [ "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen." ), "resultType": "xlsx", - "documentTheme": "finance", "requireNeutralization": False, "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index dcf819cc..2d13439c 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -141,6 +141,18 @@ class AiObjects: _MAX_SHORT_RETRY = 15.0 + # Pre-flight guard: reject obviously oversized payloads before entering the failover loop + estimatedTokens = (len(prompt or "") + len(context or "")) // 3 + bestContextLength = max((m.contextLength for m in failoverModelList if m.contextLength > 0), default=0) + if bestContextLength > 0 and estimatedTokens > bestContextLength * 0.95: + errorMsg = (f"Input too large for all available models: ~{estimatedTokens} estimated tokens " + f"vs best model context {bestContextLength}. Use chunking pipeline instead.") + logger.error(errorMsg) + return AiCallResponse( + content=errorMsg, modelName="error", priceCHF=0.0, + processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, + ) + lastError = None for attempt, model in enumerate(failoverModelList): try: diff --git a/modules/serviceCenter/core/serviceStreaming/eventManager.py b/modules/serviceCenter/core/serviceStreaming/eventManager.py index 00e29f17..bc1fb3c6 100644 --- a/modules/serviceCenter/core/serviceStreaming/eventManager.py +++ b/modules/serviceCenter/core/serviceStreaming/eventManager.py @@ -181,6 +181,19 @@ class EventManager: task = asyncio.create_task(_cleanup()) self._cleanup_tasks[workflow_id] = task + def shutdown(self) -> None: + """Cancel all pending cleanup and agent tasks for fast process exit.""" + for wfId, task in list(self._cleanup_tasks.items()): + if not task.done(): + task.cancel() + self._cleanup_tasks.clear() + for wfId, task in list(self._agent_tasks.items()): + if not task.done(): + task.cancel() + self._agent_tasks.clear() + self._queues.clear() + logger.info("EventManager shutdown: all tasks cancelled") + # Global event manager instance _event_manager: Optional[EventManager] = None diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py index 5d377bc7..c7e292e2 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py @@ -224,7 +224,6 @@ def _registerMediaTools(registry: ToolRegistry, services): title=title, userPrompt=content, style=args.get("style"), - documentTheme=args.get("documentTheme"), imageResolver=_imageBytesResolver if lazyBlockImages else None, ) @@ -315,17 +314,6 @@ def _registerMediaTools(registry: ToolRegistry, services): "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, "title": {"type": "string", "description": "Document title", "default": "Document"}, "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, - "documentTheme": { - "type": "string", - "enum": ["general", "finance", "legal", "technical", "hr", "marketing"], - "description": ( - "Named style preset applied by the renderer (colors, fonts, spacing). " - "Pick the one that matches the document purpose: 'legal' for serif/justified " - "legal filings, 'marketing' for bold image-friendly layouts, 'finance', " - "'technical', 'hr', or 'general' (default). The explicit 'style' object, if " - "provided, overrides individual preset keys." - ), - }, "style": { "type": "object", "description": ( diff --git a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py index 2fbe9c34..4c747e64 100644 --- a/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py +++ b/modules/serviceCenter/services/serviceAgent/sandboxExecutor.py @@ -112,6 +112,50 @@ def _makeReadFile(services): return readFile +_MAX_FILE_BYTES = 50_000_000 # 50 MB safety limit + + +def _makeReadFileBytes(services): + """Create a readFileBytes(fileId) closure for binary file access in the sandbox.""" + def readFileBytes(fileId: str) -> bytes: + mgmt = getattr(services, 'interfaceDbComponent', None) if services else None + if not mgmt: + raise RuntimeError("readFileBytes: no file store available in this session") + data = mgmt.getFileData(str(fileId)) + if data is None: + raise FileNotFoundError(f"File '{fileId}' not found in workspace") + if len(data) > _MAX_FILE_BYTES: + raise ValueError(f"File too large for sandbox analysis ({len(data)} bytes, limit {_MAX_FILE_BYTES})") + return data + return readFileBytes + + +class SafeZipFile: + """Read-only in-memory ZIP analysis wrapper for the sandbox. + Does not expose extract/write -- only namelist, infolist, and in-memory read.""" + + def __init__(self, data: bytes): + import zipfile as _zf + self._zf = _zf.ZipFile(io.BytesIO(data), 'r') + + def namelist(self): + return self._zf.namelist() + + def infolist(self): + return [{"filename": i.filename, "file_size": i.file_size, + "compress_size": i.compress_size, "date_time": i.date_time} + for i in self._zf.infolist()] + + def read(self, name: str) -> bytes: + return self._zf.read(name) + + def __enter__(self): + return self + + def __exit__(self, *args): + self._zf.close() + + async def executePython(code: str, *, services=None) -> Dict[str, Any]: """Execute Python code in a restricted sandbox. Returns {success, output, error}.""" import asyncio @@ -120,8 +164,10 @@ async def executePython(code: str, *, services=None) -> Dict[str, Any]: restrictedGlobals = _buildRestrictedGlobals() vfs = _VirtualFS() restrictedGlobals["__builtins__"]["open"] = vfs.open + restrictedGlobals["__builtins__"]["SafeZipFile"] = SafeZipFile if services: restrictedGlobals["__builtins__"]["readFile"] = _makeReadFile(services) + restrictedGlobals["__builtins__"]["readFileBytes"] = _makeReadFileBytes(services) capturedOutput = io.StringIO() oldStdout = sys.stdout oldStderr = sys.stderr diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index d5054921..afbde59a 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -1680,8 +1680,7 @@ Respond with ONLY a JSON object in this exact format: language: str, title: str, userPrompt: str, - parentOperationId: str, - documentTheme: Optional[str] = None + parentOperationId: str ) -> List[RenderedDocument]: """ Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. @@ -1733,8 +1732,7 @@ Respond with ONLY a JSON object in this exact format: title, userPrompt, self, - parentOperationId=renderOperationId, # Parent-Referenz für ChatLog-Hierarchie - documentTheme=documentTheme + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie ) # ChatLog abschließen @@ -1776,8 +1774,7 @@ Respond with ONLY a JSON object in this exact format: outputFormat: Optional[str] = None, title: Optional[str] = None, parentOperationId: Optional[str] = None, - generationIntent: Optional[str] = None, # NEW: Explicit intent from action (skips detection) - documentTheme: Optional[str] = None # Named style preset for document rendering + generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection) ) -> AiResponse: """ Unified AI content generation with explicit intent requirement. @@ -1796,8 +1793,6 @@ Respond with ONLY a JSON object in this exact format: parentOperationId: Optional parent operation ID for hierarchical logging generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action. NO auto-detection - actions must explicitly specify intent. - documentTheme: Optional named style preset (general/finance/legal/technical/ - hr/marketing) forwarded to the renderer for document generation. Returns: AiResponse with content, metadata, and optional documents @@ -1868,8 +1863,7 @@ Respond with ONLY a JSON object in this exact format: contentParts=contentParts, outputFormat=outputFormat, title=title, - parentOperationId=parentOperationId, - documentTheme=documentTheme + parentOperationId=parentOperationId ) # DATA_EXTRACT: Extract content from documents and process with AI (no structure generation) @@ -2085,8 +2079,7 @@ Respond with ONLY a JSON object in this exact format: contentParts: Optional[List[ContentPart]], outputFormat: str, title: str, - parentOperationId: Optional[str], - documentTheme: Optional[str] = None + parentOperationId: Optional[str] ) -> AiResponse: """Handle document generation using document generation path.""" from modules.serviceCenter.services.serviceGeneration.paths.documentPath import DocumentGenerationPath @@ -2103,8 +2096,7 @@ Respond with ONLY a JSON object in this exact format: contentParts=contentParts, outputFormat=outputFormat, title=title or "Generated Document", - parentOperationId=parentOperationId, - documentTheme=documentTheme + parentOperationId=parentOperationId ) diff --git a/modules/serviceCenter/services/serviceAi/subStructureFilling.py b/modules/serviceCenter/services/serviceAi/subStructureFilling.py index dbf289fd..2baf0a84 100644 --- a/modules/serviceCenter/services/serviceAi/subStructureFilling.py +++ b/modules/serviceCenter/services/serviceAi/subStructureFilling.py @@ -27,6 +27,21 @@ class _AiResponseFallback: logger = logging.getLogger(__name__) +def _normalizeImageElement(element: Dict[str, Any]) -> None: + """Ensure image element has nested content dict. + AI sometimes returns flat keys (base64Data, altText, ...) at the top level. + All renderers expect element['content'] to be a dict with those keys.""" + if "content" in element and isinstance(element.get("content"), dict): + return + element["content"] = { + "base64Data": element.pop("base64Data", ""), + "altText": element.pop("altText", ""), + "caption": element.pop("caption", ""), + "mimeType": element.pop("mimeType", "image/png"), + "fileName": element.pop("fileName", element.get("id", "image") + ".png"), + } + + def _elements_from_section_content_ai_json(parsed: Any) -> List[Any]: """Normalize section_content AI JSON (incl. models that return {\"text\": ...}) into elements.""" from modules.serviceCenter.services.serviceAi.subLoopingUseCases import _normalizeSectionContentJson @@ -494,14 +509,18 @@ class StructureFiller: try: jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + _normalizeImageElement(jsonContent) elements.append(jsonContent) logger.debug("AI returned proper JSON image structure") - base64Data = None # Signal that image was already processed + base64Data = None elif isinstance(jsonContent, list) and len(jsonContent) > 0: if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + for item in jsonContent: + if isinstance(item, dict) and item.get("type") == "image": + _normalizeImageElement(item) elements.extend(jsonContent) logger.debug("AI returned proper JSON image structure in list") - base64Data = None # Signal that image was already processed + base64Data = None else: base64Data = "" # Continue with normal processing else: diff --git a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py index 1ffb8557..8747c552 100644 --- a/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/serviceCenter/services/serviceExtraction/mainServiceExtraction.py @@ -1309,6 +1309,7 @@ class ExtractionService: Calls aiObjects._callWithModel() for actual AI calls. """ lastError = None + _VISION_API_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp"} # Check if this is an image - Vision models need special handling isImage = (contentPart.typeGroup == "image") or (contentPart.mimeType and contentPart.mimeType.startswith("image/")) @@ -1316,6 +1317,18 @@ class ExtractionService: # Determine the correct operation type based on content type actualOperationType = options.operationType if isImage: + resolvedMime = contentPart.mimeType or "image/jpeg" + if resolvedMime not in _VISION_API_TYPES and contentPart.data: + resolvedMime = _sniffImageMime(contentPart.data) or resolvedMime + if resolvedMime not in _VISION_API_TYPES: + logger.info(f"Skipping unsupported image type '{resolvedMime}' (supported: {', '.join(sorted(_VISION_API_TYPES))})") + return AiCallResponse( + content=f"[Image skipped: unsupported format {resolvedMime}]", + modelName="skipped", priceCHF=0.0, + processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=0, + ) + contentPart.mimeType = resolvedMime + actualOperationType = OperationTypeEnum.IMAGE_ANALYSE # Get vision-capable models for images availableModels = modelRegistry.getAvailableModels() @@ -1805,6 +1818,24 @@ class ExtractionService: ) +def _sniffImageMime(data) -> Optional[str]: + """Detect image format from magic bytes. Returns None if unrecognised.""" + import base64 as _b64 + try: + raw = data if isinstance(data, bytes) else _b64.b64decode(data[:32]) + if raw[:3] == b"\xff\xd8\xff": + return "image/jpeg" + if raw[:8] == b"\x89PNG\r\n\x1a\n": + return "image/png" + if raw[:4] == b"GIF8": + return "image/gif" + if raw[:4] == b"RIFF" and len(raw) >= 12 and raw[8:12] == b"WEBP": + return "image/webp" + except Exception: + pass + return None + + # Module-level function for use by subPipeline and ExtractionService def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]: """Apply merging strategy to parts with intelligent token-aware merging. diff --git a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py index 5bf86aee..dbbe61c3 100644 --- a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py @@ -14,7 +14,7 @@ from .subDocumentUtility import ( detectMimeTypeFromData, convertDocumentDataToString ) -from .styleDefaults import resolveStyle +from .styleDefaults import resolveStyle, deepMerge logger = logging.getLogger(__name__) @@ -383,7 +383,80 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, documentTheme: Optional[str] = None, imageResolver=None) -> List[RenderedDocument]: + async def _enhanceStyleWithAi(self, resolvedStyle: Dict[str, Any], extractedContent: Dict[str, Any], userPrompt: str | None, aiService) -> Dict[str, Any]: + """Let AI refine the resolved style based on document content and context. + + Returns the enhanced style dict, or the original on failure. + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + import json as _json, re as _re + + metadata = extractedContent.get("metadata", {}) if isinstance(extractedContent, dict) else {} + docTitle = metadata.get("title", "") if isinstance(metadata, dict) else "" + docType = metadata.get("documentType", "") if isinstance(metadata, dict) else "" + userHint = (userPrompt or "")[:300] + + styleJson = _json.dumps(resolvedStyle, indent=2, default=str) + + prompt = ( + "You are a document styling expert. Given the document context below, " + "return a JSON delta object containing ONLY the style properties you want to change " + "from the current defaults. Return {} if no changes are needed.\n\n" + f"Document title: {docTitle}\n" + f"Document type: {docType}\n" + f"User request (excerpt): {userHint}\n\n" + f"Current style (full schema):\n{styleJson}\n\n" + "You may adjust any property: fonts (primary/monospace), colors, " + "documentTitle (size, alignment), headings (h1-h4 sizes, colors, spacing), " + "paragraph (size, lineSpacing, alignment e.g. justified), " + "table (header colors, banding, borders, cell padding), " + "list (bullet character, indent), image (default width, alignment), " + "codeBlock (font size, background, border), " + "coverPage (title/subtitle sizes and colors), " + "caption (size, color, italic), " + "page (format, margins, header/footer).\n" + "Match the document's purpose and tone. Examples: a legal document should use " + "serif fonts and justified text; a financial report conservative colors; " + "a marketing brochure bold colors and generous spacing.\n" + "Return ONLY a valid JSON object (no markdown fences, no explanation)." + ) + + options = AiCallOptions() + options.operationType = OperationTypeEnum.DATA_GENERATE + request = AiCallRequest(prompt=prompt, context="", options=options) + response = await aiService.callAi(request) + + raw = (response.content or "").strip() if response else "" + if not raw: + return resolvedStyle + + jsonMatch = _re.search(r'```json\s*\n(.*?)\n```', raw, _re.DOTALL) + if jsonMatch: + raw = jsonMatch.group(1).strip() + elif raw.startswith('```'): + raw = _re.sub(r'^```\w*\s*', '', raw) + raw = _re.sub(r'\s*```$', '', raw) + + jsonStart = raw.find('{') + jsonEnd = raw.rfind('}') + if jsonStart == -1 or jsonEnd == -1: + return resolvedStyle + raw = raw[jsonStart:jsonEnd + 1] + + delta = _json.loads(raw) + if not isinstance(delta, dict) or not delta: + return resolvedStyle + + enhanced = deepMerge(resolvedStyle, delta) + logger.info("AI style enhancement applied %d top-level key(s)", len(delta)) + return enhanced + + except Exception as exc: + logger.warning("AI style enhancement failed, using base style: %s", exc) + return resolvedStyle + + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None, imageResolver=None) -> List[RenderedDocument]: """ Render extracted JSON content to the specified output format. Processes EACH document separately and calls renderer for each. @@ -401,9 +474,6 @@ class GenerationService: aiService: AI service instance for generation prompt creation parentOperationId: Optional parent operation ID for hierarchical logging style: Optional style overrides (deep-merged with DEFAULT_STYLE) - documentTheme: Optional named theme preset (general/finance/legal/ - technical/hr/marketing). Resolved as DEFAULT_STYLE <- preset <- style, - so an explicit ``style`` override always wins. imageResolver: Optional callable ``fileId -> bytes`` for lazy, on-demand resolution of block images that carry only a ``fileId`` (no embedded base64). Lets large documents avoid holding every image's bytes in @@ -414,7 +484,9 @@ class GenerationService: Each RenderedDocument represents one rendered file (main document or supporting file) """ try: - resolvedStyle = resolveStyle(style, documentTheme) + resolvedStyle = resolveStyle(style) + if aiService: + resolvedStyle = await self._enhanceStyleWithAi(resolvedStyle, extractedContent, userPrompt, aiService) # Validate JSON input if not isinstance(extractedContent, dict): raise ValueError("extractedContent must be a JSON dictionary") diff --git a/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py b/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py index f7d146f5..4fc6c9d5 100644 --- a/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py +++ b/modules/serviceCenter/services/serviceGeneration/paths/documentPath.py @@ -34,8 +34,7 @@ class DocumentGenerationPath: contentParts: Optional[List[ContentPart]] = None, outputFormat: str = "txt", title: Optional[str] = None, - parentOperationId: Optional[str] = None, - documentTheme: Optional[str] = None + parentOperationId: Optional[str] = None ) -> AiResponse: """ Generate document using existing chapter/section model. @@ -166,8 +165,7 @@ class DocumentGenerationPath: language, # Global fallback (per-document language extracted from structure in renderReport) title or "Generated Document", userPrompt, - docOperationId, - documentTheme=documentTheme + docOperationId ) # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py index 35401f27..61eadee7 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py @@ -154,8 +154,12 @@ class BaseRenderer(ABC): para = style["paragraph"] lst = style["list"] cb = style["codeBlock"] + fonts = style.get("fonts") if isinstance(style.get("fonts"), dict) else {} colors = style.get("colors") if isinstance(style.get("colors"), dict) else {} - primaryColor = colors.get("primary", "#1F3864") + primaryColor = colors.get("primary", "#24292e") + secondaryColor = colors.get("secondary", "#586069") + accentColor = colors.get("accent", "#0366d6") + bgColor = colors.get("background", "#FFFFFF") rawDocTitle = style.get("documentTitle") docTitle = rawDocTitle if isinstance(rawDocTitle, dict) else {} titleSizePt = docTitle.get("sizePt") @@ -168,6 +172,10 @@ class BaseRenderer(ABC): titleAlign = "center" titleSpaceBefore = docTitle.get("spaceBeforePt", 0) titleSpaceAfter = docTitle.get("spaceAfterPt", 18) + img = style.get("image") if isinstance(style.get("image"), dict) else {} + page = style.get("page") if isinstance(style.get("page"), dict) else {} + cover = style.get("coverPage") if isinstance(style.get("coverPage"), dict) else {} + caption = style.get("caption") if isinstance(style.get("caption"), dict) else {} return { "title": { "font_size": titleSizePt, @@ -179,56 +187,169 @@ class BaseRenderer(ABC): }, "heading1": { "font_size": h1["sizePt"], "color": h1["color"], - "bold": h1.get("weight") == "bold", "align": "left", + "bold": h1.get("weight") == "bold", "align": h1.get("align", "left"), "space_before": h1.get("spaceBeforePt", 24), "space_after": h1.get("spaceAfterPt", 8), }, "heading2": { "font_size": h2["sizePt"], "color": h2["color"], - "bold": h2.get("weight") == "bold", "align": "left", + "bold": h2.get("weight") == "bold", "align": h2.get("align", "left"), "space_before": h2.get("spaceBeforePt", 20), "space_after": h2.get("spaceAfterPt", 6), }, "heading3": { "font_size": h3["sizePt"], "color": h3["color"], - "bold": h3.get("weight") == "bold", "align": "left", + "bold": h3.get("weight") == "bold", "align": h3.get("align", "left"), "space_before": h3.get("spaceBeforePt", 16), "space_after": h3.get("spaceAfterPt", 4), }, "heading4": { "font_size": h4["sizePt"], "color": h4["color"], - "bold": h4.get("weight") == "bold", "align": "left", + "bold": h4.get("weight") == "bold", "align": h4.get("align", "left"), "space_before": h4.get("spaceBeforePt", 12), "space_after": h4.get("spaceAfterPt", 3), }, "paragraph": { "font_size": para["sizePt"], "color": para["color"], - "bold": False, "align": "left", - "line_height": para.get("lineSpacing", 1.15), + "bold": False, "align": para.get("align", "left"), + "line_height": para.get("lineSpacing", 1.5), }, "table_header": { "background": tbl["headerBg"], "text_color": tbl["headerFg"], + "font_size": tbl.get("headerSizePt", 10), "bold": True, "align": "center", }, "table_cell": { "background": tbl["rowBandingOdd"], "text_color": para["color"], + "font_size": tbl.get("bodySizePt", 10), "bold": False, "align": "left", }, "table_border": { - "style": "grid", "color": tbl["borderColor"], + "style": tbl.get("borderStyle", "grid"), + "color": tbl["borderColor"], + "width": tbl.get("borderWidthPt", 0.5), }, + "table_banding": { + "enabled": tbl.get("bandingEnabled", True), + "even": tbl.get("rowBandingEven", "#f6f8fa"), + "odd": tbl.get("rowBandingOdd", "#FFFFFF"), + }, + "table_padding": tbl.get("cellPaddingPt", 4), "bullet_list": { "font_size": lst["sizePt"], "color": para["color"], "indent": lst["indentPt"], "bullet_char": lst.get("bulletChar", "\u2022"), }, "code_block": { - "font": style["fonts"]["monospace"], + "font": fonts.get("monospace", "Consolas"), "font_size": cb["fontSizePt"], "color": para["color"], "background": cb["background"], + "border_color": cb.get("borderColor", "#e1e4e8"), + }, + "fonts": { + "primary": fonts.get("primary", "Calibri"), + "monospace": fonts.get("monospace", "Consolas"), + }, + "colors": { + "primary": primaryColor, + "secondary": secondaryColor, + "accent": accentColor, + "background": bgColor, + }, + "image": { + "default_width": img.get("defaultWidthPt", 480), + "max_width": img.get("maxWidthPt", 800), + "alignment": img.get("alignment", "center"), + }, + "page": { + "format": page.get("format", "A4"), + "margins": page.get("marginsPt", {"top": 60, "bottom": 60, "left": 60, "right": 60}), + "show_page_numbers": page.get("showPageNumbers", True), + "header_height": page.get("headerHeight", 30), + "footer_height": page.get("footerHeight", 30), + "header_logo": page.get("headerLogo"), + "header_text": page.get("headerText", ""), + "footer_text": page.get("footerText", ""), + }, + "cover_page": { + "title_size": cover.get("titleSizePt", 28), + "subtitle_size": cover.get("subtitleSizePt", 16), + "author_size": cover.get("authorSizePt", 12), + "date_size": cover.get("dateSizePt", 12), + "title_color": cover.get("titleColor", primaryColor), + "subtitle_color": cover.get("subtitleColor", secondaryColor), + }, + "caption": { + "font_size": caption.get("sizePt", 10), + "color": caption.get("color", secondaryColor), + "italic": caption.get("italic", True), + "align": caption.get("align", "center"), }, } + @staticmethod + def _looksNumeric(values: list) -> bool: + """Return True if most non-empty values in the column look numeric.""" + numCount = 0 + total = 0 + for v in values: + text = str(v).strip() if v is not None else "" + if not text: + continue + total += 1 + cleaned = text.replace(",", "").replace("'", "").replace(" ", "") + cleaned = cleaned.lstrip("$€£CHF").rstrip("%") + try: + float(cleaned) + numCount += 1 + except ValueError: + pass + return total > 0 and numCount / total >= 0.6 + + @staticmethod + def _looksDate(values: list) -> bool: + """Return True if most non-empty values look like dates.""" + dateCount = 0 + total = 0 + datePattern = re.compile(r"^\d{1,4}[./-]\d{1,2}[./-]\d{1,4}$") + for v in values: + text = str(v).strip() if v is not None else "" + if not text: + continue + total += 1 + if datePattern.match(text): + dateCount += 1 + return total > 0 and dateCount / total >= 0.6 + + def _inferColumnAlignments(self, headers: list, rows: list, tableStyle: dict | None = None) -> list: + """Infer per-column alignments from explicit tableStyle or data heuristic. + + Returns a list of ``"left"``/``"center"``/``"right"`` strings, one per column. + """ + colCount = len(headers) if headers else (len(rows[0]) if rows else 0) + if not colCount: + return [] + if tableStyle and tableStyle.get("columnAlignments"): + explicit = tableStyle["columnAlignments"] + if isinstance(explicit, list) and len(explicit) >= colCount: + return [a if a in ("left", "center", "right") else "left" for a in explicit[:colCount]] + alignments = [] + for colIdx in range(colCount): + colValues = [] + for row in rows: + if colIdx < len(row): + cell = row[colIdx] + if isinstance(cell, list): + cell = "".join(r.get("value", "") for r in cell if isinstance(r, dict)) + colValues.append(cell) + if self._looksNumeric(colValues): + alignments.append("right") + elif self._looksDate(colValues): + alignments.append("center") + else: + alignments.append("left") + return alignments + @staticmethod def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any: """Extract inline runs from new-format content, falling back to old format. diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py index 37d191e1..9d7b24ff 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py @@ -10,13 +10,11 @@ from typing import Dict, Any, List, Optional import io import base64 import re -import csv try: from docx import Document from docx.shared import Inches, Pt, RGBColor from docx.enum.text import WD_ALIGN_PARAGRAPH - from docx.enum.table import WD_TABLE_ALIGNMENT DOCX_AVAILABLE = True except ImportError: DOCX_AVAILABLE = False @@ -122,18 +120,12 @@ class RendererDocx(BaseRenderer): doc = Document() self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s") - # Phase 3: prefer unified style when provided + # Apply unified style (always provided by mainServiceGeneration.renderReport) style_start = time.time() self.logger.debug("_generateDocxFromJson: About to get style set") - if unifiedStyle: - styleSet = self._convertUnifiedStyleToInternal(unifiedStyle) - self._unifiedStyle = unifiedStyle - else: - template_from_metadata = None - if json_content and isinstance(json_content.get("metadata"), dict): - template_from_metadata = json_content["metadata"].get("templateName") - styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) - self._unifiedStyle = None + styleSet = self._convertUnifiedStyleToInternal(unifiedStyle) + self._unifiedStyle = unifiedStyle + self._styleSet = styleSet self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s") # Setup basic document styles and create all styles from style set @@ -201,108 +193,6 @@ class RendererDocx(BaseRenderer): self.logger.error(f"Error generating DOCX from JSON: {str(e)}") raise Exception(f"DOCX generation failed: {str(e)}") - async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - use styles from document generation metadata if available, - otherwise enhance default styles with AI if userPrompt provided. - - WICHTIG: In a dynamic scalable AI system, styling should come from document generation, - not be generated separately by renderers. Only fall back to AI if styles not provided. - - Args: - extractedContent: Document content with metadata (may contain styles) - userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if styles not in metadata and userPrompt provided) - templateName: Name of template style set (None = default) - - Returns: - Dict with style definitions for all document styles - """ - # Get default style set - if templateName == "corporate": - defaultStyleSet = self._getCorporateStyleSet() - elif templateName == "minimal": - defaultStyleSet = self._getMinimalStyleSet() - else: - defaultStyleSet = self._getDefaultStyleSet() - - # FIRST: Check if styles are provided in document generation metadata (preferred approach) - if extractedContent: - metadata = extractedContent.get("metadata", {}) - if isinstance(metadata, dict): - styles = metadata.get("styles") - if styles and isinstance(styles, dict): - self.logger.debug("Using styles from document generation metadata") - return self._validateStylesContrast(styles) - - # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) - if userPrompt and aiService: - self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") - enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - return self._validateStylesContrast(enhancedStyleSet) - else: - # Use default styles only - return defaultStyleSet - - async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: - """Enhance default styles with AI based on user prompt.""" - try: - style_template = self._createAiStyleTemplate("docx", userPrompt, defaultStyleSet) - enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet) - return enhanced_styles - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") - return defaultStyleSet - - def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Validate and fix contrast issues in AI-generated styles.""" - try: - # Fix table header contrast - if "table_header" in styles: - header = styles["table_header"] - bg_color = header.get("background", "#FFFFFF") - text_color = header.get("text_color", "#000000") - - # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": - header["background"] = "#4F4F4F" - header["text_color"] = "#FFFFFF" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": - header["background"] = "#4F4F4F" - header["text_color"] = "#FFFFFF" - - # Fix table cell contrast - if "table_cell" in styles: - cell = styles["table_cell"] - bg_color = cell.get("background", "#FFFFFF") - text_color = cell.get("text_color", "#000000") - - # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": - cell["background"] = "#FFFFFF" - cell["text_color"] = "#2F2F2F" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": - cell["background"] = "#FFFFFF" - cell["text_color"] = "#2F2F2F" - - return styles - - except Exception as e: - self.logger.warning(f"Style validation failed: {str(e)}") - return self._getDefaultStyleSet() - - def _getDefaultStyleSet(self) -> Dict[str, Any]: - """Default DOCX style set - used when no style instructions present.""" - return { - "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "left"}, - "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"}, - "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"}, - "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"}, - "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"}, - "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"}, - "table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"}, - "bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20}, - "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} - } def _setupBasicDocumentStyles(self, doc: Document) -> None: """Set up basic document styles.""" @@ -318,21 +208,6 @@ class RendererDocx(BaseRenderer): - def _clearTemplateContent(self, doc: Document) -> None: - """Clear template content while preserving styles.""" - try: - # Remove all paragraphs except keep the styles - for paragraph in list(doc.paragraphs): - # Keep the paragraph but clear its content - paragraph.clear() - - # Remove all tables - for table in list(doc.tables): - table._element.getparent().remove(table._element) - - except Exception as e: - self.logger.warning(f"Could not clear template content: {str(e)}") - def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a single JSON section to DOCX using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. @@ -434,8 +309,8 @@ class RendererDocx(BaseRenderer): def _addMarkdownInlineRuns(self, paragraph, text: str) -> None: """Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph.""" pos = 0 - us = getattr(self, '_unifiedStyle', None) - monoFont = us["fonts"]["monospace"] if us else "Courier New" + ss = getattr(self, '_styleSet', None) + monoFont = ss["fonts"]["monospace"] if ss else "Consolas" for m in self._MD_INLINE_RE.finditer(text): if m.start() > pos: paragraph.add_run(text[pos:m.start()]) @@ -450,15 +325,17 @@ class RendererDocx(BaseRenderer): elif m.group(7): run = paragraph.add_run(m.group(7)) run.font.name = monoFont - run.font.size = Pt(9) + us = getattr(self, '_unifiedStyle', None) + codeSizePt = us["codeBlock"]["fontSizePt"] if us else 9 + run.font.size = Pt(codeSizePt) pos = m.end() if pos < len(text): paragraph.add_run(text[pos:]) def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None: """Process a list of InlineRun dicts into python-docx Runs on a paragraph.""" - us = getattr(self, '_unifiedStyle', None) - monoFont = us["fonts"]["monospace"] if us else "Courier New" + ss = getattr(self, '_styleSet', None) + monoFont = ss["fonts"]["monospace"] if ss else "Consolas" for run in runs: runType = run.get("type", "text") value = run.get("value", "") @@ -471,11 +348,13 @@ class RendererDocx(BaseRenderer): elif runType == "code": r = paragraph.add_run(value) r.font.name = monoFont - r.font.size = Pt(9) + codeSizePt = ss["code_block"]["font_size"] if ss else 9 + r.font.size = Pt(codeSizePt) elif runType == "link": r = paragraph.add_run(value) r.font.underline = True - r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9) + accentHex = ss["colors"]["accent"].lstrip('#') if ss else "0366d6" + r.font.color.rgb = RGBColor(int(accentHex[0:2], 16), int(accentHex[2:4], 16), int(accentHex[4:6], 16)) elif runType == "image": b64 = run.get("base64Data", "") if b64: @@ -505,31 +384,38 @@ class RendererDocx(BaseRenderer): By building the XML directly, we achieve 100-1000x faster performance. """ import time - table_start = time.time() + from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge + + tableStart = time.time() try: - # Extract from nested content structure content = table_data.get("content", {}) if not isinstance(content, dict): return headers = content.get("headers", []) rows = content.get("rows", []) - + if not headers or not rows: return - + + # Per-table style override: merge element-level tableStyle with global styles + perTableStyle = content.get("tableStyle", {}) + if perTableStyle and isinstance(perTableStyle, dict): + mergedStyles = deepMerge(styles, perTableStyle) + else: + mergedStyles = styles + totalRows = len(rows) totalCols = len(headers) totalCells = totalRows * totalCols - + self.logger.debug(f"_renderJsonTable: Starting FAST table render - {totalRows} rows x {totalCols} columns = {totalCells} cells") - - # Use fast XML-based table rendering - self._renderTableFastXml(doc, headers, rows, styles) - - total_time = time.time() - table_start - rate = totalCells / total_time if total_time > 0 else 0 - self.logger.info(f"_renderJsonTable: Table completed in {total_time:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s") - + + self._renderTableFastXml(doc, headers, rows, mergedStyles) + + totalTime = time.time() - tableStart + rate = totalCells / totalTime if totalTime > 0 else 0 + self.logger.info(f"_renderJsonTable: Table completed in {totalTime:.2f}s ({totalRows} rows x {totalCols} cols = {totalCells} cells) - Rate: {rate:.0f} cells/s") + except Exception as e: self.logger.error(f"Error rendering table: {str(e)}", exc_info=True) @@ -544,60 +430,58 @@ class RendererDocx(BaseRenderer): from docx.oxml.shared import OxmlElement, qn from docx.oxml.ns import nsmap from lxml import etree - - create_start = time.time() - - # Get the document body element + + createStart = time.time() + body = doc._body._body - - # Create table element + tbl = OxmlElement('w:tbl') - - # Add table properties + tblPr = OxmlElement('w:tblPr') - - # Table width - auto + tblW = OxmlElement('w:tblW') tblW.set(qn('w:type'), 'auto') tblW.set(qn('w:w'), '0') tblPr.append(tblW) - + jc = OxmlElement('w:jc') jc.set(qn('w:val'), 'left') tblPr.append(jc) - - # Apply table borders directly (works without template styles) + + # Border style wiring: grid / horizontal / none borderStyle = styles.get("table_border", {}).get("style", "grid") - tblBorders = self._createTableBordersXml(borderStyle) + tblBorders = self._createTableBordersXml(borderStyle, styles) tblPr.append(tblBorders) - - # Table cell margins for better readability + + # Table cell margins from style + paddingTwips = str(int(styles.get("table_padding", 4) * 20)) tblCellMar = OxmlElement('w:tblCellMar') for side in ['top', 'left', 'bottom', 'right']: margin = OxmlElement(f'w:{side}') - margin.set(qn('w:w'), '80') # 80 twips = ~4pt padding + margin.set(qn('w:w'), paddingTwips) margin.set(qn('w:type'), 'dxa') tblCellMar.append(margin) tblPr.append(tblCellMar) - + tbl.append(tblPr) - - # Create table grid (column definitions) + tblGrid = OxmlElement('w:tblGrid') for _ in range(len(headers)): gridCol = OxmlElement('w:gridCol') tblGrid.append(gridCol) tbl.append(tblGrid) - - self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - create_start:.3f}s") - - # Build all rows using fast XML - rows_start = time.time() - + + self.logger.debug(f"_renderTableFastXml: Table structure created in {time.time() - createStart:.3f}s") + + # Infer column alignments + colAlignments = self._inferColumnAlignments(headers, rows, styles) + + rowsStart = time.time() + # Resolve header style colors - tableStyle = styles.get("table_header", {}) - headerBg = tableStyle.get("background", "") - headerFg = tableStyle.get("text_color", "") + tableHeaderStyle = styles.get("table_header", {}) + headerBg = tableHeaderStyle.get("background", "") + headerFg = tableHeaderStyle.get("text_color", "") # Flatten inline-run headers to plain strings for fast XML path flatHeaders = [] @@ -605,15 +489,21 @@ class RendererDocx(BaseRenderer): runs = self._inlineRunsForCell(h) flatHeaders.append("".join(r.get("value", "") for r in runs)) - headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None) + headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None, colAlignments=colAlignments) tbl.append(headerRow) - header_time = time.time() - rows_start - self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s") + headerTime = time.time() - rowsStart + self.logger.debug(f"_renderTableFastXml: Header row created in {headerTime:.3f}s") - data_start = time.time() + dataStart = time.time() rowCount = len(rows) + # Banding toggle + bandingDef = styles.get("table_banding", {}) + bandingEnabled = bandingDef.get("enabled", True) + bandEven = bandingDef.get("even", "").lstrip('#') if bandingEnabled else None + bandOdd = bandingDef.get("odd", "").lstrip('#') if bandingEnabled else None + for idx, rowData in enumerate(rows): cellTexts = [] for cell in rowData: @@ -622,49 +512,54 @@ class RendererDocx(BaseRenderer): while len(cellTexts) < len(flatHeaders): cellTexts.append('') - row = self._createTableRowXml(cellTexts, isHeader=False) + rowBg = (bandEven if idx % 2 == 0 else bandOdd) if bandingEnabled else None + row = self._createTableRowXml(cellTexts, isHeader=False, rowBgHex=rowBg, colAlignments=colAlignments) tbl.append(row) - - # Log progress every 10% + if rowCount > 100 and (idx + 1) % (rowCount // 10) == 0: - elapsed = time.time() - data_start + elapsed = time.time() - dataStart rate = (idx + 1) * len(headers) / elapsed if elapsed > 0 else 0 self.logger.debug(f"_renderTableFastXml: Progress {((idx + 1) / rowCount * 100):.0f}% ({idx + 1}/{rowCount} rows) - Rate: {rate:.0f} cells/s") - - data_time = time.time() - data_start - - # Append table to document body - body.append(tbl) - - # Add an empty paragraph after the table to prevent Word from merging consecutive tables + + dataTime = time.time() - dataStart + + sectPr = body.find(qn('w:sectPr')) + if sectPr is not None: + sectPr.addprevious(tbl) + else: + body.append(tbl) + separatorParagraph = OxmlElement('w:p') - body.append(separatorParagraph) - - total_time = time.time() - create_start + if sectPr is not None: + sectPr.addprevious(separatorParagraph) + else: + body.append(separatorParagraph) + + totalTime = time.time() - createStart totalCells = (rowCount + 1) * len(headers) - rate = totalCells / total_time if total_time > 0 else 0 - - self.logger.debug(f"_renderTableFastXml: All rows created in {data_time:.2f}s, total: {total_time:.2f}s, rate: {rate:.0f} cells/s") + rate = totalCells / totalTime if totalTime > 0 else 0 + + self.logger.debug(f"_renderTableFastXml: All rows created in {dataTime:.2f}s, total: {totalTime:.2f}s, rate: {rate:.0f} cells/s") - def _createTableBordersXml(self, borderStyle: str) -> Any: + def _createTableBordersXml(self, borderStyle: str, styles: Dict[str, Any] = None) -> Any: """ Create table borders XML element based on style. Supports: - 'grid': Full grid with all borders (default) - - 'horizontal_only': Only horizontal lines between rows - - 'none' or other: Minimal/no borders + - 'horizontal': Only horizontal lines between rows + - 'none': No borders at all """ from docx.oxml.shared import OxmlElement, qn - + tblBorders = OxmlElement('w:tblBorders') - - # Border color - dark gray for professional look - borderColor = '404040' - borderSize = '4' # 0.5pt (in eighths of a point) - + + borderDef = (styles or {}).get("table_border", {}) + borderColor = borderDef.get("color", "#e1e4e8").lstrip('#') + widthPt = borderDef.get("width", 0.5) + borderSize = str(int(widthPt * 8)) + if borderStyle == "grid": - # Full grid - all borders for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']: border = OxmlElement(f'w:{borderName}') border.set(qn('w:val'), 'single') @@ -672,9 +567,8 @@ class RendererDocx(BaseRenderer): border.set(qn('w:space'), '0') border.set(qn('w:color'), borderColor) tblBorders.append(border) - - elif borderStyle == "horizontal_only": - # Only horizontal lines + + elif borderStyle in ("horizontal", "horizontal_only"): for borderName in ['top', 'bottom', 'insideH']: border = OxmlElement(f'w:{borderName}') border.set(qn('w:val'), 'single') @@ -682,46 +576,52 @@ class RendererDocx(BaseRenderer): border.set(qn('w:space'), '0') border.set(qn('w:color'), borderColor) tblBorders.append(border) - # No vertical borders for borderName in ['left', 'right', 'insideV']: border = OxmlElement(f'w:{borderName}') border.set(qn('w:val'), 'nil') tblBorders.append(border) + + elif borderStyle == "none": + for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']: + border = OxmlElement(f'w:{borderName}') + border.set(qn('w:val'), 'nil') + tblBorders.append(border) + else: - # Minimal - just outer border - for borderName in ['top', 'left', 'bottom', 'right']: + for borderName in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']: border = OxmlElement(f'w:{borderName}') border.set(qn('w:val'), 'single') border.set(qn('w:sz'), borderSize) border.set(qn('w:space'), '0') border.set(qn('w:color'), borderColor) tblBorders.append(border) - + return tblBorders - def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any: + def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None, rowBgHex: str = None, colAlignments: list = None) -> Any: """Create a table row XML element with cells. Fast-path: builds row XML directly via lxml.""" from docx.oxml.shared import OxmlElement, qn + ss = getattr(self, '_styleSet', None) if headerBgHex is None: - us = getattr(self, '_unifiedStyle', None) - headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864' + headerBgHex = ss["table_header"]["background"].lstrip('#') if ss else '1F3864' else: headerBgHex = headerBgHex.lstrip('#') if headerFgHex is None: - us = getattr(self, '_unifiedStyle', None) - headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF' + headerFgHex = ss["table_header"]["text_color"].lstrip('#') if ss else 'FFFFFF' else: headerFgHex = headerFgHex.lstrip('#') + _ALIGN_MAP = {"left": "left", "center": "center", "right": "right"} + tr = OxmlElement('w:tr') if isHeader: trPr = OxmlElement('w:trPr') trPr.append(OxmlElement('w:tblHeader')) tr.append(trPr) - for cellText in cells: + for colIdx, cellText in enumerate(cells): tc = OxmlElement('w:tc') tcPr = OxmlElement('w:tcPr') tcW = OxmlElement('w:tcW') @@ -735,9 +635,26 @@ class RendererDocx(BaseRenderer): shd.set(qn('w:color'), 'auto') shd.set(qn('w:fill'), headerBgHex) tcPr.append(shd) + elif rowBgHex: + shd = OxmlElement('w:shd') + shd.set(qn('w:val'), 'clear') + shd.set(qn('w:color'), 'auto') + shd.set(qn('w:fill'), rowBgHex) + tcPr.append(shd) tc.append(tcPr) p = OxmlElement('w:p') + + # Apply per-column alignment + if colAlignments and colIdx < len(colAlignments): + alignment = _ALIGN_MAP.get(colAlignments[colIdx]) + if alignment: + pPr = OxmlElement('w:pPr') + pJc = OxmlElement('w:jc') + pJc.set(qn('w:val'), alignment) + pPr.append(pJc) + p.append(pPr) + r = OxmlElement('w:r') if isHeader: @@ -759,148 +676,42 @@ class RendererDocx(BaseRenderer): return tr - def _applyHorizontalBordersOnly(self, table) -> None: - """Apply only horizontal borders to the table (no vertical borders).""" - try: - from docx.oxml.shared import OxmlElement, qn - - # Get table properties - tbl_pr = table._element.find(qn('w:tblPr')) - if tbl_pr is None: - tbl_pr = OxmlElement('w:tblPr') - table._element.insert(0, tbl_pr) - - # Remove existing borders - existing_borders = tbl_pr.find(qn('w:tblBorders')) - if existing_borders is not None: - tbl_pr.remove(existing_borders) - - # Create new borders element - tbl_borders = OxmlElement('w:tblBorders') - - # Top border - top_border = OxmlElement('w:top') - top_border.set(qn('w:val'), 'single') - top_border.set(qn('w:sz'), '4') - top_border.set(qn('w:space'), '0') - top_border.set(qn('w:color'), '000000') - tbl_borders.append(top_border) - - # Bottom border - bottom_border = OxmlElement('w:bottom') - bottom_border.set(qn('w:val'), 'single') - bottom_border.set(qn('w:sz'), '4') - bottom_border.set(qn('w:space'), '0') - bottom_border.set(qn('w:color'), '000000') - tbl_borders.append(bottom_border) - - # Left border - none - left_border = OxmlElement('w:left') - left_border.set(qn('w:val'), 'none') - tbl_borders.append(left_border) - - # Right border - none - right_border = OxmlElement('w:right') - right_border.set(qn('w:val'), 'none') - tbl_borders.append(right_border) - - # Inside horizontal border - inside_h_border = OxmlElement('w:insideH') - inside_h_border.set(qn('w:val'), 'single') - inside_h_border.set(qn('w:sz'), '4') - inside_h_border.set(qn('w:space'), '0') - inside_h_border.set(qn('w:color'), '000000') - tbl_borders.append(inside_h_border) - - # Inside vertical border - none - inside_v_border = OxmlElement('w:insideV') - inside_v_border.set(qn('w:val'), 'none') - tbl_borders.append(inside_v_border) - - tbl_pr.append(tbl_borders) - - except Exception as e: - self.logger.warning(f"Could not apply horizontal borders: {str(e)}") - - def _setCellBackground(self, cell, color: RGBColor) -> None: - """Set the background color of a table cell.""" - try: - from docx.oxml.shared import OxmlElement, qn - - # Get cell properties - tc_pr = cell._element.find(qn('w:tcPr')) - if tc_pr is None: - tc_pr = OxmlElement('w:tcPr') - cell._element.insert(0, tc_pr) - - # Remove existing shading - existing_shading = tc_pr.find(qn('w:shd')) - if existing_shading is not None: - tc_pr.remove(existing_shading) - - # Create new shading element - shading = OxmlElement('w:shd') - shading.set(qn('w:val'), 'clear') - shading.set(qn('w:color'), 'auto') - # Convert RGBColor to hex string by unpacking RGB components - red, green, blue = color - hex_color = f"{red:02x}{green:02x}{blue:02x}" - shading.set(qn('w:fill'), hex_color) - tc_pr.append(shading) - - except Exception as e: - self.logger.warning(f"Could not set cell background: {str(e)}") - - def _setCellBackgroundFast(self, cell, hex_color: str) -> None: - """ - Set the background color of a table cell using pre-calculated hex string. - PERFORMANCE OPTIMIZED: Avoids RGBColor unpacking and string formatting in hot loop. - """ - try: - from docx.oxml.shared import OxmlElement, qn - - # Get cell properties - tc_pr = cell._element.find(qn('w:tcPr')) - if tc_pr is None: - tc_pr = OxmlElement('w:tcPr') - cell._element.insert(0, tc_pr) - - # Remove existing shading - existing_shading = tc_pr.find(qn('w:shd')) - if existing_shading is not None: - tc_pr.remove(existing_shading) - - # Create new shading element with pre-calculated hex color - shading = OxmlElement('w:shd') - shading.set(qn('w:val'), 'clear') - shading.set(qn('w:color'), 'auto') - shading.set(qn('w:fill'), hex_color) - tc_pr.append(shading) - - except Exception as e: - self.logger.warning(f"Could not set cell background: {str(e)}") - def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance.""" try: + from docx.oxml.shared import OxmlElement, qn + content = list_data.get("content", {}) if not isinstance(content, dict): return items = content.get("items", []) - bullet_style = styles.get("bullet_list", {}) + bulletStyle = styles.get("bullet_list", {}) - font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None - text_color_rgb = None - if bullet_style.get("color"): - color_hex = bullet_style["color"].lstrip('#') - text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + fontSizePt = Pt(bulletStyle["font_size"]) if bulletStyle.get("font_size") else None + textColorRgb = None + if bulletStyle.get("color"): + colorHex = bulletStyle["color"].lstrip('#') + textColorRgb = RGBColor(int(colorHex[0:2], 16), int(colorHex[2:4], 16), int(colorHex[4:6], 16)) + + indentTwips = int(bulletStyle.get("indent", 18) * 20) + bulletChar = bulletStyle.get("bullet_char", "\u2022") for item in items: itemRuns = self._inlineRunsForListItem(item) if not itemRuns or not any(r.get("value") for r in itemRuns): continue para = doc.add_paragraph(style='List Bullet') + + # Apply indent from style + pPr = para._p.get_or_add_pPr() + ind = pPr.find(qn('w:ind')) + if ind is None: + ind = OxmlElement('w:ind') + pPr.append(ind) + ind.set(qn('w:left'), str(indentTwips)) + ind.set(qn('w:hanging'), '360') + isNewRunFormat = isinstance(item, list) if isNewRunFormat: self._renderInlineRuns(itemRuns, para, styles) @@ -908,12 +719,12 @@ class RendererDocx(BaseRenderer): itemText = "".join(r.get("value", "") for r in itemRuns) self._addMarkdownInlineRuns(para, itemText) - if bullet_style and para.runs and len(para.runs) > 0: - run = para.runs[0] - if font_size_pt: - run.font.size = font_size_pt - if text_color_rgb: - run.font.color.rgb = text_color_rgb + # Apply font size and color to all runs + for run in para.runs: + if fontSizePt: + run.font.size = fontSizePt + if textColorRgb: + run.font.color.rgb = textColorRgb except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -1000,43 +811,62 @@ class RendererDocx(BaseRenderer): def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: + from docx.oxml.shared import OxmlElement, qn + content = code_data.get("content", {}) if not isinstance(content, dict): return code = content.get("code", "") language = content.get("language", "") - code_style = styles.get("code_block", {}) - us = getattr(self, '_unifiedStyle', None) + codeStyle = styles.get("code_block", {}) if code: if language: - lang_para = doc.add_paragraph(f"Code ({language}):") - if len(lang_para.runs) > 0: - lang_para.runs[0].bold = True + langPara = doc.add_paragraph(f"Code ({language}):") + if len(langPara.runs) > 0: + langPara.runs[0].bold = True - code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New") - code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9)) - code_text_color_rgb = None - if "color" in code_style: - color_hex = code_style["color"].lstrip('#') - code_text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) - - code_para = doc.add_paragraph(code) - # Use direct access instead of iterating - if len(code_para.runs) > 0: - run = code_para.runs[0] - run.font.name = code_font_name - run.font.size = code_font_size_pt - if code_text_color_rgb: - run.font.color.rgb = code_text_color_rgb + codeFontName = codeStyle.get("font", styles["fonts"]["monospace"]) + codeFontSizePt = Pt(codeStyle.get("font_size", 9)) + codeTextColorRgb = None + if "color" in codeStyle: + colorHex = codeStyle["color"].lstrip('#') + codeTextColorRgb = RGBColor(int(colorHex[0:2], 16), int(colorHex[2:4], 16), int(colorHex[4:6], 16)) + + codePara = doc.add_paragraph(code) + + # Apply background shading via XML + bgHex = codeStyle.get("background", "").lstrip('#') + borderColorHex = codeStyle.get("border_color", "").lstrip('#') + if bgHex or borderColorHex: + pPr = codePara._p.get_or_add_pPr() + if bgHex: + shd = OxmlElement('w:shd') + shd.set(qn('w:val'), 'clear') + shd.set(qn('w:color'), 'auto') + shd.set(qn('w:fill'), bgHex) + pPr.append(shd) + if borderColorHex: + pBdr = OxmlElement('w:pBdr') + for side in ['top', 'left', 'bottom', 'right']: + bdr = OxmlElement(f'w:{side}') + bdr.set(qn('w:val'), 'single') + bdr.set(qn('w:sz'), '4') + bdr.set(qn('w:space'), '4') + bdr.set(qn('w:color'), borderColorHex) + pBdr.append(bdr) + pPr.append(pBdr) + + # Apply font styling to run + if len(codePara.runs) > 0: + run = codePara.runs[0] else: - # Create run if none exists - run = code_para.add_run() - run.font.name = code_font_name - run.font.size = code_font_size_pt - if code_text_color_rgb: - run.font.color.rgb = code_text_color_rgb - + run = codePara.add_run() + run.font.name = codeFontName + run.font.size = codeFontSizePt + if codeTextColorRgb: + run.font.color.rgb = codeTextColorRgb + except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") @@ -1065,6 +895,15 @@ class RendererDocx(BaseRenderer): content = element.get("content", element) if isinstance(element, dict) else {} if not isinstance(content, dict): content = {} + + coverStyle = styles.get("cover_page", {}) + titleSizePt = coverStyle.get("title_size", 28) + subtitleSizePt = coverStyle.get("subtitle_size", 16) + authorSizePt = coverStyle.get("author_size", 12) + dateSizePt = coverStyle.get("date_size", 12) + titleColor = coverStyle.get("title_color", "") + subtitleColor = coverStyle.get("subtitle_color", "") + for _ in range(6): doc.add_paragraph("") logoStream = self._imageStreamFromContent(content) @@ -1078,14 +917,21 @@ class RendererDocx(BaseRenderer): p.alignment = WD_ALIGN_PARAGRAPH.CENTER run = p.add_run(title) run.bold = True - run.font.size = Pt(28) - for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)): + run.font.size = Pt(titleSizePt) + if titleColor: + cHex = titleColor.lstrip('#') + run.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16)) + for key, sizePt, color in (("subtitle", subtitleSizePt, subtitleColor), ("author", authorSizePt, ""), ("date", dateSizePt, "")): val = (content.get(key) or "").strip() if not val: continue p = doc.add_paragraph() p.alignment = WD_ALIGN_PARAGRAPH.CENTER - p.add_run(val).font.size = Pt(sizePt) + run = p.add_run(val) + run.font.size = Pt(sizePt) + if color: + cHex = color.lstrip('#') + run.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16)) doc.add_page_break() except Exception as e: self.logger.warning(f"Error rendering cover_page: {e}") @@ -1215,8 +1061,23 @@ class RendererDocx(BaseRenderer): caption_text = None if caption_text: - caption_para = doc.add_paragraph(caption_text) - caption_para.runs[0].italic = True + captionPara = doc.add_paragraph(caption_text) + captionStyle = styles.get("caption", {}) + if captionPara.runs: + captionRun = captionPara.runs[0] + captionRun.italic = captionStyle.get("italic", True) + if captionStyle.get("font_size"): + captionRun.font.size = Pt(captionStyle["font_size"]) + if captionStyle.get("color"): + cHex = captionStyle["color"].lstrip('#') + captionRun.font.color.rgb = RGBColor(int(cHex[0:2], 16), int(cHex[2:4], 16), int(cHex[4:6], 16)) + captionAlign = captionStyle.get("align", "center") + if captionAlign == "center": + captionPara.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif captionAlign == "right": + captionPara.alignment = WD_ALIGN_PARAGRAPH.RIGHT + else: + captionPara.alignment = WD_ALIGN_PARAGRAPH.LEFT except Exception as embedError: # Image decoding or embedding failed raise Exception(f"Failed to decode or embed image: {str(embedError)}") @@ -1228,152 +1089,6 @@ class RendererDocx(BaseRenderer): if errorPara.runs: errorPara.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error - def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]: - """Extract document structure from user prompt.""" - structure = { - 'title': title, - 'sections': [], - 'format': 'standard' - } - - if not userPrompt: - return structure - - # Extract title from prompt if not provided - if not title or title == "Generated Document": - # Look for "create a ... document" or "generate a ... report" - title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower()) - if title_match: - structure['title'] = title_match.group(1).strip().title() - - # Extract sections from numbered lists in prompt - section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)' - sections = re.findall(section_pattern, userPrompt) - - for num, section_text in sections: - structure['sections'].append({ - 'number': int(num), - 'title': section_text.strip(), - 'level': 2 # H2 level - }) - - # If no numbered sections found, try to extract from "including:" patterns - if not structure['sections']: - including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL) - if including_match: - including_text = including_match.group(1) - # Split by common separators - parts = re.split(r'[,;]\s*', including_text) - for i, part in enumerate(parts, 1): - part = part.strip() - if part: - structure['sections'].append({ - 'number': i, - 'title': part, - 'level': 2 - }) - - # If still no sections, extract from any list-like patterns - if not structure['sections']: - # Look for bullet points or dashes - bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)' - bullets = re.findall(bullet_pattern, userPrompt) - for i, bullet in enumerate(bullets, 1): - bullet = bullet.strip() - if bullet and len(bullet) > 3: - structure['sections'].append({ - 'number': i, - 'title': bullet, - 'level': 2 - }) - - # If still no sections, extract from sentence structure - if not structure['sections']: - # Split prompt into sentences and use as sections - sentences = re.split(r'[.!?]\s+', userPrompt) - for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections - sentence = sentence.strip() - if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')): - structure['sections'].append({ - 'number': i, - 'title': sentence[:50] + "..." if len(sentence) > 50 else sentence, - 'level': 2 - }) - - # Final fallback: create sections from prompt keywords - if not structure['sections']: - # Extract key action words from prompt - action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate'] - found_actions = [] - for action in action_words: - if action in userPrompt.lower(): - found_actions.append(action.title()) - - if found_actions: - for i, action in enumerate(found_actions[:3], 1): - structure['sections'].append({ - 'number': i, - 'title': f"{action} Document Content", - 'level': 2 - }) - else: - # Last resort: generic but meaningful sections - structure['sections'] = [ - {'number': 1, 'title': 'Document Analysis', 'level': 2}, - {'number': 2, 'title': 'Key Information', 'level': 2}, - {'number': 3, 'title': 'Summary and Conclusions', 'level': 2} - ] - - return structure - - def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]): - """Generate DOCX content based on extracted structure.""" - # Add sections based on prompt structure - for section in structure['sections']: - # Add section heading - doc.add_heading(f"{section['number']}) {section['title']}", level=section['level']) - - # Add AI-generated content for this section - # Try to extract relevant content for this section from the AI response - section_content = self._extractSectionContent(content, section['title']) - - if section_content: - doc.add_paragraph(section_content) - else: - # If no specific content found, add a note - doc.add_paragraph(f"Content for {section['title']} based on document analysis.") - - # Add some spacing - doc.add_paragraph() - - # Add the complete AI-generated content as additional analysis - if content and content.strip(): - doc.add_heading("Complete Analysis", level=1) - doc.add_paragraph(content) - - def _extractSectionContent(self, content: str, section_title: str) -> str: - """Extract relevant content for a specific section from AI response.""" - if not content or not section_title: - return "" - - # Look for content that matches the section title - section_keywords = section_title.lower().split() - - # Split content into paragraphs - paragraphs = content.split('\n\n') - - relevant_paragraphs = [] - for paragraph in paragraphs: - paragraph_lower = paragraph.lower() - # Check if paragraph contains keywords from section title - if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3): - relevant_paragraphs.append(paragraph.strip()) - - if relevant_paragraphs: - return '\n\n'.join(relevant_paragraphs[:2]) # Max 2 paragraphs per section - - return "" - def _setupDocumentStyles(self, doc: Document, styleSet: Dict[str, Any]) -> None: """Create all styles in document from style set. @@ -1386,11 +1101,15 @@ class RendererDocx(BaseRenderer): if "title" in styleSet: self._createStyle(doc, "Title", styleSet["title"], WD_STYLE_TYPE.PARAGRAPH) - # Create Heading styles (Heading 1, Heading 2) + # Create Heading styles (Heading 1 through Heading 4) if "heading1" in styleSet: self._createStyle(doc, "Heading 1", styleSet["heading1"], WD_STYLE_TYPE.PARAGRAPH) if "heading2" in styleSet: self._createStyle(doc, "Heading 2", styleSet["heading2"], WD_STYLE_TYPE.PARAGRAPH) + if "heading3" in styleSet: + self._createStyle(doc, "Heading 3", styleSet["heading3"], WD_STYLE_TYPE.PARAGRAPH) + if "heading4" in styleSet: + self._createStyle(doc, "Heading 4", styleSet["heading4"], WD_STYLE_TYPE.PARAGRAPH) # Create Paragraph style if "paragraph" in styleSet: @@ -1426,6 +1145,10 @@ class RendererDocx(BaseRenderer): font.color.rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) if "font" in styleConfig: font.name = styleConfig["font"] + else: + us = getattr(self, '_unifiedStyle', None) + if us: + font.name = us["fonts"]["primary"] # Set paragraph formatting for alignment if "align" in styleConfig: @@ -1440,328 +1163,3 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not create style '{styleName}': {str(e)}") - - def _processSection(self, doc, lines: list): - """Process a section of content into DOCX elements.""" - for line in lines: - if not line.strip(): - continue - - # Check for tables (lines with |) - if '|' in line and not line.startswith('|'): - # This might be part of a table, process as table - table_data = self._extractTableData(lines) - if table_data: - self._addTable(doc, table_data) - return - - # Check for lists - if line.startswith('- ') or line.startswith('* '): - # This is a list item - doc.add_paragraph(line[2:], style='List Bullet') - elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')): - # This is a numbered list item - doc.add_paragraph(line[3:], style='List Number') - else: - # Regular paragraph - doc.add_paragraph(line) - - def _extractTableData(self, lines: list) -> list: - """Extract table data from lines.""" - table_data = [] - in_table = False - - for line in lines: - if '|' in line: - if not in_table: - in_table = True - # Split by | and clean up - cells = [cell.strip() for cell in line.split('|') if cell.strip()] - if cells: - table_data.append(cells) - elif in_table and not line.strip(): - # Empty line, might be end of table - break - - return table_data if len(table_data) > 1 else [] - - def _addTable(self, doc, table_data: list): - """Add a table to the document.""" - try: - if not table_data: - return - - # Create table - table = doc.add_table(rows=len(table_data), cols=len(table_data[0])) - table.alignment = WD_TABLE_ALIGNMENT.LEFT - - # Add data to table - for row_idx, row_data in enumerate(table_data): - for col_idx, cell_data in enumerate(row_data): - if col_idx < len(table.rows[row_idx].cells): - table.rows[row_idx].cells[col_idx].text = cell_data - - # Style the table - self._styleTable(table) - - # Add an empty paragraph after the table to prevent Word from merging consecutive tables - doc.add_paragraph() - - except Exception as e: - self.logger.warning(f"Could not add table: {str(e)}") - - def _styleTable(self, table): - """Apply styling to the table.""" - try: - # Style header row - if len(table.rows) > 0: - header_cells = table.rows[0].cells - for cell in header_cells: - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - except Exception as e: - self.logger.warning(f"Could not style table: {str(e)}") - - def _processTableRow(self, doc, line: str): - """Process a table row and add it to the document.""" - if not line.strip(): - return - - # Split by pipe separator - parts = [part.strip() for part in line.split('|')] - - if len(parts) >= 2: - # This is a table row - create a table if it doesn't exist - if not hasattr(self, '_current_table') or self._current_table is None: - # Create new table - self._current_table = doc.add_table(rows=1, cols=len(parts)) - self._current_table.style = 'Table Grid' - - # Add header row - for i, part in enumerate(parts): - if i < len(self._current_table.rows[0].cells): - cell = self._current_table.rows[0].cells[i] - cell.text = part - # Make header bold - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - else: - # Add data row to existing table - row = self._current_table.add_row() - for i, part in enumerate(parts): - if i < len(row.cells): - row.cells[i].text = part - else: - # Not a table row, treat as regular text - doc.add_paragraph(line) - - def _cleanAiContent(self, content: str) -> str: - """Clean AI-generated content by removing debug information and duplicates.""" - if not content: - return "" - - # Remove debug information - lines = content.split('\n') - clean_lines = [] - - for line in lines: - # Skip debug lines and separators - if (line.startswith('[Skipped ') or - line.startswith('=== DOCUMENT:') or - line.startswith('---') or - line.startswith('FILENAME:') or - line.strip() == '' or - line.strip() == '---'): - continue - clean_lines.append(line) - - # Join lines and remove duplicate content - clean_content = '\n'.join(clean_lines) - - # Remove duplicate sections by keeping only the first occurrence - sections = clean_content.split('\n\n') - seen_sections = set() - unique_sections = [] - - for section in sections: - section_key = section.strip()[:50] # Use first 50 chars as key - if section_key not in seen_sections and section.strip(): - seen_sections.add(section_key) - unique_sections.append(section) - - return '\n\n'.join(unique_sections) - - def _processTables(self, doc, content: str) -> str: - """ - Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. - Returns the content with tables replaced by placeholders. - """ - # csv is already imported at module level - - lines = content.split('\n') - processed_lines = [] - i = 0 - - while i < len(lines): - line = lines[i].strip() - - # Check if this line looks like a table (contains pipes or commas with multiple fields) - is_pipe_table = '|' in line and len(line.split('|')) >= 2 - is_csv_table = ',' in line and len(line.split(',')) >= 2 - - if is_pipe_table or is_csv_table: - # Collect consecutive table lines - table_lines = [] - j = i - - # Determine separator and collect lines - separator = '|' if is_pipe_table else ',' - while j < len(lines): - current_line = lines[j].strip() - if separator in current_line and len(current_line.split(separator)) >= 2: - table_lines.append(current_line) - j += 1 - else: - break - - if len(table_lines) >= 2: # At least header + 1 data row - # Create Word table - try: - if separator == '|': - # Process pipe-separated table - rows = [] - for table_line in table_lines: - # Split by pipe and clean up - cells = [cell.strip() for cell in table_line.split('|')] - rows.append(cells) - else: - # Process CSV table - csv_content = '\n'.join(table_lines) - csv_reader = csv.reader(io.StringIO(csv_content)) - rows = list(csv_reader) - - if rows and len(rows[0]) > 0: - # Create Word table - table = doc.add_table(rows=len(rows), cols=len(rows[0])) - table.style = 'Table Grid' - - # Populate table - for row_idx, row_data in enumerate(rows): - for col_idx, cell_data in enumerate(row_data): - if col_idx < len(table.rows[row_idx].cells): - table.rows[row_idx].cells[col_idx].text = cell_data.strip() - - # Make header row bold - if row_idx == 0: - for cell in table.rows[row_idx].cells: - for paragraph in cell.paragraphs: - for run in paragraph.runs: - run.bold = True - - # Add an empty paragraph after the table to prevent Word from merging consecutive tables - doc.add_paragraph() - - # Add placeholder to mark where table was inserted - processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]") - - # Skip the table lines - i = j - continue - except Exception as e: - # If table parsing fails, treat as regular text - pass - - processed_lines.append(line) - i += 1 - - return '\n'.join(processed_lines) - - def _parseAndFormatContent(self, doc, content: str, title: str): - """Parse AI-generated content in standardized format and apply proper DOCX formatting.""" - if not content: - return - - # Process tables and replace them with placeholders - content = self._processTables(doc, content) - - # Parse content line by line in exact sequence - lines = content.split('\n') - - for line in lines: - line = line.strip() - if not line: - # Empty line - add paragraph break - doc.add_paragraph() - continue - - # Skip table placeholders (already processed) - if line.startswith('[TABLE_INSERTED_'): - continue - - # Check if this is a Markdown heading (# ## ###) - if line.startswith('#'): - level = len(line) - len(line.lstrip('#')) - heading_text = line.lstrip('# ').strip() - doc.add_heading(heading_text, level=min(level, 3)) - - # Check if this is a numbered heading (1) Title, 2) Title, etc.) - elif re.match(r'^\d+\)\s+.+', line): - heading_text = re.sub(r'^\d+\)\s+', '', line) - doc.add_heading(heading_text, level=1) - - # Check if this is a Markdown list item - elif line.startswith('- ') or re.match(r'^\d+\.\s+', line): - bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line) - self._add_bullet_point(doc, bullet_text) - - # Check if this is a code block - elif line.startswith('```'): - if not line.endswith('```'): - # Start of code block - collect until end - code_lines = [line] - continue - else: - # End of code block - if 'code_lines' in locals(): - code_lines.append(line) - code_text = '\n'.join(code_lines) - para = doc.add_paragraph() - run = para.add_run(code_text) - run.font.name = 'Courier New' - del code_lines - - # Regular paragraph - else: - self._addParagraphToDoc(doc, line) - - def _addParagraphToDoc(self, doc, text: str): - """Add a paragraph to the document with proper formatting.""" - if not text.strip(): - return - - # Check for Markdown formatting (**bold**, *italic*) - para = doc.add_paragraph() - - # Split by bold markers - parts = text.split('**') - for i, part in enumerate(parts): - if i % 2 == 0: - # Regular text - check for italic - italic_parts = part.split('*') - for j, italic_part in enumerate(italic_parts): - if j % 2 == 0: - # Regular text - if italic_part: - para.add_run(italic_part) - else: - # Italic text - if italic_part: - run = para.add_run(italic_part) - run.italic = True - else: - # Bold text - if part: - run = para.add_run(part) - run.bold = True \ No newline at end of file diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py index 33093b8e..16c1cdfd 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py @@ -115,8 +115,10 @@ class RendererHtml(BaseRenderer): styles = self._convertUnifiedStyleToInternal(style) self._unifiedStyle = style else: - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) - self._unifiedStyle = None + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle + style = resolveStyle() + styles = self._convertUnifiedStyleToInternal(style) + self._unifiedStyle = style # Validate JSON structure if not self._validateJsonStructure(jsonContent): @@ -174,107 +176,6 @@ class RendererHtml(BaseRenderer): self.logger.error(f"Error generating HTML from JSON: {str(e)}") raise Exception(f"HTML generation failed: {str(e)}") - async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - use styles from document generation metadata if available, - otherwise enhance default styles with AI if userPrompt provided. - - WICHTIG: In a dynamic scalable AI system, styling should come from document generation, - not be generated separately by renderers. Only fall back to AI if styles not provided. - - Args: - extractedContent: Document content with metadata (may contain styles) - userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if styles not in metadata and userPrompt provided) - templateName: Name of template style set (None = default) - - Returns: - Dict with style definitions for all document styles - """ - # Get default style set - defaultStyleSet = self._getDefaultStyleSet() - - # FIRST: Check if styles are provided in document generation metadata (preferred approach) - if extractedContent: - metadata = extractedContent.get("metadata", {}) - if isinstance(metadata, dict): - styles = metadata.get("styles") - if styles and isinstance(styles, dict): - self.logger.debug("Using styles from document generation metadata") - return self._validateStylesContrast(styles) - - # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) - if userPrompt and aiService: - self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") - enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - return self._validateStylesContrast(enhancedStyleSet) - else: - # Use default styles only - return defaultStyleSet - - async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: - """Enhance default styles with AI based on user prompt.""" - try: - style_template = self._createAiStyleTemplate("html", userPrompt, defaultStyleSet) - enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet) - return enhanced_styles - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") - return defaultStyleSet - - def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Validate and fix contrast issues in AI-generated styles.""" - try: - # Fix table header contrast - if "table_header" in styles: - header = styles["table_header"] - bgColor = header.get("background", "#FFFFFF") - textColor = header.get("color", "#000000") - - # If both are white or both are dark, fix it - if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": - header["background"] = "#4F4F4F" - header["color"] = "#FFFFFF" - elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": - header["background"] = "#4F4F4F" - header["color"] = "#FFFFFF" - - # Fix table cell contrast - if "table_cell" in styles: - cell = styles["table_cell"] - bgColor = cell.get("background", "#FFFFFF") - textColor = cell.get("color", "#000000") - - # If both are white or both are dark, fix it - if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": - cell["background"] = "#FFFFFF" - cell["color"] = "#2F2F2F" - elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": - cell["background"] = "#FFFFFF" - cell["color"] = "#2F2F2F" - - return styles - - except Exception as e: - self.logger.warning(f"Style validation failed: {str(e)}") - return self._getDefaultStyleSet() - - def _getDefaultStyleSet(self) -> Dict[str, Any]: - """Default HTML style set - used when no style instructions present.""" - return { - "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, - "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}, - "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}, - "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"}, - "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"}, - "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"}, - "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"}, - "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"}, - "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"}, - "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"}, - "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"} - } - - def _generateCssStyles(self, styles: Dict[str, Any]) -> str: """Generate CSS from style definitions.""" # When unified style is available, generate CSS directly from it @@ -440,7 +341,9 @@ class RendererHtml(BaseRenderer): css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;") css_parts.append(f" color: {h.get('color', primaryColor)};") css_parts.append(f" font-weight: {h.get('weight', 'bold')};") - css_parts.append(f" margin: 1.2em 0 0.4em 0;") + spBefore = h.get('spaceBeforePt', max(24 - (level - 1) * 4, 12)) + spAfter = h.get('spaceAfterPt', max(8 - (level - 1) * 2, 3)) + css_parts.append(f" margin: {spBefore}pt 0 {spAfter}pt 0;") css_parts.append("}") # Paragraphs @@ -453,11 +356,16 @@ class RendererHtml(BaseRenderer): # Tables borderColor = tbl.get("borderColor", "#DEE2E6") + borderStyle = tbl.get("borderStyle", "grid") css_parts.append("table {") - css_parts.append(f" border-collapse: collapse;") - css_parts.append(f" width: 100%;") - css_parts.append(f" margin: 1em 0;") - css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append(" border-collapse: collapse;") + css_parts.append(" width: 100%;") + css_parts.append(" margin: 1em 0;") + if borderStyle == "grid": + css_parts.append(f" border: 1px solid {borderColor};") + elif borderStyle == "horizontal": + css_parts.append(f" border-top: 1px solid {borderColor};") + css_parts.append(f" border-bottom: 1px solid {borderColor};") css_parts.append("}") # Table headers @@ -466,17 +374,30 @@ class RendererHtml(BaseRenderer): css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};") css_parts.append(" font-weight: bold;") css_parts.append(" text-align: center;") - css_parts.append(f" padding: 10px;") - css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append(" padding: 10px;") + if borderStyle == "grid": + css_parts.append(f" border: 1px solid {borderColor};") + elif borderStyle == "horizontal": + css_parts.append(f" border-bottom: 1px solid {borderColor};") css_parts.append("}") # Table cells css_parts.append("td {") css_parts.append(f" color: {paraColor};") css_parts.append(" padding: 8px;") - css_parts.append(f" border: 1px solid {borderColor};") + if borderStyle == "grid": + css_parts.append(f" border: 1px solid {borderColor};") + elif borderStyle == "horizontal": + css_parts.append(f" border-bottom: 1px solid {borderColor};") css_parts.append("}") + # Row banding + if tbl.get("bandingEnabled", True): + evenBg = tbl.get("rowBandingEven", "#f6f8fa") + oddBg = tbl.get("rowBandingOdd", "#FFFFFF") + css_parts.append(f"tbody tr:nth-child(even) {{ background: {evenBg}; }}") + css_parts.append(f"tbody tr:nth-child(odd) {{ background: {oddBg}; }}") + # Lists css_parts.append("ul {") css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;") @@ -499,13 +420,33 @@ class RendererHtml(BaseRenderer): css_parts.append("}") # Images + imgStyle = style.get("image", {}) + imgMaxWidth = imgStyle.get("maxWidthPt", 800) + imgAlignment = imgStyle.get("alignment", "center") css_parts.append("img {") - css_parts.append(" max-width: 100%;") + css_parts.append(f" max-width: min({imgMaxWidth}pt, 100%);") css_parts.append(" height: auto;") - css_parts.append(" margin: 1em 0;") + css_parts.append(" display: block;") + if imgAlignment == "center": + css_parts.append(" margin: 1em auto;") + elif imgAlignment == "right": + css_parts.append(" margin: 1em 0 1em auto;") + else: + css_parts.append(" margin: 1em 0;") css_parts.append(" border-radius: 4px;") css_parts.append("}") + # Figcaptions + captionStyle = style.get("caption", {}) + css_parts.append("figcaption {") + css_parts.append(f" font-size: {captionStyle.get('sizePt', 10)}pt;") + css_parts.append(f" color: {captionStyle.get('color', '#586069')};") + if captionStyle.get("italic", True): + css_parts.append(" font-style: italic;") + css_parts.append(f" text-align: {captionStyle.get('align', 'center')};") + css_parts.append(" margin-top: 0.5em;") + css_parts.append("}") + # Generated info css_parts.append(".generated-info {") css_parts.append(" font-size: 0.9em;") @@ -839,9 +780,7 @@ class RendererHtml(BaseRenderer): # Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris # Include a marker so we can find and replace it imageMarker = f"" - # Add max-width and max-height to ensure image fits within page dimensions - # Typical page width is ~800-1200px, height varies but we limit to 600px for readability - imgTag = f'{altTextEscaped}' + imgTag = f'{altTextEscaped}' if captionEscaped: return f'{imageMarker}
{imgTag}
{captionEscaped}
' diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index 9bfe3788..fc6bd51b 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -20,7 +20,7 @@ try: from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.units import inch from reportlab.lib import colors - from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY + from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY, TA_RIGHT REPORTLAB_AVAILABLE = True except ImportError: REPORTLAB_AVAILABLE = False @@ -28,12 +28,71 @@ except ImportError: import re as _re_pdf from ._pdfFontFallback import wrapEmojiSpansInXml as _wrapEmojiSpansInXml +from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge as _deepMergeStyle # A4 width in pt; margins must match SimpleDocTemplate(leftMargin/rightMargin) _PDF_MARGIN_LR_PT = 72.0 _PDF_A4_WIDTH_PT = 595.27 _PDF_CONTENT_WIDTH_PT = _PDF_A4_WIDTH_PT - (2 * _PDF_MARGIN_LR_PT) +# Font resolution: map CSS/system font names to ReportLab built-in equivalents. +# ReportLab core fonts: Helvetica, Times-Roman, Courier, Symbol, ZapfDingbats. +_FONT_FALLBACK_MAP = { + "calibri": "Helvetica", + "arial": "Helvetica", + "verdana": "Helvetica", + "segoe ui": "Helvetica", + "helvetica": "Helvetica", + "times new roman": "Times-Roman", + "times": "Times-Roman", + "georgia": "Times-Roman", + "consolas": "Courier", + "courier new": "Courier", + "courier": "Courier", + "monospace": "Courier", +} +_BOLD_VARIANT = { + "Helvetica": "Helvetica-Bold", + "Times-Roman": "Times-Bold", + "Courier": "Courier-Bold", +} +_registeredTtfFonts: set = set() + + +def _resolveFontFamily(fontName: str, bold: bool = False) -> str: + """Resolve a CSS/system font name to a ReportLab-compatible font name. + + Tries TTF registration from the system font dir first; on failure falls + back to the closest built-in core font. + """ + if not fontName: + return "Helvetica-Bold" if bold else "Helvetica" + key = fontName.strip().lower() + if key in _registeredTtfFonts: + return f"{fontName}-Bold" if bold else fontName + if key not in _FONT_FALLBACK_MAP: + try: + from reportlab.pdfbase import pdfmetrics + from reportlab.pdfbase.ttfonts import TTFont + import os + winFontsDir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts") + candidates = [ + os.path.join(winFontsDir, f"{fontName}.ttf"), + os.path.join(winFontsDir, f"{fontName.lower()}.ttf"), + f"/usr/share/fonts/truetype/{fontName.lower()}/{fontName.lower()}.ttf", + ] + for path in candidates: + if os.path.isfile(path): + pdfmetrics.registerFont(TTFont(fontName, path)) + _registeredTtfFonts.add(key) + return fontName + except Exception: + pass + coreFont = _FONT_FALLBACK_MAP.get(key, "Helvetica") + if bold: + return _BOLD_VARIANT.get(coreFont, f"{coreFont}-Bold") + return coreFont + def _boxDrawingCharToAscii(ch: str) -> str: """Map one box-drawing character to ASCII (Courier has no glyphs for U+2500–U+257F).""" @@ -170,25 +229,23 @@ class RendererPdf(BaseRenderer): # memory simultaneously. Collected here, deleted after the build. self._tempImageFiles = [] try: - # Get style set from unified style or legacy approach + self._unifiedStyle = unifiedStyle if unifiedStyle: styles = self._convertUnifiedStyleToInternal(unifiedStyle) - self._unifiedStyle = unifiedStyle - for level in range(1, 7): - hKey = f"heading{level}" - if hKey not in styles: - styles[hKey] = self._defaultHeadingStyleDef(level) - else: - styles[hKey].setdefault("space_after", 12) - styles[hKey].setdefault("space_before", 12) - styles["paragraph"].setdefault("space_after", 6) - styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2)) - styles["bullet_list"].setdefault("space_after", 3) - styles["code_block"].setdefault("space_after", 6) - styles["code_block"].setdefault("align", "left") else: - styles = await self._getStyleSet(json_content, userPrompt, aiService) - self._unifiedStyle = None + styles = self._convertUnifiedStyleToInternal({}) + for level in range(1, 7): + hKey = f"heading{level}" + if hKey not in styles: + styles[hKey] = self._defaultHeadingStyleDef(level) + else: + styles[hKey].setdefault("space_after", 12) + styles[hKey].setdefault("space_before", 12) + styles["paragraph"].setdefault("space_after", 6) + styles["paragraph"].setdefault("line_height", (unifiedStyle or {}).get("paragraph", {}).get("lineSpacing", 1.5)) + styles["bullet_list"].setdefault("space_after", 3) + styles["code_block"].setdefault("space_after", 6) + styles["code_block"].setdefault("align", "left") # Validate JSON structure if not self._validateJsonStructure(json_content): @@ -307,247 +364,6 @@ class RendererPdf(BaseRenderer): if not removed: raise - async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - use styles from document generation metadata if available, - otherwise enhance default styles with AI if userPrompt provided. - - WICHTIG: In a dynamic scalable AI system, styling should come from document generation, - not be generated separately by renderers. Only fall back to AI if styles not provided. - - Args: - extractedContent: Document content with metadata (may contain styles) - userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if styles not in metadata and userPrompt provided) - templateName: Name of template style set (None = default) - - Returns: - Dict with style definitions for all document styles - """ - # Get default style set - defaultStyleSet = self._getDefaultStyleSet() - - # FIRST: Check if styles are provided in document generation metadata (preferred approach) - if extractedContent: - metadata = extractedContent.get("metadata", {}) - if isinstance(metadata, dict): - styles = metadata.get("styles") - if styles and isinstance(styles, dict): - self.logger.debug("Using styles from document generation metadata") - enhancedStyleSet = self._convertColorsFormat(styles) - return self._validateStylesContrast(enhancedStyleSet) - - # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) - if userPrompt and aiService: - self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") - enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - # Convert colors to PDF format after getting styles - enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) - return self._validateStylesContrast(enhancedStyleSet) - else: - # Use default styles only - return defaultStyleSet - - async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: - """Enhance default styles with AI based on user prompt.""" - try: - style_template = self._createAiStyleTemplate("pdf", userPrompt, defaultStyleSet) - enhanced_styles = await self._getAiStyles(aiService, style_template, defaultStyleSet) - return enhanced_styles - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") - return defaultStyleSet - - def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Validate and fix contrast issues in AI-generated styles.""" - try: - # Fix table header contrast - if "table_header" in styles: - header = styles["table_header"] - bg_color = header.get("background", "#FFFFFF") - text_color = header.get("text_color", "#000000") - - # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": - header["background"] = "#4F4F4F" - header["text_color"] = "#FFFFFF" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": - header["background"] = "#4F4F4F" - header["text_color"] = "#FFFFFF" - - # Fix table cell contrast - if "table_cell" in styles: - cell = styles["table_cell"] - bg_color = cell.get("background", "#FFFFFF") - text_color = cell.get("text_color", "#000000") - - # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": - cell["background"] = "#FFFFFF" - cell["text_color"] = "#2F2F2F" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": - cell["background"] = "#FFFFFF" - cell["text_color"] = "#2F2F2F" - - return styles - - except Exception as e: - self.logger.warning(f"Style validation failed: {str(e)}") - return self._getDefaultStyleSet() - - def _getDefaultStyleSet(self) -> Dict[str, Any]: - """Default PDF style set - used when no style instructions present.""" - return { - "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, - # Markdown #..###### — sizes must strictly decrease (H1 largest … H6 smallest). - "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12}, - "heading2": {"font_size": 15, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 10, "space_before": 10}, - "heading3": {"font_size": 13, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8}, - "heading4": {"font_size": 12, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6}, - "heading5": {"font_size": 11, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 6, "space_before": 6}, - "heading6": {"font_size": 10, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 4, "space_before": 4}, - "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2}, - "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "left", "font_size": 12}, - "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10}, - "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3}, - "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6, "align": "left"} - } - - async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: - """Get AI styles with proper PDF color conversion.""" - if not ai_service: - return default_styles - - try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE - - request = AiCallRequest(prompt=style_template, context="", options=request_options) - - # Check if AI service is properly configured - if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects: - self.logger.warning("AI service not properly configured, using defaults") - return default_styles - - response = await ai_service.callAi(request) - - # Check if response is valid - if not response: - self.logger.warning("AI service returned no response, using defaults") - return default_styles - - import json - import re - - # Clean and parse JSON - result = response.content.strip() if response and response.content else "" - - # Check if result is empty - if not result: - self.logger.warning("AI styling returned empty response, using defaults") - return default_styles - - # Log the raw response for debugging - self.logger.debug(f"AI styling raw response: {result[:200]}...") - - # Extract JSON from various formats - json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - # Try to extract JSON from explanatory text - json_patterns = [ - r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object - r'\{.*?"title".*?\}', # JSON with title field - r'\{.*?"font_size".*?\}', # JSON with font_size field - ] - - for pattern in json_patterns: - json_match = re.search(pattern, result, re.DOTALL) - if json_match: - result = json_match.group(0) - break - - # Additional cleanup - remove any leading/trailing whitespace and newlines - result = result.strip() - - # Check if result is still empty after cleanup - if not result: - self.logger.warning("AI styling returned empty content after cleanup, using defaults") - return default_styles - - # Try to parse JSON - try: - styles = json.loads(result) - self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}") - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}") - - # Use print instead of logger to avoid truncation - self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER") - self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER") - - self.logger.warning(f"Raw content that failed to parse: {result}") - - # Try to fix incomplete JSON by adding missing closing braces - open_braces = result.count('{') - close_braces = result.count('}') - - if open_braces > close_braces: - # JSON is incomplete, add missing closing braces - missing_braces = open_braces - close_braces - result = result + '}' * missing_braces - self.logger.info(f"Added {missing_braces} missing closing brace(s)") - - # Try parsing the fixed JSON - try: - styles = json.loads(result) - self.logger.info("Successfully fixed incomplete JSON") - except json.JSONDecodeError as fix_error: - self.logger.warning(f"Fixed JSON still invalid: {fix_error}") - # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] - try: - styles = json.loads(json_part) - self.logger.info("Successfully extracted JSON from explanatory text") - except json.JSONDecodeError: - self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles - else: - return default_styles - else: - # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] - try: - styles = json.loads(json_part) - self.logger.info("Successfully extracted JSON from explanatory text") - except json.JSONDecodeError: - self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles - else: - return default_styles - - # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them) - styles = self._convertColorsFormat(styles) - - return styles - - except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return default_styles def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert colors to proper format for PDF compatibility.""" @@ -580,9 +396,13 @@ class RendererPdf(BaseRenderer): sizes = {1: 18, 2: 15, 3: 13, 4: 12, 5: 11, 6: 10} fs = sizes.get(level, 10) sb = max(4, 14 - level) + us = getattr(self, '_unifiedStyle', None) or {} + clrs = us.get("colors", {}) + primary = clrs.get("primary", "#24292e") + secondary = clrs.get("secondary", "#586069") return { "font_size": fs, - "color": "#2F2F2F" if level <= 2 else "#4F4F4F", + "color": primary if level <= 2 else secondary, "bold": True, "align": "left", "space_after": sb, @@ -594,14 +414,19 @@ class RendererPdf(BaseRenderer): title_style_def = styles.get("title") or {} fs = title_style_def.get("font_size", 26) bold = title_style_def.get("bold", True) + us = getattr(self, '_unifiedStyle', None) + primaryFont = us["fonts"]["primary"] if us else "Calibri" + coverTitleColor = styles.get("cover_page", {}).get("title_color") + colorsFallback = styles.get("colors", {}).get("primary", "#24292e") + titleColor = title_style_def.get("color", coverTitleColor or colorsFallback) return ParagraphStyle( "DocumentTitle", - fontName="Helvetica-Bold" if bold else "Helvetica", + fontName=_resolveFontFamily(primaryFont, bold), fontSize=fs, spaceAfter=title_style_def.get("space_after", 18), spaceBefore=title_style_def.get("space_before", 0), alignment=self._getAlignment(title_style_def.get("align", "center")), - textColor=self._hexToColor(title_style_def.get("color", "#1F3864")), + textColor=self._hexToColor(titleColor), leading=fs * 1.25, ) @@ -611,28 +436,32 @@ class RendererPdf(BaseRenderer): heading_style_def = styles.get(heading_key) or self._defaultHeadingStyleDef(level) fs = heading_style_def.get("font_size", self._defaultHeadingStyleDef(level)["font_size"]) bold = heading_style_def.get("bold", True) + us = getattr(self, '_unifiedStyle', None) + primaryFont = us["fonts"]["primary"] if us else "Calibri" return ParagraphStyle( f'CustomHeading{level}', - fontName="Helvetica-Bold" if bold else "Helvetica", + fontName=_resolveFontFamily(primaryFont, bold), fontSize=fs, spaceAfter=heading_style_def.get("space_after", 12), spaceBefore=heading_style_def.get("space_before", 12), alignment=self._getAlignment(heading_style_def.get("align", "left")), - textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F")), + textColor=self._hexToColor(heading_style_def.get("color", styles.get("colors", {}).get("primary", "#24292e"))), leading=fs * 1.35, ) def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle: """Create normal paragraph style from style definitions.""" paragraph_style_def = styles.get("paragraph", {}) - + us = getattr(self, '_unifiedStyle', None) + primaryFont = us["fonts"]["primary"] if us else "Calibri" return ParagraphStyle( 'CustomNormal', + fontName=_resolveFontFamily(primaryFont, False), fontSize=paragraph_style_def.get("font_size", 11), spaceAfter=paragraph_style_def.get("space_after", 6), alignment=self._getAlignment(paragraph_style_def.get("align", "left")), - textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")), - leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11) + textColor=self._hexToColor(paragraph_style_def.get("color", "#24292e")), + leading=paragraph_style_def.get("line_height", 1.5) * paragraph_style_def.get("font_size", 11) ) def _getAlignment(self, align: str) -> int: @@ -644,10 +473,10 @@ class RendererPdf(BaseRenderer): "center": TA_CENTER, "left": TA_LEFT, "justify": TA_JUSTIFY, - "right": TA_LEFT, # ReportLab doesn't have TA_RIGHT, use LEFT as fallback - "0": TA_LEFT, # Handle numeric strings + "right": TA_RIGHT, + "0": TA_LEFT, "1": TA_CENTER, - "2": TA_JUSTIFY + "2": TA_JUSTIFY, } return align_map.get(align.lower().strip(), TA_LEFT) @@ -687,7 +516,7 @@ class RendererPdf(BaseRenderer): """Convert inline runs to ReportLab Paragraph XML.""" parts = [] us = getattr(self, '_unifiedStyle', None) - monoFont = us["fonts"]["monospace"] if us else "Courier" + monoFont = _resolveFontFamily(us["fonts"]["monospace"] if us else "Courier") for run in runs: runType = run.get("type", "text") value = self._escapeReportlabXml(run.get("value", "")) @@ -730,13 +559,15 @@ class RendererPdf(BaseRenderer): if not text: return "" text = _normalizePdfMonospaceText(text) + us = getattr(self, '_unifiedStyle', None) + monoFont = _resolveFontFamily(us["fonts"]["monospace"] if us else "Courier") out: List[str] = [] pos = 0 for m in _re_pdf.finditer(r"`([^`]*)`", text): before = text[pos:m.start()] out.append(self._applyInlineMarkdownToEscapedPlain(before)) code = m.group(1) - out.append(f'{self._escapeReportlabXml(code)}') + out.append(f'{self._escapeReportlabXml(code)}') pos = m.end() out.append(self._applyInlineMarkdownToEscapedPlain(text[pos:])) return _wrapEmojiSpansInXml("".join(out)) @@ -750,16 +581,75 @@ class RendererPdf(BaseRenderer): """Paragraph style for table cells (word wrap within colWidth).""" tdef = styles.get(tableStyleKey, {}) fs = tdef.get("font_size", 12 if header else 10) - defaultTc = "#FFFFFF" if header else "#2F2F2F" + defaultTc = "#24292e" + us = getattr(self, '_unifiedStyle', None) + primaryFont = us["fonts"]["primary"] if us else "Calibri" + isBold = header and tdef.get("bold", True) return ParagraphStyle( f"TblCell{'H' if header else 'B'}{tableStyleKey}", fontSize=fs, leading=fs * 1.25, - alignment=TA_LEFT, + alignment=self._getAlignment(tdef.get("align", "left")), textColor=self._hexToColor(tdef.get("text_color", defaultTc)), - fontName="Helvetica-Bold" if header and tdef.get("bold", True) else "Helvetica", + fontName=_resolveFontFamily(primaryFont, isBold), ) + def _createCaptionStyle(self, styles: Dict[str, Any]) -> ParagraphStyle: + """Paragraph style for image/figure captions driven by styles["caption"].""" + captionDef = styles.get("caption", {}) + us = getattr(self, '_unifiedStyle', None) + primaryFont = us["fonts"]["primary"] if us else "Calibri" + fs = captionDef.get("font_size", 10) + colorFallback = styles.get("colors", {}).get("secondary", "#586069") + return ParagraphStyle( + "CaptionStyle", + fontName=_resolveFontFamily(primaryFont, False), + fontSize=fs, + leading=fs * 1.25, + textColor=self._hexToColor(captionDef.get("color", colorFallback)), + alignment=self._getAlignment(captionDef.get("align", "center")), + spaceAfter=4, + ) + + def _inferColumnAlignments(self, headers: List, rows: List, mergedTableStyle: Dict[str, Any]) -> List[str]: + """Infer per-column text alignment from explicit style or cell content heuristics. + + Numeric-majority columns (>60 %) get right-aligned; everything else left. + An explicit ``columnAlignments`` list in *mergedTableStyle* takes precedence. + """ + numCols = len(headers) + explicit = mergedTableStyle.get("columnAlignments", []) + if explicit and len(explicit) >= numCols: + return list(explicit[:numCols]) + alignments = list(explicit) if explicit else [] + for colIdx in range(len(alignments), numCols): + numericCount = 0 + totalCount = 0 + for row in rows: + if colIdx < len(row): + cell = row[colIdx] + if isinstance(cell, list): + val = "".join( + r.get("value", "") if isinstance(r, dict) else str(r) for r in cell + ).strip() + elif cell is not None: + val = str(cell).strip() + else: + val = "" + if val: + totalCount += 1 + cleaned = val.replace(",", "").replace("%", "").replace("$", "").replace("\u20ac", "").replace("'", "").strip() + try: + float(cleaned) + numericCount += 1 + except (ValueError, TypeError): + pass + if totalCount > 0 and numericCount / totalCount > 0.6: + alignments.append("right") + else: + alignments.append("left") + return alignments + def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a single JSON section to PDF elements using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. @@ -841,7 +731,12 @@ class RendererPdf(BaseRenderer): return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))] def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a JSON table: left-aligned, width capped to printable area, cells wrap.""" + """Render a JSON table: left-aligned, width capped to printable area, cells wrap. + + Supports per-table style overrides via ``content["tableStyle"]``, border + style variants (grid / horizontal / none), banding toggle, configurable + cell padding, and auto-inferred column alignments. + """ try: content = table_data.get("content", {}) if not isinstance(content, dict): @@ -852,12 +747,30 @@ class RendererPdf(BaseRenderer): if not headers or not rows: return [] + # Per-table style override merged onto global table style + us = getattr(self, '_unifiedStyle', None) or {} + globalTableStyle = us.get("table", {}) + perTableOverride = content.get("tableStyle", {}) + mergedTableStyle = _deepMergeStyle(globalTableStyle, perTableOverride) if perTableOverride else dict(globalTableStyle) + numCols = len(headers) colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1) colWidths = [colWidth] * numCols + colAligns = self._inferColumnAlignments(headers, rows, mergedTableStyle) + hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header") - cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell") + + cellBasePs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell") + colCellStyles: List[ParagraphStyle] = [] + for colIdx in range(numCols): + colAlign = colAligns[colIdx] if colIdx < len(colAligns) else "left" + colPs = ParagraphStyle( + f"TblCellB_c{colIdx}", + parent=cellBasePs, + alignment=self._getAlignment(colAlign), + ) + colCellStyles.append(colPs) def _cellPara(cell, ps): runs = self._inlineRunsForCell(cell) @@ -871,28 +784,45 @@ class RendererPdf(BaseRenderer): for row in rows: padded = list(row) + [""] * max(0, numCols - len(row)) padded = padded[:numCols] - bodyRows.append([_cellPara(c, cellPs) for c in padded]) + bodyRows.append([_cellPara(padded[i], colCellStyles[i]) for i in range(numCols)]) table_matrix = [headerRow] + bodyRows table = Table(table_matrix, colWidths=colWidths, repeatRows=1) table_header_style = styles.get("table_header", {}) - table_cell_style = styles.get("table_cell", {}) + borderColor = self._hexToColor(mergedTableStyle.get("borderColor", "#e1e4e8")) + borderWidth = mergedTableStyle.get("borderWidthPt", 0.5) + evenBg = self._hexToColor(mergedTableStyle.get("rowBandingEven", "#f6f8fa")) + oddBg = self._hexToColor(mergedTableStyle.get("rowBandingOdd", "#FFFFFF")) + cellPad = mergedTableStyle.get("cellPaddingPt", 4) - table_style = [ - ("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))), - ("BACKGROUND", (0, 1), (-1, -1), self._hexToColor(table_cell_style.get("background", "#FFFFFF"))), + tableStyleCmds = [ + ("BACKGROUND", (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#f6f8fa"))), ("ALIGN", (0, 0), (-1, -1), "LEFT"), ("VALIGN", (0, 0), (-1, -1), "TOP"), - ("LEFTPADDING", (0, 0), (-1, -1), 4), - ("RIGHTPADDING", (0, 0), (-1, -1), 4), - ("TOPPADDING", (0, 0), (-1, 0), 6), - ("BOTTOMPADDING", (0, 0), (-1, 0), 8), - ("TOPPADDING", (0, 1), (-1, -1), 4), - ("BOTTOMPADDING", (0, 1), (-1, -1), 4), - ("GRID", (0, 0), (-1, -1), 0.5, colors.black), + ("LEFTPADDING", (0, 0), (-1, -1), cellPad), + ("RIGHTPADDING", (0, 0), (-1, -1), cellPad), + ("TOPPADDING", (0, 0), (-1, 0), cellPad + 2), + ("BOTTOMPADDING", (0, 0), (-1, 0), cellPad + 4), + ("TOPPADDING", (0, 1), (-1, -1), cellPad), + ("BOTTOMPADDING", (0, 1), (-1, -1), cellPad), ] - table.setStyle(TableStyle(table_style)) + + borderStyleName = mergedTableStyle.get("borderStyle", "grid") + if borderStyleName == "grid": + tableStyleCmds.append(("GRID", (0, 0), (-1, -1), borderWidth, borderColor)) + elif borderStyleName == "horizontal": + tableStyleCmds.append(("LINEABOVE", (0, 0), (-1, 0), borderWidth, borderColor)) + for rowIdx in range(len(table_matrix)): + tableStyleCmds.append(("LINEBELOW", (0, rowIdx), (-1, rowIdx), borderWidth, borderColor)) + + bandingEnabled = mergedTableStyle.get("bandingEnabled", True) + if bandingEnabled: + for rowIdx in range(1, len(table_matrix)): + bg = evenBg if rowIdx % 2 == 0 else oddBg + tableStyleCmds.append(("BACKGROUND", (0, rowIdx), (-1, rowIdx), bg)) + + table.setStyle(TableStyle(tableStyleCmds)) return [table, Spacer(1, 12)] except Exception as e: @@ -911,7 +841,7 @@ class RendererPdf(BaseRenderer): bulletStyle = ParagraphStyle( "BulletItem", fontSize=bulletStyleDef.get("font_size", 11), - textColor=self._hexToColor(bulletStyleDef.get("color", "#333333")), + textColor=self._hexToColor(bulletStyleDef.get("color", styles.get("colors", {}).get("primary", "#24292e"))), leftIndent=indent, firstLineIndent=-indent, spaceAfter=2, @@ -1006,11 +936,13 @@ class RendererPdf(BaseRenderer): fs = code_style_def.get("font_size", 9) mono = code_style_def.get("font", "Courier") + textColorFallback = styles.get("colors", {}).get("primary", "#24292e") + if language: lang_style = ParagraphStyle( "CodeLanguage", fontSize=fs, - textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")), + textColor=self._hexToColor(code_style_def.get("color", textColorFallback)), fontName="Helvetica-Bold", alignment=TA_LEFT, ) @@ -1024,7 +956,7 @@ class RendererPdf(BaseRenderer): approxCharWPt = max(fs * 0.52, 4.5) usableWidth = _PDF_CONTENT_WIDTH_PT - 16 # left+right padding maxLineChars = max(48, int(usableWidth / approxCharWPt)) - bg_col = self._hexToColor(code_style_def.get("background", "#F5F5F5")) + bg_col = self._hexToColor(code_style_def.get("background", "#f6f8fa")) leading = fs * 1.2 spaceAfter = code_style_def.get("space_after", 6) @@ -1054,17 +986,19 @@ class RendererPdf(BaseRenderer): fontName=mono, fontSize=fs, leading=leading, - textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")), + textColor=self._hexToColor(code_style_def.get("color", textColorFallback)), alignment=TA_LEFT, leftIndent=0, rightIndent=0, ) pf = Preformatted(chunkText, codePrStyle, dedent=0, maxLineLength=maxLineChars) + borderCol = self._hexToColor(code_style_def.get("border_color", "#e1e4e8")) tbl = Table([[pf]], colWidths=[_PDF_CONTENT_WIDTH_PT]) tbl.setStyle( TableStyle( [ ("BACKGROUND", (0, 0), (-1, -1), bg_col), + ("BOX", (0, 0), (-1, -1), 0.5, borderCol), ("VALIGN", (0, 0), (-1, -1), "TOP"), ("LEFTPADDING", (0, 0), (-1, -1), 8), ("RIGHTPADDING", (0, 0), (-1, -1), 8), @@ -1103,11 +1037,26 @@ class RendererPdf(BaseRenderer): if title: out.append(self._paragraphFromInlineMarkdown(title, self._createDocumentTitleStyle(styles))) out.append(Spacer(1, 18)) - for key, sizePt in (("subtitle", 16), ("author", 12), ("date", 12)): + coverDef = styles.get("cover_page", {}) + coverSizes = { + "subtitle": coverDef.get("subtitle_size", 16), + "author": coverDef.get("author_size", 12), + "date": coverDef.get("date_size", 12), + } + coverColors = { + "subtitle": coverDef.get("subtitle_color"), + "author": None, + "date": None, + } + for key in ("subtitle", "author", "date"): val = (content.get(key) or "").strip() if not val: continue - st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), alignment=1, fontSize=sizePt) + sizePt = coverSizes[key] + kwargs: Dict[str, Any] = {"alignment": 1, "fontSize": sizePt} + if coverColors[key]: + kwargs["textColor"] = self._hexToColor(coverColors[key]) + st = ParagraphStyle(f"cover_{key}", parent=self._createNormalStyle(styles), **kwargs) out.append(Paragraph(self._escapeReportlabXml(val), st)) out.append(Spacer(1, 8)) out.append(PageBreak()) @@ -1310,26 +1259,27 @@ class RendererPdf(BaseRenderer): # Add caption if available if caption: - captionStyle = self._createNormalStyle(styles) - captionStyle.fontSize = 10 - captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) - elements.append(Paragraph(f"{caption}", captionStyle)) + captionDef = styles.get("caption", {}) + capStyle = self._createCaptionStyle(styles) + capXml = self._escapeReportlabXml(caption) + if captionDef.get("italic", True): + capXml = f"{capXml}" + elements.append(Paragraph(capXml, capStyle)) elif alt_text and alt_text != "Image": - # Use alt text as caption if no caption provided, but avoid usageHint format if "Render as visual element:" in alt_text: - # Extract filename from usageHint if possible parts = alt_text.split("Render as visual element:") if len(parts) > 1: - filename = parts[1].strip() - caption_text = f"Figure: {filename}" + caption_text = f"Figure: {parts[1].strip()}" else: caption_text = alt_text else: caption_text = f"Figure: {alt_text}" - captionStyle = self._createNormalStyle(styles) - captionStyle.fontSize = 10 - captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666")) - elements.append(Paragraph(f"{caption_text}", captionStyle)) + captionDef = styles.get("caption", {}) + capStyle = self._createCaptionStyle(styles) + capXml = self._escapeReportlabXml(caption_text) + if captionDef.get("italic", True): + capXml = f"{capXml}" + elements.append(Paragraph(capXml, capStyle)) return elements diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py index 49ee8048..7064ea1e 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py @@ -9,6 +9,7 @@ from datetime import datetime, UTC from typing import Dict, Any, Optional, List from .documentRendererBaseTemplate import BaseRenderer from modules.datamodels.datamodelDocument import RenderedDocument +from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge logger = logging.getLogger(__name__) @@ -22,6 +23,16 @@ _PPTX_MD_INLINE_RE = re.compile( ) +def _parseHexColor(hexStr: str) -> tuple: + """Parse a hex color string like '#1F3864' into an RGB tuple.""" + hexStr = hexStr.lstrip('#') + if len(hexStr) == 6: + return (int(hexStr[0:2], 16), int(hexStr[2:4], 16), int(hexStr[4:6], 16)) + elif len(hexStr) == 8: + return (int(hexStr[2:4], 16), int(hexStr[4:6], 16), int(hexStr[6:8], 16)) + return (0, 0, 0) + + class RendererPptx(BaseRenderer): """Renderer for PowerPoint (.pptx) files using python-pptx library.""" @@ -74,26 +85,18 @@ class RendererPptx(BaseRenderer): Base64-encoded PowerPoint presentation as string """ try: - # Import python-pptx from pptx import Presentation from pptx.util import Inches, Pt from pptx.enum.text import PP_ALIGN from pptx.dml.color import RGBColor import re - # Get style set: prefer unified style, then metadata, then AI-enhanced - if style: - internalStyle = self._convertUnifiedStyleToInternal(style) - defaultPptx = self._getDefaultStyleSet() - for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"): - internalStyle[key] = defaultPptx.get(key) - internalStyle["heading"] = internalStyle["heading1"] - internalStyle["subheading"] = internalStyle["heading2"] - styles = internalStyle - self._unifiedStyle = style - else: - styles = await self._getStyleSet(extractedContent, userPrompt, aiService) - self._unifiedStyle = None + if not style: + style = {} + internalStyle = self._convertUnifiedStyleToInternal(style) + styles = internalStyle + self._styles = styles + self._unifiedStyle = style # Create new presentation prs = Presentation() @@ -140,8 +143,9 @@ class RendererPptx(BaseRenderer): p = tf.paragraphs[0] p.font.size = Pt(titleStyle.get("font_size", 36)) p.font.bold = titleStyle.get("bold", True) - tColor = self._getSafeColor(titleStyle.get("color", (31, 78, 121))) + tColor = self._getSafeColor(titleStyle.get("color", self._primaryColor())) p.font.color.rgb = RGBColor(*tColor) + self._applyPrimaryFont(p.font) except Exception as titleErr: logger.warning(f"Could not style title slide: {titleErr}") # Clear subtitle placeholder @@ -207,23 +211,20 @@ class RendererPptx(BaseRenderer): title_font_size = max(10, min(title_font_size, 32)) titleFrame.paragraphs[0].font.size = Pt(title_font_size) titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) + title_color = self._getSafeColor(title_style.get("color", self._primaryColor())) titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT titleFrame.word_wrap = True + self._applyPrimaryFont(titleFrame.paragraphs[0].font) # Render sections with proper PowerPoint objects (tables, lists, etc.) - # Organize content into frames for better layout if hasSections: - # Organize sections into content groups for frame-based layout - # Images are handled within the frame rendering method self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs) # Fallback: if no sections but has content text, render in textbox elif slide_content and not hasImages: - # Create textbox for content (no placeholders in blank layout) from pptx.util import Inches - title_height_used = Inches(1.0) # Title height for blank slides + title_height_used = Inches(1.0) content_left = Inches(0.5) content_top = title_height_used + Inches(0.3) content_width = prs.slide_width - Inches(1) @@ -233,7 +234,6 @@ class RendererPptx(BaseRenderer): text_frame.word_wrap = True text_frame.auto_size = None - # Split content into paragraphs paragraphs = slide_content.split('\n\n') for paragraph in paragraphs: @@ -241,10 +241,8 @@ class RendererPptx(BaseRenderer): p = text_frame.add_paragraph() p.text = paragraph.strip() - # Apply AI-generated styling with adaptive sizing paragraph_style = styles.get("paragraph", {}) base_font_size = paragraph_style.get("font_size", 18) - # Calculate adaptive font size based on content length try: total_chars = len(slide_content) chars_per_line = max(1, int(content_width / Pt(10))) @@ -253,16 +251,16 @@ class RendererPptx(BaseRenderer): font_multiplier = 1.0 if available_lines > 0 and lines_needed > available_lines: font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1)) - calculated_size = max(6, int(base_font_size * font_multiplier)) # Minimum 6pt + calculated_size = max(6, int(base_font_size * font_multiplier)) except (ZeroDivisionError, ValueError, TypeError): - calculated_size = max(6, base_font_size) # Fallback to base size with minimum + calculated_size = max(6, base_font_size) p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) - paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) + paragraph_color = self._getSafeColor(paragraph_style.get("color", self._paragraphColor())) p.font.color.rgb = RGBColor(*paragraph_color) + self._applyPrimaryFont(p.font) - # Apply alignment align = paragraph_style.get("align", "left") if align == "center": p.alignment = PP_ALIGN.CENTER @@ -283,8 +281,9 @@ class RendererPptx(BaseRenderer): if title_shape.text_frame.paragraphs[0].font: title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48)) title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) + title_color = self._getSafeColor(title_style.get("color", self._primaryColor())) title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) + self._applyPrimaryFont(title_shape.text_frame.paragraphs[0].font) # Clear subtitle placeholder instead of adding filler text try: @@ -356,242 +355,38 @@ class RendererPptx(BaseRenderer): ) ] - def _parseContentToSlides(self, content: str, title: str) -> list: - """ - Parse content into slide data structure. - - Args: - content: Content to parse - title: Presentation title - - Returns: - List of slide data dictionaries - """ - slides = [] - - # Split content by slide markers or headers - slide_sections = self._splitContentIntoSlides(content) - - for i, section in enumerate(slide_sections): - if section.strip(): - slide_data = { - "title": f"Slide {i + 1}", - "content": section.strip() - } - - # Extract title from content if it starts with # - lines = section.strip().split('\n') - if lines and lines[0].startswith('#'): - # Remove # symbols and clean up title - slide_title = lines[0].lstrip('#').strip() - slide_data["title"] = slide_title - slide_data["content"] = '\n'.join(lines[1:]).strip() - elif lines and lines[0].strip(): - # Use first line as title if it looks like a title - first_line = lines[0].strip() - if len(first_line) < 100 and not first_line.endswith('.'): - slide_data["title"] = first_line - slide_data["content"] = '\n'.join(lines[1:]).strip() - - slides.append(slide_data) - - return slides - - def _splitContentIntoSlides(self, content: str) -> list: - """ - Split content into individual slides based on headers and structure. - - Args: - content: Content to split - - Returns: - List of slide content strings - """ - # re is already imported at module level - - # First, try to split by major headers (# or ##) - # This is the most common case for AI-generated content - header_pattern = r'^(#{1,2})\s+(.+)$' - lines = content.split('\n') - slides = [] - current_slide = [] - - for line in lines: - # Check if this line is a header - header_match = re.match(header_pattern, line.strip()) - if header_match: - # If we have content in current slide, save it - if current_slide: - slide_content = '\n'.join(current_slide).strip() - if slide_content: - slides.append(slide_content) - current_slide = [] - - # Start new slide with this header - current_slide.append(line) - else: - # Add line to current slide - current_slide.append(line) - - # Add the last slide - if current_slide: - slide_content = '\n'.join(current_slide).strip() - if slide_content: - slides.append(slide_content) - - # If we found slides with headers, return them - if len(slides) > 1: - return slides - - # Fallback: Split by double newlines - sections = content.split('\n\n\n') - if len(sections) > 1: - return [s.strip() for s in sections if s.strip()] - - # Another fallback: Split by double newlines - sections = content.split('\n\n') - if len(sections) > 1: - return [s.strip() for s in sections if s.strip()] - - # Last resort: return as single slide - return [content.strip()] - - def getOutputMimeType(self) -> str: """Get MIME type for rendered output.""" return self.outputMimeType - - async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - use styles from document generation metadata if available, - otherwise enhance default styles with AI if userPrompt provided. - - WICHTIG: In a dynamic scalable AI system, styling should come from document generation, - not be generated separately by renderers. Only fall back to AI if styles not provided. - - Args: - extractedContent: Document content with metadata (may contain styles) - userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if styles not in metadata and userPrompt provided) - templateName: Name of template style set (None = default) - - Returns: - Dict with style definitions for all document styles - """ - # Get default style set - defaultStyleSet = self._getDefaultStyleSet() - - # FIRST: Check if styles are provided in document generation metadata (preferred approach) - if extractedContent: - metadata = extractedContent.get("metadata", {}) - if isinstance(metadata, dict): - styles = metadata.get("styles") - if styles and isinstance(styles, dict): - self.logger.debug("Using styles from document generation metadata") - enhancedStyleSet = self._convertColorsFormat(styles) - return self._validateStylesReadability(enhancedStyleSet) - - # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) - if userPrompt and aiService: - self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") - enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - # Colors already converted in _getAiStylesWithPptxColors - return self._validateStylesReadability(enhancedStyleSet) - else: - # Use default styles only - return defaultStyleSet - - async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: - """Enhance default styles with AI based on user prompt.""" - try: - style_template = self._createProfessionalPptxTemplate(userPrompt, defaultStyleSet) - enhanced_styles = await self._getAiStylesWithPptxColors(aiService, style_template, defaultStyleSet) - return enhanced_styles - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") - return defaultStyleSet - - def _validateStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Validate and fix readability issues in AI-generated styles.""" - try: - # Ensure minimum font sizes for PowerPoint readability - min_font_sizes = { - "title": 36, - "heading": 24, - "subheading": 20, - "paragraph": 14, - "bullet_list": 14, - "table_header": 12, - "table_cell": 12 - } - - for style_name, min_size in min_font_sizes.items(): - if style_name in styles: - current_size = styles[style_name].get("font_size", 12) - if current_size < min_size: - styles[style_name]["font_size"] = min_size - - return styles - - except Exception as e: - logger.warning(f"Style validation failed: {str(e)}") - return self._getDefaultStyleSet() - - def _getDefaultStyleSet(self) -> Dict[str, Any]: - """Default PowerPoint style set - used when no style instructions present.""" - return { - "title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"}, - "heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"}, - "subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"}, - "paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20}, - "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"}, - "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"}, - "slide_size": "16:9", - "content_per_slide": "concise", - "design_theme": "corporate", - "color_scheme": "professional", - "background_style": "clean", - "accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"], - "professional_grade": True, - "executive_ready": True - } - - def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: - """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" - # json is already imported at module level - schema_json = json.dumps(style_schema, indent=4) - - return f"""Customize the JSON below for professional PowerPoint slides. -User Request: {userPrompt or "Create professional corporate slides"} + def _primaryColor(self) -> tuple: + """Return the primary color from the internal styles as an RGB tuple.""" + s = getattr(self, '_styles', None) + if s: + return self._getSafeColor(s.get("colors", {}).get("primary", "#24292e")) + return self._getSafeColor("#24292e") -Rules: -- Use professional colors (blues, grays, deep greens) -- Large, readable font sizes -- High contrast -- Sophisticated color palettes + def _paragraphColor(self) -> tuple: + """Return the paragraph text color from the internal styles as an RGB tuple.""" + s = getattr(self, '_styles', None) + if s: + return self._getSafeColor(s.get("paragraph", {}).get("color", "#24292e")) + return self._getSafeColor("#24292e") -Return ONLY this JSON with your changes: + def _applyPrimaryFont(self, font) -> None: + """Set font.name to the primary font from unified style when available.""" + us = getattr(self, '_unifiedStyle', None) + if us and isinstance(us.get("fonts"), dict): + primaryFont = us["fonts"].get("primary") + if primaryFont: + font.name = primaryFont -{schema_json} - -JSON ONLY. NO OTHER TEXT.""" - - async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: - """Get AI styles with proper PowerPoint color conversion. Uses base _getAiStyles for debug file writing.""" - if not aiService: - return default_styles - - try: - # Use base template method which handles debug file writing - enhanced_styles = await self._getAiStyles(aiService, style_template, default_styles) - - # Convert colors to PPTX format (RGB tuples) - return self._convertColorsFormat(enhanced_styles) - - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using defaults") - return default_styles + def _monoFont(self) -> str: + """Return the monospace font name from the internal styles.""" + s = getattr(self, '_styles', None) + if s: + return s.get("fonts", {}).get("monospace", "Consolas") + return "Consolas" def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert hex colors to RGB format for PowerPoint compatibility.""" @@ -600,14 +395,13 @@ JSON ONLY. NO OTHER TEXT.""" if isinstance(style_config, dict): for prop, value in style_config.items(): if isinstance(value, str) and value.startswith('#'): - # Convert hex to RGB tuple for PowerPoint hex_color = value.lstrip('#') if len(hex_color) == 6: r = int(hex_color[0:2], 16) g = int(hex_color[2:4], 16) b = int(hex_color[4:6], 16) styles[style_name][prop] = (r, g, b) - elif len(hex_color) == 8: # aRGB format + elif len(hex_color) == 8: r = int(hex_color[2:4], 16) g = int(hex_color[4:6], 16) b = int(hex_color[6:8], 16) @@ -628,7 +422,7 @@ JSON ONLY. NO OTHER TEXT.""" g = int(hex_color[2:4], 16) b = int(hex_color[4:6], 16) return (r, g, b) - elif len(hex_color) == 8: # aRGB format + elif len(hex_color) == 8: r = int(hex_color[2:4], 16) g = int(hex_color[4:6], 16) b = int(hex_color[6:8], 16) @@ -661,7 +455,7 @@ JSON ONLY. NO OTHER TEXT.""" document_title = title if title else metadata.get("title", "Generated Document") - # Title slide (clean — just the document title, no filler text) + # Title slide (clean - just the document title, no filler text) slides.append({ "title": document_title, "content": "", @@ -682,7 +476,6 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.error(f"Error parsing JSON to slides: {str(e)}") - # Return minimal fallback slides return [ { "title": title, @@ -690,319 +483,6 @@ JSON ONLY. NO OTHER TEXT.""" } ] - def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: - """Create a slide from a JSON section.""" - try: - # Get section title from data or use default - section_title = "Untitled Section" - if section.get("content_type") == "heading": - # Extract text from elements array - use nested content structure - for element in section.get("elements", []): - if isinstance(element, dict): - content = element.get("content", {}) - if isinstance(content, dict): - text = content.get("text", "") - if text: - section_title = text - break - elif section.get("title"): - section_title = section.get("title") - - content_type = section.get("content_type", "paragraph") - elements = section.get("elements", []) - - # Check for three content formats from Phase 5D in elements - content_parts = [] - for element in elements: - if not isinstance(element, dict): - continue - - element_type = element.get("type", "") - - # Support three content formats from Phase 5D - if element_type == "reference": - # Document reference format - doc_ref = element.get("documentReference", "") - label = element.get("label", "Reference") - content_parts.append(f"[Reference: {label}]") - continue - elif element_type == "extracted_text": - # Extracted text format - content = element.get("content", "") - source = element.get("source", "") - if content: - source_text = f" (Source: {source})" if source else "" - content_parts.append(f"{content}{source_text}") - continue - - # Handle image sections specially - if content_type == "image": - # Extract image data from nested content structure - images = [] - for element in elements: - if isinstance(element, dict): - # Extract from nested content structure - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - altText = content.get("altText", "Image") - caption = content.get("caption", "") - else: - # Fallback to direct element fields - base64Data = element.get("base64Data") - altText = element.get("altText", "Image") - caption = element.get("caption", "") - - if base64Data: - images.append({ - "base64Data": base64Data, - "altText": altText, - "caption": caption - }) - - return { - "title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"), - "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present - "images": images - } - - # Build slide content based on section type - iterate over elements and format each - if not content_parts: # Only if we didn't process reference/extracted_text above - for element in elements: - if not isinstance(element, dict): - continue - - element_type = element.get("type", "") - # Use element type if available, otherwise fall back to section content_type - if not element_type: - element_type = content_type - - if element_type == "table": - formatted = self._formatTableForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "bullet_list" or element_type == "list": - formatted = self._formatListForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "heading": - formatted = self._formatHeadingForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "paragraph": - formatted = self._formatParagraphForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "code_block" or element_type == "code": - formatted = self._formatCodeForSlide(element) - if formatted: - content_parts.append(formatted) - else: - # Fallback to paragraph formatting - formatted = self._formatParagraphForSlide(element) - if formatted: - content_parts.append(formatted) - - # Combine content parts - slide_content = "\n\n".join(filter(None, content_parts)) - - return { - "title": section_title, - "content": slide_content, - "images": [] # No images for non-image sections - } - - except Exception as e: - logger.warning(f"Error creating slide from section: {str(e)}") - return None - - def _formatTableForSlide(self, element: Dict[str, Any]) -> str: - """Format table data for slide presentation.""" - try: - # Extract table data from element - handle nested content structure - if not isinstance(element, dict): - return "" - - # Extract from nested content structure - content = element.get("content", {}) - if not isinstance(content, dict): - return "" - - headers = content.get("headers", []) - rows = content.get("rows", []) - - if not headers: - return "" - - # Create table representation - table_lines = [] - - # Add headers - header_line = " | ".join(str(h) for h in headers) - table_lines.append(header_line) - - # Add separator - separator = "-" * len(header_line) - table_lines.append(separator) - - # Add data rows (limit based on content density) - max_rows = 5 # Default limit - for row in rows[:max_rows]: - row_line = " | ".join(str(cell) for cell in row) - table_lines.append(row_line) - - if len(rows) > max_rows: - table_lines.append(f"... and {len(rows) - max_rows} more rows") - - return "\n".join(table_lines) - - except Exception as e: - logger.warning(f"Error formatting table for slide: {str(e)}") - return "" - - def _formatListForSlide(self, list_data: Dict[str, Any]) -> str: - """Format list data for slide presentation.""" - try: - # Extract from nested content structure - content = list_data.get("content", {}) - if not isinstance(content, dict): - return "" - items = content.get("items", []) - - if not items: - return "" - - # Create list representation - list_lines = [] - - for item in items: - if isinstance(item, dict): - text = item.get("text", "") - list_lines.append(f"• {text}") - - # Add subitems (limit to 3 for readability) - subitems = item.get("subitems", [])[:3] - for subitem in subitems: - if isinstance(subitem, dict): - list_lines.append(f" - {subitem.get('text', '')}") - else: - list_lines.append(f" - {subitem}") - else: - list_lines.append(f"• {str(item)}") - - return "\n".join(list_lines) - - except Exception as e: - logger.warning(f"Error formatting list for slide: {str(e)}") - return "" - - def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str: - """Format heading data for slide presentation.""" - try: - # Extract from nested content structure - content = heading_data.get("content", {}) - if not isinstance(content, dict): - return "" - text = content.get("text", "") - level = content.get("level", 1) - - if text: - return f"{'#' * level} {text}" - - return "" - - except Exception as e: - logger.warning(f"Error formatting heading for slide: {str(e)}") - return "" - - def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str: - """Format paragraph data for slide presentation.""" - try: - # Extract from nested content structure - content = paragraph_data.get("content", {}) - if isinstance(content, dict): - if content.get("inlineRuns"): - text = "".join(r.get("value", "") for r in content["inlineRuns"]) - else: - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - - if text: - max_length = 200 - if len(text) > max_length: - text = text[:max_length] + "..." - - return text - - return "" - - except Exception as e: - logger.warning(f"Error formatting paragraph for slide: {str(e)}") - return "" - - def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str: - """Format code data for slide presentation.""" - try: - # Extract from nested content structure - content = code_data.get("content", {}) - if not isinstance(content, dict): - return "" - code = content.get("code", "") - language = content.get("language", "") - - if code: - # Limit code length based on content density - max_length = 100 # Default limit - if len(code) > max_length: - code = code[:max_length] + "..." - - if language: - return f"Code ({language}):\n{code}" - else: - return f"Code:\n{code}" - - return "" - - except Exception as e: - logger.warning(f"Error formatting code for slide: {str(e)}") - return "" - - def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int: - """Determine the best professional slide layout based on content.""" - try: - content = slide_data.get("content", "") - title = slide_data.get("title", "") - - if not content: - return 0 - - # Professional layout selection based on content - if "|" in content and "-" in content: - # Has both tables and lists - use content with caption for professional look - return 2 - elif "|" in content: - # Has tables - use content layout for clean table presentation - return 1 - elif content.count("•") > 2: - # Has many bullet points - use content layout for better readability - return 1 - elif len(content) > 200: - # Long content - use content layout for better text flow - return 1 - elif title and len(title) > 20: - # Long title - use title and content layout - return 1 - else: - # Default to title and content layout for professional appearance - return 1 - - except Exception as e: - logger.warning(f"Error determining slide layout: {str(e)}") - return 1 # Default to title and content layout - def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: """Create slides from sections: each top-level heading creates a new slide. @@ -1078,191 +558,6 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error creating slides from sections: {str(e)}") return [] - def _formatSectionContent(self, section: Dict[str, Any]) -> str: - """Format section content for slide presentation.""" - try: - content_type = section.get("content_type", "paragraph") - elements = section.get("elements", []) - - # Image sections return empty content (handled separately) - if content_type == "image": - return "" - - # Process each element in the section - use element type, not section type - content_parts = [] - for element in elements: - if not isinstance(element, dict): - continue - - element_type = element.get("type", "") - # Use element type if available, otherwise fall back to section content_type - if not element_type: - element_type = content_type - - if element_type == "table": - formatted = self._formatTableForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "bullet_list" or element_type == "list": - formatted = self._formatListForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "heading": - formatted = self._formatHeadingForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "paragraph": - formatted = self._formatParagraphForSlide(element) - if formatted: - content_parts.append(formatted) - elif element_type == "code_block" or element_type == "code": - formatted = self._formatCodeForSlide(element) - if formatted: - content_parts.append(formatted) - else: - # Fallback to paragraph formatting - formatted = self._formatParagraphForSlide(element) - if formatted: - content_parts.append(formatted) - - return "\n\n".join(filter(None, content_parts)) - - except Exception as e: - logger.warning(f"Error formatting section content: {str(e)}") - return "" - - def _addImagesToSlide(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any]) -> None: - """Add images to a PowerPoint slide.""" - try: - from pptx.util import Inches, Pt - from pptx.enum.text import PP_ALIGN - from pptx.dml.color import RGBColor - import base64 - import io - - if not images: - return - - # Get slide dimensions from presentation - if hasattr(self, '_currentPresentation'): - prs = self._currentPresentation - else: - prs = slide.presentation - slideWidth = prs.slide_width - slideHeight = prs.slide_height - titleHeight = Inches(1.5) # Approximate title height - - # Available area for images - availableWidth = slideWidth - Inches(1) # Margins - availableHeight = slideHeight - titleHeight - Inches(1) # Title + margins - - # Position images - if len(images) == 1: - # Single image: center it - img = images[0] - base64Data = img.get("base64Data") - # Validate base64Data is present and not empty - if not base64Data or not isinstance(base64Data, str) or len(base64Data.strip()) == 0: - logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}") - return - - try: - imageBytes = base64.b64decode(base64Data) - if len(imageBytes) == 0: - logger.error("Decoded image bytes are empty") - return - imageStream = io.BytesIO(imageBytes) - except Exception as decode_error: - logger.error(f"Failed to decode base64 image data: {str(decode_error)}") - return - - # Get image dimensions - try: - from PIL import Image as PILImage - pilImage = PILImage.open(imageStream) - imgWidth, imgHeight = pilImage.size - - # Scale to fit available space (max 90% of slide for better visibility) - # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) - # Conversion: pixels * (72/96) = points - imgWidthPoints = imgWidth * (72.0 / 96.0) - imgHeightPoints = imgHeight * (72.0 / 96.0) - - maxWidth = availableWidth * 0.9 - maxHeight = availableHeight * 0.9 - - scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) - finalWidth = imgWidthPoints * scale - finalHeight = imgHeightPoints * scale - - # Center image - left = (slideWidth - finalWidth) / 2 - top = titleHeight + (availableHeight - finalHeight) / 2 - - imageStream.seek(0) - except Exception: - # Fallback: use default size - finalWidth = Inches(6) - finalHeight = Inches(4.5) - left = (slideWidth - finalWidth) / 2 - top = titleHeight + Inches(1) - imageStream.seek(0) - - # Add image to slide - try: - slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - except Exception as add_error: - # If add_picture fails, try with explicit format - imageStream.seek(0) - # Ensure we have valid image data - if len(imageBytes) > 0: - slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - else: - raise Exception(f"Empty image data: {add_error}") - - # Add caption if available - caption = img.get("caption") or img.get("altText") - if caption and caption != "Image": - # Add text box below image - captionTop = top + finalHeight + Inches(0.2) - captionBox = slide.shapes.add_textbox( - Inches(1), - captionTop, - slideWidth - Inches(2), - Inches(0.5) - ) - captionFrame = captionBox.text_frame - captionFrame.text = caption - captionFrame.paragraphs[0].font.size = Pt(12) - captionFrame.paragraphs[0].font.italic = True - captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER - else: - # Multiple images: arrange in grid - cols = 2 if len(images) <= 4 else 3 - rows = (len(images) + cols - 1) // cols - - imgWidth = (availableWidth - Inches(0.5) * (cols - 1)) / cols - imgHeight = (availableHeight - Inches(0.5) * (rows - 1)) / rows - - for idx, img in enumerate(images): - base64Data = img.get("base64Data") - if base64Data: - row = idx // cols - col = idx % cols - - imageBytes = base64.b64decode(base64Data) - imageStream = io.BytesIO(imageBytes) - - left = Inches(0.5) + col * (imgWidth + Inches(0.5)) - top = titleHeight + Inches(0.5) + row * (imgHeight + Inches(0.5)) - - slide.shapes.add_picture(imageStream, left, top, width=imgWidth, height=imgHeight) - - except Exception as e: - logger.error(f"Error embedding images in PPTX slide: {str(e)}") - import traceback - logger.error(f"Traceback: {traceback.format_exc()}") - def _addMarkdownInlineRuns(self, paragraph, text: str, fontSize=None, fontColor=None, fontBold=None) -> None: """Parse markdown inline formatting and add Runs to a pptx paragraph. @@ -1282,6 +577,7 @@ JSON ONLY. NO OTHER TEXT.""" run.font.bold = bold elif fontBold is not None: run.font.bold = fontBold + self._applyPrimaryFont(run.font) pos = 0 for m in _PPTX_MD_INLINE_RE.finditer(text): @@ -1301,7 +597,7 @@ JSON ONLY. NO OTHER TEXT.""" elif m.group(7): r = paragraph.add_run() r.text = m.group(7) - r.font.name = "Courier New" + r.font.name = self._monoFont() if fontSize and hasattr(fontSize, 'pt'): r.font.size = Pt(max(8, int(fontSize.pt * 0.85))) elif fontSize: @@ -1320,8 +616,7 @@ JSON ONLY. NO OTHER TEXT.""" """Process InlineRun dicts into pptx text runs.""" from pptx.util import Pt paragraph.text = "" - us = getattr(self, '_unifiedStyle', None) - monoFont = us["fonts"]["monospace"] if us else "Courier New" + monoFont = self._monoFont() for run in runs: runType = run.get("type", "text") value = run.get("value", "") @@ -1331,6 +626,7 @@ JSON ONLY. NO OTHER TEXT.""" r.font.size = fontSize if fontColor: r.font.color.rgb = fontColor + self._applyPrimaryFont(r.font) if runType == "bold": r.font.bold = True elif runType == "italic": @@ -1359,6 +655,17 @@ JSON ONLY. NO OTHER TEXT.""" if not headers: return + # Per-table style override (deep-merge with global table styles) + perTableOverride = content.get("tableStyle", {}) + globalTableStyle = { + "table_header": styles.get("table_header", {}), + "table_cell": styles.get("table_cell", {}), + "table_border": styles.get("table_border", {}), + "table_banding": styles.get("table_banding", {}), + "table_padding": styles.get("table_padding", 4), + } + mergedTableStyle = deepMerge(globalTableStyle, perTableOverride) if perTableOverride else globalTableStyle + num_cols = int(len(headers)) num_rows = int(len(rows) + 1) left = Inches(0.5) @@ -1382,107 +689,142 @@ JSON ONLY. NO OTHER TEXT.""" table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height) table = table_shape.table - # Set column widths - width is in EMU, divide evenly - # python-pptx expects EMU values (914400 EMU = 1 inch) - col_width_emu = int(width) // num_cols # Ensure integer division for EMU + # Set column widths evenly (EMU values) + col_width_emu = int(width) // num_cols for col_idx in range(num_cols): table.columns[col_idx].width = col_width_emu - # Add headers with styling - OPTIMIZED: pre-calculate color/style objects - header_style = styles.get("table_header", {}) - header_bg_color = self._getSafeColor(header_style.get("background", (31, 78, 121))) - header_text_color = self._getSafeColor(header_style.get("text_color", (255, 255, 255))) - header_font_size = header_style.get("font_size", 18) + # Infer per-column alignments from data or explicit tableStyle + columnAlignments = self._inferColumnAlignments(headers, rows, mergedTableStyle) - # Pre-calculate and cache RGB color objects - header_bg_rgb = RGBColor(*header_bg_color) - header_text_rgb = RGBColor(*header_text_color) - header_font_size_pt = Pt(header_font_size) - header_bold = header_style.get("bold", True) + # Add headers with styling + headerStyle = mergedTableStyle.get("table_header", {}) + headerBgColor = self._getSafeColor(headerStyle.get("background", self._tableHeaderBg())) + headerTextColor = self._getSafeColor(headerStyle.get("text_color", (255, 255, 255))) + headerFontSize = headerStyle.get("font_size", 10) - # Determine alignment once - align = header_style.get("align", "center") - if align == "left": - header_alignment = PP_ALIGN.LEFT - elif align == "right": - header_alignment = PP_ALIGN.RIGHT + headerBgRgb = RGBColor(*headerBgColor) + headerTextRgb = RGBColor(*headerTextColor) + headerFontSizePt = Pt(headerFontSize) + headerBold = headerStyle.get("bold", True) + + headerAlign = headerStyle.get("align", "center") + if headerAlign == "left": + headerAlignment = PP_ALIGN.LEFT + elif headerAlign == "right": + headerAlignment = PP_ALIGN.RIGHT else: - header_alignment = PP_ALIGN.CENTER + headerAlignment = PP_ALIGN.CENTER for col_idx, header in enumerate(headers): cell = table.cell(0, col_idx) - # Clear existing text and set new text cell.text_frame.clear() cellRuns = self._inlineRunsForCell(header) - header_text = "".join(r.get("value", "") for r in cellRuns) - cell.text = header_text + headerText = "".join(r.get("value", "") for r in cellRuns) + cell.text = headerText - # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() - # Apply styling - use cached objects cell.fill.solid() - cell.fill.fore_color.rgb = header_bg_rgb + cell.fill.fore_color.rgb = headerBgRgb para = cell.text_frame.paragraphs[0] - para.font.bold = header_bold - para.font.size = header_font_size_pt - para.font.color.rgb = header_text_rgb - para.alignment = header_alignment + para.font.bold = headerBold + para.font.size = headerFontSizePt + para.font.color.rgb = headerTextRgb + para.alignment = headerAlignment + self._applyPrimaryFont(para.font) - # Ensure text is set on paragraph if not para.text: - para.text = header_text + para.text = headerText - # Add data rows with styling - OPTIMIZED: pre-calculate color/style objects - cell_style = styles.get("table_cell", {}) - cell_bg_color = self._getSafeColor(cell_style.get("background", (255, 255, 255))) - cell_text_color = self._getSafeColor(cell_style.get("text_color", (47, 47, 47))) - cell_font_size = cell_style.get("font_size", 16) + # Add data rows with optional row banding + cellStyle = mergedTableStyle.get("table_cell", {}) + cellTextColor = self._getSafeColor(cellStyle.get("text_color", self._paragraphColor())) + cellFontSize = cellStyle.get("font_size", 10) - # Pre-calculate and cache RGB color objects - cell_bg_rgb = RGBColor(*cell_bg_color) - cell_text_rgb = RGBColor(*cell_text_color) - cell_font_size_pt = Pt(cell_font_size) - cell_bold = cell_style.get("bold", False) + bandingConfig = mergedTableStyle.get("table_banding", {}) + bandingEnabled = bandingConfig.get("enabled", True) + bandingEven = self._getSafeColor(bandingConfig.get("even", self._tableRowBandingEven())) + bandingOdd = self._getSafeColor(bandingConfig.get("odd", self._tableRowBandingOdd())) - # Determine alignment once - align = cell_style.get("align", "left") - if align == "center": - cell_alignment = PP_ALIGN.CENTER - elif align == "right": - cell_alignment = PP_ALIGN.RIGHT + cellTextRgb = RGBColor(*cellTextColor) + cellFontSizePt = Pt(cellFontSize) + cellBold = cellStyle.get("bold", False) + + cellAlign = cellStyle.get("align", "left") + if cellAlign == "center": + cellAlignment = PP_ALIGN.CENTER + elif cellAlign == "right": + cellAlignment = PP_ALIGN.RIGHT else: - cell_alignment = PP_ALIGN.LEFT + cellAlignment = PP_ALIGN.LEFT for row_idx, row_data in enumerate(rows, 1): + dataRowIdx = row_idx - 1 + + if bandingEnabled: + bandColor = bandingEven if dataRowIdx % 2 == 0 else bandingOdd + else: + bandColor = self._getSafeColor("#FFFFFF") + cellBgRgb = RGBColor(*bandColor) + for col_idx, cell_data in enumerate(row_data[:num_cols]): cell = table.cell(row_idx, col_idx) - # Clear existing text and set new text cell.text_frame.clear() cellRuns = self._inlineRunsForCell(cell_data) - cell_text = "".join(r.get("value", "") for r in cellRuns) - cell.text = cell_text + cellText = "".join(r.get("value", "") for r in cellRuns) + cell.text = cellText - # Ensure paragraph exists if len(cell.text_frame.paragraphs) == 0: cell.text_frame.add_paragraph() - # Apply styling - use cached objects cell.fill.solid() - cell.fill.fore_color.rgb = cell_bg_rgb + cell.fill.fore_color.rgb = cellBgRgb para = cell.text_frame.paragraphs[0] - para.font.size = cell_font_size_pt - para.font.bold = cell_bold - para.font.color.rgb = cell_text_rgb - para.alignment = cell_alignment + para.font.size = cellFontSizePt + para.font.bold = cellBold + para.font.color.rgb = cellTextRgb + self._applyPrimaryFont(para.font) + + # Apply per-column alignment (inferred or explicit) + if col_idx < len(columnAlignments): + colAlign = columnAlignments[col_idx] + if colAlign == "center": + para.alignment = PP_ALIGN.CENTER + elif colAlign == "right": + para.alignment = PP_ALIGN.RIGHT + else: + para.alignment = PP_ALIGN.LEFT + else: + para.alignment = cellAlignment - # Ensure text is set on paragraph if not para.text: - para.text = cell_text + para.text = cellText except Exception as e: logger.warning(f"Error adding table to slide: {str(e)}") + + def _tableHeaderBg(self) -> tuple: + """Return table header background color from the internal styles.""" + s = getattr(self, '_styles', None) + if s: + return self._getSafeColor(s.get("table_header", {}).get("background", "#f6f8fa")) + return self._getSafeColor("#f6f8fa") + + def _tableRowBandingEven(self) -> tuple: + """Return even row banding color from the internal styles.""" + s = getattr(self, '_styles', None) + if s: + return self._getSafeColor(s.get("table_banding", {}).get("even", "#f6f8fa")) + return self._getSafeColor("#f6f8fa") + + def _tableRowBandingOdd(self) -> tuple: + """Return odd row banding color from the internal styles.""" + s = getattr(self, '_styles', None) + if s: + return self._getSafeColor(s.get("table_banding", {}).get("odd", "#FFFFFF")) + return self._getSafeColor("#FFFFFF") def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add bullet list to slide text frame with consistent formatting.""" @@ -1498,9 +840,12 @@ JSON ONLY. NO OTHER TEXT.""" if not items: return - listStyle = styles.get("paragraph", {}) - fontSize = Pt(max(10, int(listStyle.get("font_size", 14) * font_size_multiplier))) - fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47)))) + bulletStyle = styles.get("bullet_list", {}) + bulletChar = bulletStyle.get("bullet_char", "\u2022") + baseFontSize = bulletStyle.get("font_size", 14) + fontSize = Pt(max(10, int(baseFontSize * font_size_multiplier))) + fontColor = RGBColor(*self._getSafeColor(bulletStyle.get("color", styles.get("paragraph", {}).get("color", self._paragraphColor())))) + indentPt = bulletStyle.get("indent", 18) for item in items: runs = self._inlineRunsForListItem(item) @@ -1511,15 +856,18 @@ JSON ONLY. NO OTHER TEXT.""" p.alignment = PP_ALIGN.LEFT p.space_before = Pt(2) p.space_after = Pt(2) + if hasattr(p, 'paragraph_format'): + p.paragraph_format.left_indent = Pt(indentPt) + bulletPrefix = f"{bulletChar} " if isNewFormat: - bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs + bulletRuns = [{"type": "text", "value": bulletPrefix}] + runs self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor) else: itemText = item.get("text", "") if isinstance(item, dict) else str(item) if not itemText or not itemText.strip(): continue - self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + self._addMarkdownInlineRuns(p, f"{bulletPrefix}{itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) # Subitems (only for dict-style items) if isinstance(item, dict): @@ -1531,14 +879,16 @@ JSON ONLY. NO OTHER TEXT.""" sp.alignment = PP_ALIGN.LEFT sp.space_before = Pt(1) sp.space_after = Pt(1) + if hasattr(sp, 'paragraph_format'): + sp.paragraph_format.left_indent = Pt(indentPt * 2) if isSubNew: - subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns + subBulletRuns = [{"type": "text", "value": "\u2013 "}] + subRuns self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor) else: subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) if not subText: continue - self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + self._addMarkdownInlineRuns(sp, f"\u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") @@ -1549,7 +899,6 @@ JSON ONLY. NO OTHER TEXT.""" from pptx.util import Pt from pptx.dml.color import RGBColor - # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return @@ -1561,23 +910,17 @@ JSON ONLY. NO OTHER TEXT.""" p = text_frame.add_paragraph() p.level = 0 - heading_style = styles.get("heading", {}) - if level == 1: - base_font_size = heading_style.get("font_size", 28) - elif level == 2: - base_font_size = heading_style.get("font_size", 22) - elif level == 3: - base_font_size = heading_style.get("font_size", 18) - else: - base_font_size = heading_style.get("font_size", 16) + headingKey = f"heading{min(level, 4)}" + headingStyle = styles.get(headingKey, styles.get("heading1", {})) + baseFontSize = headingStyle.get("font_size", 22) - calculated_size = max(12, int(base_font_size * font_size_multiplier)) - fSize = Pt(calculated_size) - fColor = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121)))) - self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=True) - # Add spacing before and after headings - p.space_before = Pt(12 if level == 1 else 8) # More space before H1 - p.space_after = Pt(6) # Space after heading + calculatedSize = max(12, int(baseFontSize * font_size_multiplier)) + fSize = Pt(calculatedSize) + fColor = RGBColor(*self._getSafeColor(headingStyle.get("color", styles.get("colors", {}).get("primary", self._primaryColor())))) + fBold = headingStyle.get("bold", True) + self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold) + p.space_before = Pt(headingStyle.get("space_before", 12)) + p.space_after = Pt(headingStyle.get("space_after", 6)) except Exception as e: logger.warning(f"Error adding heading to slide: {str(e)}") @@ -1589,7 +932,6 @@ JSON ONLY. NO OTHER TEXT.""" from pptx.dml.color import RGBColor from pptx.enum.text import PP_ALIGN - # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): inlineRuns = self._inlineRunsFromContent(content) @@ -1620,7 +962,7 @@ JSON ONLY. NO OTHER TEXT.""" base_font_size = paragraph_style.get("font_size", 14) calculated_size = max(10, int(base_font_size * font_size_multiplier)) fSize = Pt(calculated_size) - fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", self._paragraphColor()))) fBold = paragraph_style.get("bold", False) if hasInlineRuns: @@ -1643,13 +985,12 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") - def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: - """Add code block to slide text frame.""" + def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame=None, font_size_multiplier: float = 1.0, top=None) -> None: + """Add code block to slide as a separate shape with background fill and border.""" try: - from pptx.util import Pt + from pptx.util import Inches, Pt from pptx.dml.color import RGBColor - # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return @@ -1657,135 +998,184 @@ JSON ONLY. NO OTHER TEXT.""" code = content.get("code", "") language = content.get("language", "") - if code: - code_style = styles.get("code_block", {}) - code_font = code_style.get("font", "Courier New") - base_code_font_size = code_style.get("font_size", 9) - code_font_size = max(6, int(base_code_font_size * font_size_multiplier)) # Minimum 6pt for code - code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) - - p = text_frame.add_paragraph() - if language: - p.text = f"Code ({language}):" - p.font.bold = True - p.font.size = Pt(code_font_size) - p = text_frame.add_paragraph() - - p.text = code - p.font.name = code_font - p.font.size = Pt(code_font_size) - p.font.color.rgb = RGBColor(*code_color) + if not code: + return + + codeStyle = styles.get("code_block", {}) + codeFont = codeStyle.get("font", self._monoFont()) + baseCodeFontSize = codeStyle.get("font_size", 9) + codeFontSize = max(6, int(baseCodeFontSize * font_size_multiplier)) + codeColor = self._getSafeColor(codeStyle.get("color", styles.get("paragraph", {}).get("color", self._paragraphColor()))) + bgColor = self._getSafeColor(codeStyle.get("background", "#f6f8fa")) + borderColor = self._getSafeColor(codeStyle.get("border_color", "#e1e4e8")) + + if hasattr(self, '_currentPresentation'): + prs = self._currentPresentation + else: + prs = slide.presentation + + if top is not None: + codeTop = top + else: + maxBottom = Inches(1.5) + for shape in slide.shapes: + shapeBottom = shape.top + shape.height + if shapeBottom > maxBottom: + maxBottom = shapeBottom + codeTop = maxBottom + Inches(0.15) + codeLeft = Inches(0.5) + codeWidth = prs.slide_width - Inches(1) + + lineCount = len(code.split('\n')) + estimatedHeight = Pt(codeFontSize * 1.4) * lineCount + Pt(16) + if language: + estimatedHeight += Pt(codeFontSize * 1.6) + maxHeight = prs.slide_height - codeTop - Inches(0.3) + codeHeight = min(estimatedHeight, maxHeight) + + codeBox = slide.shapes.add_textbox(codeLeft, codeTop, codeWidth, codeHeight) + codeTf = codeBox.text_frame + codeTf.word_wrap = True + codeTf.auto_size = None + + codeBox.fill.solid() + codeBox.fill.fore_color.rgb = RGBColor(*bgColor) + codeBox.line.color.rgb = RGBColor(*borderColor) + codeBox.line.width = Pt(0.75) + + if language: + p = codeTf.paragraphs[0] + p.text = f"Code ({language}):" + p.font.bold = True + p.font.size = Pt(codeFontSize) + self._applyPrimaryFont(p.font) + p = codeTf.add_paragraph() + else: + p = codeTf.paragraphs[0] + + p.text = code + p.font.name = codeFont + p.font.size = Pt(codeFontSize) + p.font.color.rgb = RGBColor(*codeColor) except Exception as e: logger.warning(f"Error adding code block to slide: {str(e)}") def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" - # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") + def _estimateElementHeightPt(self, element: Dict[str, Any], section: Dict[str, Any], styles: Dict[str, Any]) -> float: + """Estimate the rendered height of a text element in points.""" + elementType = element.get("type", "") or section.get("content_type", "paragraph") + content = element.get("content", {}) + charsPerLine = 65 + + if elementType == "heading": + level = content.get("level", 1) if isinstance(content, dict) else 1 + headingKey = f"heading{min(level, 4)}" + hs = styles.get(headingKey, styles.get("heading1", {})) + return hs.get("font_size", 22) * 1.3 + hs.get("space_before", 12) + hs.get("space_after", 6) + + if elementType in ("bullet_list", "list"): + items = content.get("items", []) if isinstance(content, dict) else [] + total = len(items) + for item in items: + if isinstance(item, dict): + total += len(item.get("subitems", [])) + fontSize = styles.get("bullet_list", {}).get("font_size", 14) + return max(total, 1) * fontSize * 1.5 + 4 + + text = "" + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + pFontSize = styles.get("paragraph", {}).get("font_size", 14) + lines = max(1, len(text) / charsPerLine) if text else 1 + return lines * pFontSize * 1.5 + 12 + def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None: - """Render all sections sequentially: text/bullets/headings into a shared - textbox, tables and images as separate shapes placed below.""" + """Render slide content with sequential Y-tracking. + Text elements are batched into sized textboxes; tables and code blocks + are placed as separate shapes at the tracked Y position.""" try: from pptx.util import Inches, Pt margin = Inches(0.5) - contentTop = Inches(1.3) + currentY = Inches(1.3) availableWidth = prs.slide_width - Inches(1) - availableHeight = prs.slide_height - contentTop - Inches(0.3) + slideBottom = prs.slide_height - Inches(0.3) + spacing = Inches(0.1) - # Create a single textbox for all non-table, non-image content - textbox = slide.shapes.add_textbox(margin, contentTop, availableWidth, availableHeight) - textFrame = textbox.text_frame - textFrame.word_wrap = True - textFrame.auto_size = None + pendingTextElements: List[tuple] = [] + + def _flushText(): + nonlocal currentY, pendingTextElements + if not pendingTextElements: + return + totalPt = sum(self._estimateElementHeightPt(el, sec, styles) for sec, el in pendingTextElements) + estimatedHeight = Pt(totalPt) + remaining = slideBottom - currentY + boxHeight = min(estimatedHeight, remaining) if remaining > Inches(0.2) else estimatedHeight + + textbox = slide.shapes.add_textbox(margin, currentY, availableWidth, boxHeight) + tf = textbox.text_frame + tf.word_wrap = True + tf.auto_size = None + + for sec, el in pendingTextElements: + syntheticSection = {"content_type": sec.get("content_type", "paragraph"), "elements": [el]} + self._renderSectionToTextFrame(slide, syntheticSection, styles, tf, font_size_multiplier=1.0) + + currentY += boxHeight + spacing + pendingTextElements = [] for section in slide_sections: - self._renderSectionToTextFrame(slide, section, styles, textFrame, font_size_multiplier=1.0) + sectionType = section.get("content_type", "paragraph") + elements = section.get("elements", []) + if not elements: + continue + + for element in elements: + if not isinstance(element, dict): + continue + elementType = element.get("type", "") or sectionType + + if elementType == "image": + continue + if elementType == "paragraph" and self._isHorizontalRule(element): + continue + + if elementType == "table": + _flushText() + self._addTableToSlide(slide, element, styles, top=currentY, max_width=availableWidth) + content = element.get("content", {}) + numRows = (len(content.get("rows", [])) + 1) if isinstance(content, dict) else 1 + currentY += Inches(0.4) * numRows + spacing + + elif elementType in ("code_block", "code"): + _flushText() + self._addCodeBlockToSlide(slide, element, styles, top=currentY, font_size_multiplier=1.0) + content = element.get("content", {}) + code = content.get("code", "") if isinstance(content, dict) else "" + lineCount = max(1, len(code.split('\n'))) + codeFontSize = styles.get("code_block", {}).get("font_size", 9) + currentY += Pt(codeFontSize * 1.4) * lineCount + Pt(16) + spacing + + else: + pendingTextElements.append((section, element)) + + _flushText() - # Render standalone images that were passed alongside sections if slide_images: - self._addImagesToSlideInFrame(slide, slide_images, styles, margin, contentTop, availableWidth, availableHeight) + remainingHeight = slideBottom - currentY + if remainingHeight > Inches(0.5): + self._addImagesToSlideInFrame(slide, slide_images, styles, margin, currentY, availableWidth, remainingHeight) except Exception as e: logger.error(f"Error rendering slide content: {str(e)}") - def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None: - """Render text sections (paragraphs, lists, headings) in a text frame.""" - try: - from pptx.util import Inches, Pt - from pptx.enum.text import PP_ALIGN - from pptx.dml.color import RGBColor - - # Calculate total text length for adaptive font sizing - total_text_length = 0 - if adaptiveFontSize: - for section in text_sections: - elements = section.get("elements", []) - for element in elements: - if isinstance(element, dict): - element_type = element.get("type", "") - if element_type in ["paragraph", "bullet_list", "list", "heading"]: - content = element.get("content", "") - if isinstance(content, dict): - if "text" in content: - total_text_length += len(str(content["text"])) - elif "items" in content: - for item in content.get("items", []): - total_text_length += len(str(item)) - elif isinstance(content, str): - total_text_length += len(content) - - # Calculate adaptive font size multiplier based on text length and frame size - font_size_multiplier = 1.0 - if adaptiveFontSize and total_text_length > 0: - try: - # More accurate calculation: estimate characters per line based on average character width - # Average character width is approximately 0.6 * font_size in points - # For 14pt font, average char width ≈ 8.4pt - avg_char_width_pt = 8.4 # Approximate for 14pt font - chars_per_line = max(1, int(float(width) / avg_char_width_pt)) - - # Estimate lines needed - lines_needed = total_text_length / max(chars_per_line, 1) - - # Available lines based on height (line height ≈ 1.2 * font_size) - line_height_pt = 16.8 # Approximate for 14pt font with 1.2 spacing - available_lines = max(1, int(float(height) / line_height_pt)) - - if available_lines > 0 and lines_needed > available_lines: - # More aggressive scaling for long texts - # Calculate exact scale needed, then add 10% buffer - scale_needed = available_lines / lines_needed - font_size_multiplier = scale_needed * 0.9 # 10% buffer - # Allow scaling down to 50% for very long texts (minimum readable) - font_size_multiplier = max(0.5, min(1.0, font_size_multiplier)) - elif lines_needed <= available_lines * 0.7: - # If text is much shorter than available space, can use slightly larger font - font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8) - except (ZeroDivisionError, ValueError, TypeError) as calc_error: - logger.debug(f"Font size calculation error: {str(calc_error)}") - # Fallback to default if calculation fails - font_size_multiplier = 1.0 - - textbox = slide.shapes.add_textbox(left, top, width, height) - text_frame = textbox.text_frame - text_frame.word_wrap = True - text_frame.auto_size = None # Disable auto-size for fixed frame - # Ensure text frame can display bullets - text_frame.margin_left = Pt(0) - text_frame.margin_right = Pt(0) - text_frame.margin_top = Pt(0) - text_frame.margin_bottom = Pt(0) - - # Pass font size multiplier to rendering methods - for section in text_sections: - self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier) - - except Exception as e: - logger.warning(f"Error rendering text sections in frame: {str(e)}") - @staticmethod def _isHorizontalRule(element: Dict[str, Any]) -> bool: """Detect markdown horizontal rules (---, ***, ___) that should be skipped on slides.""" @@ -1829,14 +1219,14 @@ JSON ONLY. NO OTHER TEXT.""" elif elementType == "paragraph": self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier) elif elementType in ("code_block", "code"): - self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier) + self._addCodeBlockToSlide(slide, element, styles, font_size_multiplier=font_size_multiplier) elif elementType == "extracted_text": content = element.get("content", "") if content: p = text_frame.add_paragraph() pStyle = styles.get("paragraph", {}) fSize = Pt(max(10, int(pStyle.get("font_size", 14) * font_size_multiplier))) - fColor = RGBColor(*self._getSafeColor(pStyle.get("color", (47, 47, 47)))) + fColor = RGBColor(*self._getSafeColor(pStyle.get("color", self._paragraphColor()))) self._addMarkdownInlineRuns(p, content, fontSize=fSize, fontColor=fColor) p.alignment = PP_ALIGN.LEFT elif elementType == "reference": @@ -1845,6 +1235,7 @@ JSON ONLY. NO OTHER TEXT.""" p.text = f"[Reference: {label}]" p.font.italic = True p.alignment = PP_ALIGN.LEFT + self._applyPrimaryFont(p.font) else: self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier) @@ -1865,9 +1256,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})") - # Calculate image dimensions within frame if len(images) == 1: - # Single image: fit to frame img = images[0] base64Data = img.get("base64Data") @@ -1878,13 +1267,10 @@ JSON ONLY. NO OTHER TEXT.""" # Clean base64 data (remove data URI prefix if present) if isinstance(base64Data, str): if base64Data.startswith("data:image/"): - # Extract base64 from data URI base64Data = base64Data.split(",", 1)[1] - # Remove any whitespace base64Data = base64Data.strip() try: - # Decode base64 imageBytes = base64.b64decode(base64Data, validate=True) if len(imageBytes) == 0: logger.error("Decoded image bytes are empty") @@ -1892,15 +1278,13 @@ JSON ONLY. NO OTHER TEXT.""" imageStream = io.BytesIO(imageBytes) - # Get image dimensions using PIL imgWidth, imgHeight = None, None try: from PIL import Image as PILImage pilImage = PILImage.open(imageStream) imgWidth, imgHeight = pilImage.size - imageStream.seek(0) # Reset stream for PowerPoint + imageStream.seek(0) - # Validate image dimensions - ensure they're reasonable if imgWidth <= 1 or imgHeight <= 1: logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size") imgWidth, imgHeight = 800, 600 @@ -1909,43 +1293,34 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny") except ImportError: logger.warning("PIL not available, using default image size") - imgWidth, imgHeight = 800, 600 # Default dimensions + imgWidth, imgHeight = 800, 600 except Exception as pil_error: logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size") imgWidth, imgHeight = 800, 600 imageStream.seek(0) - # Ensure we have valid dimensions if not imgWidth or not imgHeight or imgWidth <= 1 or imgHeight <= 1: logger.warning("Invalid image dimensions, using default 800x600") imgWidth, imgHeight = 800, 600 - # Scale to fit frame while maintaining aspect ratio - # width and height parameters are already in Inches (from pptx.util.Inches) - # Convert PIL pixel dimensions to Inches (assuming 96 DPI for PIL images) imgWidthInches = Inches(imgWidth / 96.0) imgHeightInches = Inches(imgHeight / 96.0) - # Calculate scale to fit within frame - # Inches objects support division, result is a float try: scale_width = width / imgWidthInches if imgWidthInches > 0 else 1.0 scale_height = height / imgHeightInches if imgHeightInches > 0 else 1.0 - scale = min(scale_width, scale_height, 1.0) # Don't scale up, only down + scale = min(scale_width, scale_height, 1.0) finalWidth = imgWidthInches * scale finalHeight = imgHeightInches * scale - # Ensure minimum size (at least 1 inch) to prevent tiny rendering minSize = Inches(1) if finalWidth < minSize or finalHeight < minSize: - # Use minimum size while maintaining aspect ratio min_scale = max(minSize / imgWidthInches if imgWidthInches > 0 else 1.0, minSize / imgHeightInches if imgHeightInches > 0 else 1.0) finalWidth = max(minSize, imgWidthInches * min_scale) finalHeight = max(minSize, imgHeightInches * min_scale) - # Ensure we don't exceed frame bounds if finalWidth > width: finalWidth = width finalHeight = imgHeightInches * (width / imgWidthInches) if imgWidthInches > 0 else finalHeight @@ -1954,14 +1329,13 @@ JSON ONLY. NO OTHER TEXT.""" finalWidth = imgWidthInches * (height / imgHeightInches) if imgHeightInches > 0 else finalWidth except (ZeroDivisionError, TypeError, AttributeError) as calc_error: logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size") - finalWidth = width * 0.9 # Use 90% of frame width - finalHeight = height * 0.9 # Use 90% of frame height + finalWidth = width * 0.9 + finalHeight = height * 0.9 # Center in frame frame_left = left + (width - finalWidth) / 2 frame_top = top + (height - finalHeight) / 2 - # Add image to slide imageStream.seek(0) slide.shapes.add_picture(imageStream, frame_left, frame_top, width=finalWidth, height=finalHeight) logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})") @@ -1969,13 +1343,23 @@ JSON ONLY. NO OTHER TEXT.""" # Add caption if available caption = img.get("caption") or img.get("altText") if caption and caption != "Image": + captionStyle = styles.get("caption", {}) captionTop = frame_top + finalHeight + Inches(0.1) captionBox = slide.shapes.add_textbox(left, captionTop, width, Inches(0.4)) captionFrame = captionBox.text_frame captionFrame.text = caption - captionFrame.paragraphs[0].font.size = Pt(10) - captionFrame.paragraphs[0].font.italic = True - captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + captionFrame.paragraphs[0].font.size = Pt(captionStyle.get("font_size", 10)) + captionFrame.paragraphs[0].font.italic = captionStyle.get("italic", True) + captionColor = self._getSafeColor(captionStyle.get("color", self._paragraphColor())) + captionFrame.paragraphs[0].font.color.rgb = RGBColor(*captionColor) + captionAlignVal = captionStyle.get("align", "center") + if captionAlignVal == "left": + captionFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + elif captionAlignVal == "right": + captionFrame.paragraphs[0].alignment = PP_ALIGN.RIGHT + else: + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + self._applyPrimaryFont(captionFrame.paragraphs[0].font) except base64.binascii.Error as b64_error: logger.error(f"Invalid base64 data: {str(b64_error)}") except Exception as img_error: @@ -1993,7 +1377,6 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Image {idx} has no base64Data") continue - # Clean base64 data if isinstance(base64Data, str): if base64Data.startswith("data:image/"): base64Data = base64Data.split(",", 1)[1] @@ -2012,22 +1395,18 @@ JSON ONLY. NO OTHER TEXT.""" imageStream = io.BytesIO(imageBytes) - # Try to get dimensions for better scaling try: from PIL import Image as PILImage pilImage = PILImage.open(imageStream) imgW, imgH = pilImage.size - # Scale to fit grid cell while maintaining aspect ratio scale = min(imgWidth / (imgW * (72.0 / 96.0)), imgHeight / (imgH * (72.0 / 96.0)), 1.0) finalW = (imgW * (72.0 / 96.0)) * scale finalH = (imgH * (72.0 / 96.0)) * scale - # Center in grid cell cell_left = img_left + (imgWidth - finalW) / 2 cell_top = img_top + (imgHeight - finalH) / 2 imageStream.seek(0) slide.shapes.add_picture(imageStream, cell_left, cell_top, width=finalW, height=finalH) except (ImportError, Exception): - # Fallback: use grid cell size directly imageStream.seek(0) slide.shapes.add_picture(imageStream, img_left, img_top, width=imgWidth, height=imgHeight) diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py index 3c6fdd5e..0b544dd8 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py @@ -6,6 +6,7 @@ Excel renderer for report generation using openpyxl. from .documentRendererBaseTemplate import BaseRenderer from modules.datamodels.datamodelDocument import RenderedDocument +from modules.serviceCenter.services.serviceGeneration.styleDefaults import deepMerge from typing import Dict, Any, List, Optional import io import base64 @@ -128,176 +129,6 @@ class RendererXlsx(BaseRenderer): ) ] - def _generateExcel(self, content: str, title: str) -> str: - """Generate Excel content using openpyxl.""" - try: - # Create workbook - wb = Workbook() - - # Remove default sheet - wb.remove(wb.active) - - # Create sheets - summarySheet = wb.create_sheet("Summary", 0) - dataSheet = wb.create_sheet("Data", 1) - analysisSheet = wb.create_sheet("Analysis", 2) - - # Add content to sheets - self._populateSummarySheet(summarySheet, title, wb) - self._populateDataSheet(dataSheet, content) - self._populateAnalysisSheet(analysisSheet, content) - - # Ensure workbook has at least one sheet (Excel requirement) - if len(wb.worksheets) == 0: - wb.create_sheet("Sheet1") - - # Save to buffer with error handling - buffer = io.BytesIO() - try: - wb.save(buffer) - buffer.seek(0) - except Exception as save_error: - self.logger.error(f"Error saving Excel workbook: {str(save_error)}") - # Try to fix common issues and retry - try: - # Remove any invalid sheet names or empty sheets - for sheet in list(wb.worksheets): - if not sheet.title or len(sheet.title.strip()) == 0: - wb.remove(sheet) - # Ensure at least one sheet exists - if len(wb.worksheets) == 0: - wb.create_sheet("Sheet1") - # Retry save - buffer = io.BytesIO() - wb.save(buffer) - buffer.seek(0) - except Exception as retry_error: - self.logger.error(f"Retry save also failed: {str(retry_error)}") - raise Exception(f"Failed to save Excel workbook: {str(save_error)}") - - # Convert to base64 - excelBytes = buffer.getvalue() - excelBase64 = base64.b64encode(excelBytes).decode('utf-8') - - return excelBase64 - - except Exception as e: - self.logger.error(f"Error generating Excel: {str(e)}") - raise - - def _populateSummarySheet(self, sheet, title: str, wb: Workbook = None): - """Populate the summary sheet.""" - try: - # Title - sheet['A1'] = title - sheet['A1'].font = Font(size=16, bold=True) - sheet['A1'].alignment = Alignment(horizontal='left') - - # Generation info - sheet['A3'] = "Generated:" - sheet['B3'] = self._formatTimestamp() - sheet['A4'] = "Status:" - sheet['B4'] = "Generated Successfully" - - # Key metrics placeholder - sheet['A6'] = "Key Metrics:" - sheet['A6'].font = Font(bold=True) - sheet['A7'] = "Total Items:" - # Only add formula if Data sheet exists (check workbook sheets) - if wb and "Data" in [s.title for s in wb.worksheets]: - sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet - else: - sheet['B7'] = "N/A" # Data sheet not available - - # Auto-adjust column widths - sheet.column_dimensions['A'].width = 20 - sheet.column_dimensions['B'].width = 30 - - except Exception as e: - self.logger.warning(f"Could not populate summary sheet: {str(e)}") - - def _populateDataSheet(self, sheet, content: str): - """Populate the data sheet.""" - try: - # Headers - headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"] - for col, header in enumerate(headers, 1): - cell = sheet.cell(row=1, column=col, value=header) - cell.font = Font(bold=True) - cell.fill = PatternFill(start_color="FFCCCCCC", end_color="FFCCCCCC", fill_type="solid") - - # Process content - lines = content.split('\n') - row = 2 - - for line in lines: - line = line.strip() - if not line: - continue - - # Check for table data (lines with |) - if '|' in line: - cells = [cell.strip() for cell in line.split('|') if cell.strip()] - for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns - sheet.cell(row=row, column=col, value=cellData) - row += 1 - else: - # Regular content - sheet.cell(row=row, column=1, value=line) - row += 1 - - # Auto-adjust column widths - for col in range(1, 6): - sheet.column_dimensions[get_column_letter(col)].width = 20 - - except Exception as e: - self.logger.warning(f"Could not populate data sheet: {str(e)}") - - def _populateAnalysisSheet(self, sheet, content: str): - """Populate the analysis sheet.""" - try: - # Title - sheet['A1'] = "Analysis & Insights" - sheet['A1'].font = Font(size=14, bold=True) - - # Content analysis - lines = content.split('\n') - row = 3 - - sheet['A3'] = "Content Analysis:" - sheet['A3'].font = Font(bold=True) - row += 1 - - # Count different types of content - tableLines = sum(1 for line in lines if '|' in line) - listLines = sum(1 for line in lines if line.startswith(('- ', '* '))) - textLines = len(lines) - tableLines - listLines - - sheet[f'A{row}'] = f"Total Lines: {len(lines)}" - row += 1 - sheet[f'A{row}'] = f"Table Rows: {tableLines}" - row += 1 - sheet[f'A{row}'] = f"List Items: {listLines}" - row += 1 - sheet[f'A{row}'] = f"Text Lines: {textLines}" - row += 2 - - # Recommendations - sheet[f'A{row}'] = "Recommendations:" - sheet[f'A{row}'].font = Font(bold=True) - row += 1 - sheet[f'A{row}'] = "1. Review data accuracy" - row += 1 - sheet[f'A{row}'] = "2. Consider additional analysis" - row += 1 - sheet[f'A{row}'] = "3. Update regularly" - - # Auto-adjust column width - sheet.column_dimensions['A'].width = 30 - - except Exception as e: - self.logger.warning(f"Could not populate analysis sheet: {str(e)}") - async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str: """Generate Excel content from structured JSON document using AI-generated styling.""" try: @@ -308,12 +139,9 @@ class RendererXlsx(BaseRenderer): # Store unified style for use by inline-run helpers self._unifiedStyle = style - # Get style set: prefer unified style, fall back to legacy approach - if style: - styles = self._convertUnifiedStyleToInternal(style) - styles = self._convertColorsFormat(styles) - else: - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + # Convert unified style to internal format + styles = self._convertUnifiedStyleToInternal(style) + styles = self._convertColorsFormat(styles) # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) if not self._validateJsonStructure(jsonContent): @@ -380,109 +208,6 @@ class RendererXlsx(BaseRenderer): self.logger.error(f"Error generating Excel from JSON: {str(e)}") raise Exception(f"Excel generation failed: {str(e)}") - async def _getStyleSet(self, extractedContent: Dict[str, Any] = None, userPrompt: str = None, aiService=None, templateName: str = None) -> Dict[str, Any]: - """Get style set - use styles from document generation metadata if available, - otherwise enhance default styles with AI if userPrompt provided. - - WICHTIG: In a dynamic scalable AI system, styling should come from document generation, - not be generated separately by renderers. Only fall back to AI if styles not provided. - - Args: - extractedContent: Document content with metadata (may contain styles) - userPrompt: User's prompt (AI will detect style instructions in any language) - aiService: AI service (used only if styles not in metadata and userPrompt provided) - templateName: Name of template style set (None = default) - - Returns: - Dict with style definitions for all document styles - """ - # Get default style set - defaultStyleSet = self._getDefaultStyleSet() - - # FIRST: Check if styles are provided in document generation metadata (preferred approach) - if extractedContent: - metadata = extractedContent.get("metadata", {}) - if isinstance(metadata, dict): - styles = metadata.get("styles") - if styles and isinstance(styles, dict): - self.logger.debug("Using styles from document generation metadata") - enhancedStyleSet = self._convertColorsFormat(styles) - return self._validateStylesContrast(enhancedStyleSet) - - # FALLBACK: Enhance with AI if userPrompt provided (only if styles not in metadata) - if userPrompt and aiService: - self.logger.info(f"Styles not in metadata, enhancing with AI based on user prompt...") - enhancedStyleSet = await self._enhanceStylesWithAI(userPrompt, defaultStyleSet, aiService) - # Convert colors to Excel format after getting styles - enhancedStyleSet = self._convertColorsFormat(enhancedStyleSet) - return self._validateStylesContrast(enhancedStyleSet) - else: - # Use default styles only - return defaultStyleSet - - async def _enhanceStylesWithAI(self, userPrompt: str, defaultStyleSet: Dict[str, Any], aiService) -> Dict[str, Any]: - """Enhance default styles with AI based on user prompt.""" - try: - style_template = self._createAiStyleTemplate("xlsx", userPrompt, defaultStyleSet) - enhanced_styles = await self._getAiStylesWithExcelColors(aiService, style_template, defaultStyleSet) - return enhanced_styles - except Exception as e: - self.logger.warning(f"AI style enhancement failed: {str(e)}, using default styles") - return defaultStyleSet - - def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Validate and fix contrast issues in AI-generated styles.""" - try: - # Fix table header contrast - if "table_header" in styles: - header = styles["table_header"] - bgColor = header.get("background", "FFFFFFFF") - textColor = header.get("text_color", "FF000000") - - # Normalize colors (remove # if present, ensure aRGB format) - bgColor = self._normalizeColor(bgColor) - textColor = self._normalizeColor(textColor) - - # If both are white or both are dark, fix it - if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF": - header["background"] = "FF4F4F4F" - header["text_color"] = "FFFFFFFF" - elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000": - header["background"] = "FF4F4F4F" - header["text_color"] = "FFFFFFFF" - else: - # Ensure colors are in correct format - header["background"] = bgColor - header["text_color"] = textColor - - # Fix table cell contrast - if "table_cell" in styles: - cell = styles["table_cell"] - bgColor = cell.get("background", "FFFFFFFF") - textColor = cell.get("text_color", "FF000000") - - # Normalize colors (remove # if present, ensure aRGB format) - bgColor = self._normalizeColor(bgColor) - textColor = self._normalizeColor(textColor) - - # If both are white or both are dark, fix it - if bgColor.upper() == "FFFFFFFF" and textColor.upper() == "FFFFFFFF": - cell["background"] = "FFFFFFFF" - cell["text_color"] = "FF2F2F2F" - elif bgColor.upper() == "FF000000" and textColor.upper() == "FF000000": - cell["background"] = "FFFFFFFF" - cell["text_color"] = "FF2F2F2F" - else: - # Ensure colors are in correct format - cell["background"] = bgColor - cell["text_color"] = textColor - - return styles - - except Exception as e: - self.logger.warning(f"Style validation failed: {str(e)}") - return self._getDefaultStyleSet() - def _normalizeColor(self, colorValue: str) -> str: """Normalize color to aRGB format without # prefix.""" if not isinstance(colorValue, str): @@ -506,77 +231,10 @@ class RendererXlsx(BaseRenderer): # Unexpected format, return default black return "FF000000" - def _getDefaultStyleSet(self) -> Dict[str, Any]: - """Default Excel style set - used when no style instructions present.""" - return { - "title": {"font_size": 16, "color": "FF1F4E79", "bold": True, "align": "left"}, - "heading": {"font_size": 14, "color": "FF2F2F2F", "bold": True, "align": "left"}, - "table_header": {"background": "FF4F4F4F", "text_color": "FFFFFFFF", "bold": True, "align": "center"}, - "table_cell": {"background": "FFFFFFFF", "text_color": "FF2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 11, "color": "FF2F2F2F", "indent": 2}, - "paragraph": {"font_size": 11, "color": "FF2F2F2F", "bold": False, "align": "left"}, - "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"} - } - def _renderInlineRuns(self, runs: list) -> str: """Flatten inline runs to plain text for Excel cells.""" return "".join(r.get("value", "") for r in runs) - async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: - """Get AI styles with proper Excel color conversion.""" - if not aiService: - return defaultStyles - - try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - - requestOptions = AiCallOptions() - requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - - request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions) - response = await aiService.callAi(request) - - import json - import re - - # Clean and parse JSON - result = response.content.strip() if response and response.content else "" - - # Check if result is empty - if not result: - self.logger.warning("AI styling returned empty response, using defaults") - return defaultStyles - - # Extract JSON from markdown if present - json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() - self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER") - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER") - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER") - - # Try to parse JSON - try: - styles = json.loads(result) - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults") - return defaultStyles - - # Convert colors to Excel aRGB format - styles = self._convertColorsFormat(styles) - - return styles - - except Exception as e: - self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return defaultStyles - def _getSafeAlignment(self, alignValue: Any) -> str: """Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'.""" if not alignValue: @@ -627,15 +285,27 @@ class RendererXlsx(BaseRenderer): except Exception: return default + @staticmethod + def _looksLikeColor(value: str) -> bool: + """Return True if *value* looks like a hex color (e.g. ``#e1e4e8`` or ``FF24292E``).""" + raw = value.lstrip('#') + if len(raw) not in (3, 6, 8): + return False + return all(c in '0123456789abcdefABCDEF' for c in raw) + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: - """Convert hex colors to aRGB format for Excel compatibility (without # prefix).""" + """Convert hex colors to aRGB format for Excel compatibility (without # prefix). + + Only touches values that actually look like hex colors so that non-color + strings (font names, border style keywords, bullet chars, etc.) are + preserved intact. + """ try: self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER") for styleName, styleConfig in styles.items(): if isinstance(styleConfig, dict): for prop, value in styleConfig.items(): - if isinstance(value, str): - # Normalize color to aRGB format without # prefix + if isinstance(value, str) and self._looksLikeColor(value): styles[styleName][prop] = self._normalizeColor(value) return styles except Exception as e: @@ -789,199 +459,6 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate Excel sheets: {str(e)}") - def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str): - """Populate a sheet with a single table section.""" - try: - # Sheet title - sheet['A1'] = sheetTitle - title_style = styles.get("title", {}) - sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79"))) - sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left"))) - - # Get table data from elements (canonical JSON format) - elements = section.get("elements", []) - if elements and isinstance(elements, list) and len(elements) > 0: - table_element = elements[0] - # Extract from nested content structure - content = table_element.get("content", {}) - if not isinstance(content, dict): - headers = [] - rows = [] - else: - headers = content.get("headers") or [] - rows = content.get("rows") or [] - # Ensure headers and rows are lists - if not isinstance(headers, list): - headers = [] - if not isinstance(rows, list): - rows = [] - else: - headers = [] - rows = [] - - if not headers and not rows: - sheet['A3'] = "No table data available" - return - - # Add headers - header_style = styles.get("table_header", {}) - for col, header in enumerate(headers, 1): - cell = sheet.cell(row=3, column=col, value=header) - if header_style.get("bold"): - cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000"))) - if header_style.get("background"): - cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid") - - # Add rows - handle both array format and cells object format - cell_style = styles.get("table_cell", {}) - header_count = len(headers) - for row_idx, row_data in enumerate(rows, 4): - # Handle different row formats - if isinstance(row_data, list): - # Array format: [value1, value2, ...] - cell_values = row_data - elif isinstance(row_data, dict) and "cells" in row_data: - # Cells object format: {"cells": [{"value": ...}, ...]} - cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] - else: - # Unknown format, skip - continue - - # Validate row column count matches headers - pad or truncate if needed - if len(cell_values) < header_count: - # Pad with empty strings if row has fewer columns - cell_values.extend([""] * (header_count - len(cell_values))) - elif len(cell_values) > header_count: - # Truncate if row has more columns than headers - cell_values = cell_values[:header_count] - - for col_idx, cell_value in enumerate(cell_values, 1): - # Extract value if it's a dict with "value" key - if isinstance(cell_value, dict): - actual_value = cell_value.get("value", "") - else: - actual_value = cell_value - - cell = sheet.cell(row=row_idx, column=col_idx, value=actual_value) - if cell_style.get("text_color"): - cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) - - # Auto-adjust column widths - for col in range(1, len(headers) + 1): - sheet.column_dimensions[get_column_letter(col)].width = 20 - - except Exception as e: - self.logger.warning(f"Could not populate table sheet: {str(e)}") - - def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]): - """Populate the main sheet with document overview and all content.""" - try: - # Document title - use documents[].title as primary source, fallback to metadata.title - documents = jsonContent.get("documents", []) - if documents and isinstance(documents[0], dict) and documents[0].get("title"): - documentTitle = documents[0].get("title") - else: - documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report") - sheet['A1'] = documentTitle - - # Safety check for title style - title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "left"}) - try: - safe_color = self._getSafeColor(title_style["color"]) - sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) - sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) - except Exception as font_error: - # Try with a safe color - sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000") - sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) - - # Generation info - sheet['A3'] = "Generated:" - sheet['B3'] = self._formatTimestamp() - sheet['A4'] = "Status:" - sheet['B4'] = "Generated Successfully" - - # Document metadata - metadata = jsonContent.get("metadata", {}) - if metadata: - sheet['A6'] = "Document Information:" - sheet['A6'].font = Font(bold=True) - - row = 7 - for key, value in metadata.items(): - if key != "title": - sheet[f'A{row}'] = f"{key.title()}:" - sheet[f'B{row}'] = str(value) - row += 1 - - # Content overview - sections = self._extractSections(jsonContent) - sheet[f'A{row + 1}'] = "Content Overview:" - sheet[f'A{row + 1}'].font = Font(bold=True) - - row += 2 - sheet[f'A{row}'] = f"Total Sections: {len(sections)}" - - # Count different content types - content_types = {} - for section in sections: - content_type = section.get("content_type", "unknown") - content_types[content_type] = content_types.get(content_type, 0) + 1 - - for content_type, count in content_types.items(): - row += 1 - sheet[f'A{row}'] = f"{content_type.title()} Sections: {count}" - - # Add all content to this sheet - row += 2 - for section in sections: - row = self._addSectionToSheet(sheet, section, styles, row) - row += 1 # Empty row between sections - - # Auto-adjust column widths - sheet.column_dimensions['A'].width = 20 - sheet.column_dimensions['B'].width = 30 - - except Exception as e: - self.logger.warning(f"Could not populate main sheet: {str(e)}") - - def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]): - """Populate additional sheets based on content types.""" - try: - sections = self._extractSections(jsonContent) - - for sheetName in sheetNames: - if sheetName not in sheets: - continue - - sheet = sheets[sheetName] - sheetTitle = sheetName.title() - sheet['A1'] = sheetTitle - sheet['A1'].font = Font(size=16, bold=True) - - row = 3 - - # Filter sections by content type - if sheetName == "tables": - filtered_sections = [s for s in sections if s.get("content_type") == "table"] - elif sheetName == "lists": - filtered_sections = [s for s in sections if s.get("content_type") == "list"] - elif sheetName == "text": - filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]] - else: - filtered_sections = sections - - for section in filtered_sections: - row = self._addSectionToSheet(sheet, section, styles, row) - row += 1 # Empty row between sections - - # Auto-adjust column widths - for col in range(1, 6): - sheet.column_dimensions[get_column_letter(col)].width = 20 - - except Exception as e: - self.logger.warning(f"Could not populate content type sheets: {str(e)}") - def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a section to a sheet and return the next row.""" try: @@ -1161,20 +638,21 @@ class RendererXlsx(BaseRenderer): text = text[:32764] + "..." return text + def _buildTableBorder(self, borderStyle: str, borderColor: str) -> Border: + """Build an openpyxl ``Border`` matching the requested *borderStyle*.""" + if borderStyle == "none": + noSide = Side(style=None) + return Border(left=noSide, right=noSide, top=noSide, bottom=noSide) + if borderStyle == "horizontal": + hSide = Side(style="thin", color=borderColor) + noSide = Side(style=None) + return Border(left=noSide, right=noSide, top=hSide, bottom=hSide) + thinSide = Side(style="thin", color=borderColor) + return Border(left=thinSide, right=thinSide, top=thinSide, bottom=thinSide) + def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: - """ - Add a table element to Excel sheet with proper formatting and borders. - - PERFORMANCE OPTIMIZATIONS: - 1. Pre-calculated style objects (Font, PatternFill, Alignment) to avoid repeated creation - 2. Optimized _sanitizeCellValue() with regex pre-checks for numbers and dates - 3. Batch cell operations where possible - 4. Reduced exception handling overhead - - Expected performance: 10-30x faster for large tables compared to unoptimized version. - """ + """Add a table element to Excel sheet with styling, borders, banding and alignment.""" try: - # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return startRow @@ -1182,7 +660,6 @@ class RendererXlsx(BaseRenderer): headers = content.get("headers", []) rows = content.get("rows", []) - # Ensure headers and rows are lists if not isinstance(headers, list): headers = [] if not isinstance(rows, list): @@ -1191,131 +668,102 @@ class RendererXlsx(BaseRenderer): if not headers and not rows: return startRow - # Define border style - thin_border = Border( - left=Side(style='thin'), - right=Side(style='thin'), - top=Side(style='thin'), - bottom=Side(style='thin') + # --- per-table style override merge --- + tableStyleOverride = content.get("tableStyle", {}) + mergedStyles = deepMerge(styles, tableStyleOverride) if tableStyleOverride else styles + + # --- border --- + tableBorderCfg = mergedStyles.get("table_border", {}) + borderColor = self._getSafeColor(tableBorderCfg.get("color", "FF000000")) + borderStyleName = tableBorderCfg.get("style", "grid") + tableBorder = self._buildTableBorder(borderStyleName, borderColor) + + # --- header style --- + headerStyle = mergedStyles.get("table_header", {}) + primaryFont = mergedStyles.get("fonts", {}).get("primary") + headerFontColor = self._getSafeColor(headerStyle.get("text_color", "FF000000")) + headerFontSize = headerStyle.get("font_size") + headerFont = Font(name=primaryFont, bold=headerStyle.get("bold", True), + size=headerFontSize, color=headerFontColor) + headerFill = None + if headerStyle.get("background"): + hdrBg = self._getSafeColor(headerStyle["background"]) + headerFill = PatternFill(start_color=hdrBg, end_color=hdrBg, fill_type="solid") + headerAlignment = Alignment( + horizontal=self._getSafeAlignment(headerStyle.get("align", "left")), + vertical="center" ) headerRow = startRow - header_style = styles.get("table_header", {}) - - # Pre-calculate and cache style objects to avoid repeated parsing - header_font_color = self._getSafeColor(header_style.get("text_color", "FF000000")) - header_font = Font(bold=header_style.get("bold", True), color=header_font_color) - header_bg_color = None - header_fill = None - if header_style.get("background"): - header_bg_color = self._getSafeColor(header_style["background"]) - header_fill = PatternFill(start_color=header_bg_color, end_color=header_bg_color, fill_type="solid") - header_alignment = Alignment( - horizontal=self._getSafeAlignment(header_style.get("align", "left")), - vertical="center" - ) - - # Add headers with formatting - OPTIMIZED: use cached style objects for col, header in enumerate(headers, 1): runs = self._inlineRunsForCell(header) headerText = self._renderInlineRuns(runs) - sanitized_header = self._sanitizeCellValue(headerText) - cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) - - # Apply styling with fallbacks - use pre-calculated objects - try: - cell.font = header_font - except Exception: - try: - cell.font = Font(bold=True, color=self._getSafeColor("FF000000")) - except Exception: - pass - - try: - if header_fill: - cell.fill = header_fill - except Exception: - pass - - try: - cell.alignment = header_alignment - except Exception: - try: - cell.alignment = Alignment(horizontal="left", vertical="center") - except Exception: - pass - - try: - cell.border = thin_border - except Exception: - pass + cell = sheet.cell(row=headerRow, column=col, value=self._sanitizeCellValue(headerText)) + cell.font = headerFont + if headerFill: + cell.fill = headerFill + cell.alignment = headerAlignment + cell.border = tableBorder startRow += 1 - # Add rows with formatting - OPTIMIZED: pre-calculate style objects - cell_style = styles.get("table_cell", {}) - header_count = len(headers) + # --- cell style --- + cellStyle = mergedStyles.get("table_cell", {}) + headerCount = len(headers) + cellTextColor = self._getSafeColor(cellStyle.get("text_color")) if cellStyle.get("text_color") else None + cellFontSize = cellStyle.get("font_size") + cellFont = Font(name=primaryFont, size=cellFontSize, + color=cellTextColor) if cellTextColor else None - # Pre-calculate and cache style objects to avoid repeated parsing - cell_text_color = None - cell_font = None - if cell_style.get("text_color"): - cell_text_color = self._getSafeColor(cell_style["text_color"]) - cell_font = Font(color=cell_text_color) - cell_alignment = Alignment( - horizontal=self._getSafeAlignment(cell_style.get("align", "left")), - vertical="center" - ) + # --- banding --- + bandingCfg = mergedStyles.get("table_banding", {}) + bandingEnabled = bandingCfg.get("enabled", True) + evenFill = None + oddFill = None + if bandingEnabled: + evenColor = self._getSafeColor(bandingCfg.get("even", "FFF6F8FA")) + oddColor = self._getSafeColor(bandingCfg.get("odd", "FFFFFFFF")) + evenFill = PatternFill(start_color=evenColor, end_color=evenColor, fill_type="solid") + oddFill = PatternFill(start_color=oddColor, end_color=oddColor, fill_type="solid") - for row_data in rows: - # Handle different row formats - if isinstance(row_data, list): - cell_values = row_data - elif isinstance(row_data, dict) and "cells" in row_data: - cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] - else: - continue - - # Validate row column count matches headers - pad or truncate if needed - if len(cell_values) < header_count: - # Pad with empty strings if row has fewer columns - cell_values.extend([""] * (header_count - len(cell_values))) - elif len(cell_values) > header_count: - # Truncate if row has more columns than headers - cell_values = cell_values[:header_count] - - for col, cell_value in enumerate(cell_values, 1): - runs = self._inlineRunsForCell(cell_value) - cellText = self._renderInlineRuns(runs) - sanitized_value = self._sanitizeCellValue(cellText) - cell = sheet.cell(row=startRow, column=col, value=sanitized_value) - - # Apply styling with fallbacks - use pre-calculated objects - try: - if cell_font: - cell.font = cell_font - except Exception: - pass - - try: - cell.alignment = cell_alignment - except Exception: - try: - cell.alignment = Alignment(horizontal="left", vertical="center") - except Exception: - pass - - try: - cell.border = thin_border - except Exception: - pass - - startRow += 1 + # --- column alignments --- + colAlignments = self._inferColumnAlignments(headers, rows, mergedStyles) + + for dataRowIdx, rowData in enumerate(rows): + if isinstance(rowData, list): + cellValues = rowData + elif isinstance(rowData, dict) and "cells" in rowData: + cellValues = [cellObj.get("value", "") for cellObj in rowData.get("cells", [])] + else: + continue + + if len(cellValues) < headerCount: + cellValues.extend([""] * (headerCount - len(cellValues))) + elif len(cellValues) > headerCount: + cellValues = cellValues[:headerCount] + + for col, cellValue in enumerate(cellValues, 1): + runs = self._inlineRunsForCell(cellValue) + cellText = self._renderInlineRuns(runs) + cell = sheet.cell(row=startRow, column=col, value=self._sanitizeCellValue(cellText)) + + if cellFont: + cell.font = cellFont + + if bandingEnabled: + rowFill = evenFill if dataRowIdx % 2 == 0 else oddFill + if rowFill: + cell.fill = rowFill + + colAlign = colAlignments[col - 1] if col - 1 < len(colAlignments) else "left" + cell.alignment = Alignment(horizontal=colAlign, vertical="center") + cell.border = tableBorder + + startRow += 1 - # Auto-adjust column widths for col in range(1, len(headers) + 1): - column_letter = get_column_letter(col) - sheet.column_dimensions[column_letter].width = 20 + columnLetter = get_column_letter(col) + sheet.column_dimensions[columnLetter].width = 20 return startRow @@ -1334,12 +782,19 @@ class RendererXlsx(BaseRenderer): listItems = [] listStyle = styles.get("bullet_list", {}) + bulletChar = listStyle.get("bullet_char", "\u2022") + fontSize = listStyle.get("font_size", 11) + primaryFont = styles.get("fonts", {}).get("primary") + cellFont = Font( + name=primaryFont, + size=fontSize, + color=self._getSafeColor(listStyle.get("color")) + ) for item in listItems: runs = self._inlineRunsForListItem(item) text = self._renderInlineRuns(runs) - sheet.cell(row=startRow, column=1, value=f"\u2022 {text}") - if listStyle.get("color"): - sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"])) + cell = sheet.cell(row=startRow, column=1, value=f"{bulletChar} {text}") + cell.font = cellFont startRow += 1 return startRow @@ -1362,9 +817,10 @@ class RendererXlsx(BaseRenderer): if text: sheet.cell(row=startRow, column=1, value=text) - paragraph_style = styles.get("paragraph", {}) - if paragraph_style.get("color"): - sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"])) + paragraphStyle = styles.get("paragraph", {}) + primaryFont = styles.get("fonts", {}).get("primary") + if paragraphStyle.get("color"): + sheet.cell(row=startRow, column=1).font = Font(name=primaryFont, color=self._getSafeColor(paragraphStyle["color"])) startRow += 1 @@ -1387,15 +843,13 @@ class RendererXlsx(BaseRenderer): if text: sheet.cell(row=startRow, column=1, value=text) - heading_style = styles.get("heading", {}) - font_size = heading_style.get("font_size", 14) - if level > 1: - font_size = max(10, font_size - (level - 1) * 2) - + headingStyle = styles.get(f"heading{level}", styles.get("heading1", {})) + primaryFont = styles.get("fonts", {}).get("primary") sheet.cell(row=startRow, column=1).font = Font( - size=font_size, - bold=True, - color=self._getSafeColor(heading_style.get("color", "FF000000")) + name=primaryFont, + size=headingStyle.get("font_size", 14), + bold=headingStyle.get("bold", True), + color=self._getSafeColor(headingStyle.get("color")) ) startRow += 1 @@ -1506,37 +960,39 @@ class RendererXlsx(BaseRenderer): language = content.get("language", "") if code: - code_style = styles.get("code_block", {}) + codeStyle = styles.get("code_block", {}) - # Pre-calculate and cache style objects to avoid repeated parsing - code_font_name = code_style.get("font", "Courier New") - code_font_size = code_style.get("font_size", 10) - code_text_color = self._getSafeColor(code_style.get("color", "FF2F2F2F")) - code_font = Font(name=code_font_name, size=code_font_size, color=code_text_color) + codeFontName = codeStyle.get("font", styles.get("fonts", {}).get("monospace", "Consolas")) + codeFontSize = codeStyle.get("font_size", 10) + codeTextColor = self._getSafeColor(codeStyle.get("color", "FF2F2F2F")) + codeFont = Font(name=codeFontName, size=codeFontSize, color=codeTextColor) - code_bg_color = None - code_fill = None - if code_style.get("background"): - code_bg_color = self._getSafeColor(code_style["background"]) - code_fill = PatternFill(start_color=code_bg_color, end_color=code_bg_color, fill_type="solid") + codeFill = None + if codeStyle.get("background"): + codeBgColor = self._getSafeColor(codeStyle["background"]) + codeFill = PatternFill(start_color=codeBgColor, end_color=codeBgColor, fill_type="solid") + + codeBorder = None + if codeStyle.get("border_color"): + codeBorderColor = self._getSafeColor(codeStyle["border_color"]) + codeSide = Side(style="thin", color=codeBorderColor) + codeBorder = Border(left=codeSide, right=codeSide, top=codeSide, bottom=codeSide) - # Add language label if present if language: langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):") - langCell.font = Font(bold=True, color=code_text_color) + langCell.font = Font(bold=True, color=codeTextColor) startRow += 1 - # Split code into lines and add each line - use cached style objects - code_lines = code.split('\n') - for line in code_lines: + codeLines = code.split('\n') + for line in codeLines: codeCell = sheet.cell(row=startRow, column=1, value=line) - codeCell.font = code_font - # Set background color if specified - if code_fill: - codeCell.fill = code_fill + codeCell.font = codeFont + if codeFill: + codeCell.fill = codeFill + if codeBorder: + codeCell.border = codeBorder startRow += 1 - # Add spacing after code block startRow += 1 return startRow diff --git a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py index e6447c73..8d60c282 100644 --- a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py +++ b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py @@ -11,39 +11,56 @@ DEFAULT_STYLE: Dict[str, Any] = { "monospace": "Consolas", }, "colors": { - "primary": "#1F3864", - "secondary": "#2C3E50", - "accent": "#2980B9", + "primary": "#24292e", + "secondary": "#586069", + "accent": "#0366d6", "background": "#FFFFFF", }, "documentTitle": { "sizePt": 28, "weight": "bold", - "color": "#1F3864", + "color": "#24292e", "spaceBeforePt": 0, "spaceAfterPt": 18, "align": "center", }, "headings": { - "h1": {"sizePt": 22, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 22, "spaceAfterPt": 8}, - "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 20, "spaceAfterPt": 6}, - "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 16, "spaceAfterPt": 4}, - "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 12, "spaceAfterPt": 3}, + "h1": {"sizePt": 22, "weight": "bold", "color": "#24292e", "spaceBeforePt": 24, "spaceAfterPt": 8}, + "h2": {"sizePt": 18, "weight": "bold", "color": "#24292e", "spaceBeforePt": 20, "spaceAfterPt": 6}, + "h3": {"sizePt": 14, "weight": "bold", "color": "#586069", "spaceBeforePt": 16, "spaceAfterPt": 4}, + "h4": {"sizePt": 12, "weight": "bold", "color": "#586069", "spaceBeforePt": 12, "spaceAfterPt": 3}, }, - "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"}, + "paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#24292e", "align": "left"}, "table": { - "headerBg": "#1F3864", - "headerFg": "#FFFFFF", + "headerBg": "#f6f8fa", + "headerFg": "#24292e", "headerSizePt": 10, "bodySizePt": 10, - "rowBandingEven": "#F2F6FC", + "rowBandingEven": "#f6f8fa", "rowBandingOdd": "#FFFFFF", - "borderColor": "#CBD5E1", + "borderColor": "#e1e4e8", "borderWidthPt": 0.5, + "borderStyle": "grid", + "bandingEnabled": True, + "cellPaddingPt": 4, }, "list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11}, "image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"}, - "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"}, + "codeBlock": {"fontSizePt": 9, "background": "#f6f8fa", "borderColor": "#e1e4e8"}, + "coverPage": { + "titleSizePt": 28, + "subtitleSizePt": 16, + "authorSizePt": 12, + "dateSizePt": 12, + "titleColor": "#24292e", + "subtitleColor": "#586069", + }, + "caption": { + "sizePt": 10, + "color": "#586069", + "italic": True, + "align": "center", + }, "page": { "format": "A4", "marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60}, @@ -57,98 +74,9 @@ DEFAULT_STYLE: Dict[str, Any] = { } -# ------------------------------------------------------------------ -# Theme presets (A3): named, purpose-specific style overrides that are -# deep-merged onto DEFAULT_STYLE. A preset only declares the keys it changes; -# everything else inherits the default. Explicit per-call `style` overrides -# always win over the preset. -# ------------------------------------------------------------------ -THEME_PRESETS: Dict[str, Dict[str, Any]] = { - # "general" intentionally empty -> identical to DEFAULT_STYLE. - "general": {}, - "finance": { - "fonts": {"primary": "Calibri"}, - "colors": {"primary": "#0B3D2E", "secondary": "#14532D", "accent": "#047857"}, - "documentTitle": {"color": "#0B3D2E", "align": "left"}, - "headings": { - "h1": {"color": "#0B3D2E"}, - "h2": {"color": "#0B3D2E"}, - "h3": {"color": "#14532D"}, - "h4": {"color": "#14532D"}, - }, - "table": {"headerBg": "#0B3D2E", "rowBandingEven": "#ECFDF5"}, - }, - "legal": { - # Serif, sober, single-column, justified body, no logo banner. - "fonts": {"primary": "Times New Roman"}, - "colors": {"primary": "#1A1A1A", "secondary": "#333333", "accent": "#5A5A5A"}, - "documentTitle": {"color": "#1A1A1A", "align": "center", "sizePt": 20}, - "headings": { - "h1": {"color": "#1A1A1A", "sizePt": 16}, - "h2": {"color": "#1A1A1A", "sizePt": 14}, - "h3": {"color": "#333333", "sizePt": 12}, - "h4": {"color": "#333333", "sizePt": 11}, - }, - "paragraph": {"sizePt": 11, "lineSpacing": 1.5, "color": "#1A1A1A", "align": "justify"}, - "table": {"headerBg": "#333333", "rowBandingEven": "#F5F5F5", "borderColor": "#999999"}, - "page": {"showPageNumbers": True}, - }, - "technical": { - "fonts": {"primary": "Arial", "monospace": "Consolas"}, - "colors": {"primary": "#0F172A", "secondary": "#1E293B", "accent": "#2563EB"}, - "documentTitle": {"color": "#0F172A", "align": "left"}, - "headings": { - "h1": {"color": "#0F172A"}, - "h2": {"color": "#1E293B"}, - "h3": {"color": "#1E293B"}, - "h4": {"color": "#334155"}, - }, - "paragraph": {"sizePt": 10, "lineSpacing": 1.2}, - "codeBlock": {"fontSizePt": 9, "background": "#0F172A"}, - "table": {"headerBg": "#1E293B", "rowBandingEven": "#EEF2FF"}, - }, - "hr": { - "fonts": {"primary": "Calibri"}, - "colors": {"primary": "#5B21B6", "secondary": "#6D28D9", "accent": "#9333EA"}, - "documentTitle": {"color": "#5B21B6", "align": "center"}, - "headings": { - "h1": {"color": "#5B21B6"}, - "h2": {"color": "#6D28D9"}, - "h3": {"color": "#7C3AED"}, - "h4": {"color": "#7C3AED"}, - }, - "table": {"headerBg": "#5B21B6", "rowBandingEven": "#F5F3FF"}, - }, - "marketing": { - # Bold, image-friendly, generous spacing, larger title. - "fonts": {"primary": "Verdana"}, - "colors": {"primary": "#BE123C", "secondary": "#E11D48", "accent": "#F59E0B"}, - "documentTitle": {"color": "#BE123C", "sizePt": 34, "align": "center", "spaceAfterPt": 24}, - "headings": { - "h1": {"color": "#BE123C", "sizePt": 24}, - "h2": {"color": "#E11D48", "sizePt": 19}, - "h3": {"color": "#E11D48", "sizePt": 15}, - "h4": {"color": "#9F1239", "sizePt": 13}, - }, - "paragraph": {"sizePt": 12, "lineSpacing": 1.3}, - "image": {"defaultWidthPt": 540, "maxWidthPt": 900, "alignment": "center"}, - "table": {"headerBg": "#BE123C", "rowBandingEven": "#FFF1F2"}, - }, -} -def resolveTheme(themeName: str | None) -> Dict[str, Any]: - """Return the partial style override for a named theme preset. - - Unknown / empty names fall back to ``{}`` (i.e. plain DEFAULT_STYLE). - The lookup is case-insensitive. - """ - if not themeName: - return {} - return dict(THEME_PRESETS.get(str(themeName).strip().lower(), {})) - - -def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: +def deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: """Recursively merge override into base. Both dicts left unchanged; returns new dict.""" result = {} for key in base: @@ -156,7 +84,7 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any] baseVal = base[key] overVal = override[key] if isinstance(baseVal, dict) and isinstance(overVal, dict): - result[key] = _deepMerge(baseVal, overVal) + result[key] = deepMerge(baseVal, overVal) else: result[key] = overVal else: @@ -167,17 +95,15 @@ def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any] return result -def resolveStyle(agentStyle: dict | None, documentTheme: str | None = None) -> Dict[str, Any]: - """Resolve the effective style: ``DEFAULT_STYLE <- themePreset <- agentStyle``. +def resolveStyle(agentStyle: dict | None = None) -> Dict[str, Any]: + """Resolve the effective style: ``DEFAULT_STYLE <- agentStyle``. - Precedence (lowest to highest): platform defaults, the named ``documentTheme`` - preset, then any explicit per-call ``agentStyle`` override. With no theme and - no override this returns plain :data:`DEFAULT_STYLE`. + Precedence (lowest to highest): platform defaults, then any explicit + per-call ``agentStyle`` override. With no override this returns plain + :data:`DEFAULT_STYLE`. Context-aware styling is handled by the AI + enhancement step in ``mainServiceGeneration.renderReport``. """ resolved = dict(DEFAULT_STYLE) - themeOverride = resolveTheme(documentTheme) - if themeOverride: - resolved = _deepMerge(resolved, themeOverride) if agentStyle: - resolved = _deepMerge(resolved, agentStyle) + resolved = deepMerge(resolved, agentStyle) return resolved diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 42962ad7..5a1ff0eb 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -22,7 +22,6 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: return ActionResult.isFailure(error="prompt is required") documentType = parameters.get("documentType") - documentTheme = parameters.get("documentTheme") or None # Prefer explicit outputFormat (flow UI); resultType remains for legacy / API callers. resultType = parameters.get("outputFormat") or parameters.get("resultType") if isinstance(resultType, str): @@ -83,8 +82,7 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: outputFormat=resultType, # Can be None - AI determines from prompt title=title, parentOperationId=parentOperationId, - generationIntent="document", # NEW: Explicit intent, skips detection - documentTheme=documentTheme # Named style preset for the renderer + generationIntent="document" # NEW: Explicit intent, skips detection ) # Convert AiResponse to ActionResult diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 46aac70d..62955b12 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -252,32 +252,37 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: output_format_for_call = output_extension.replace('.', '') if output_extension else (output_format or 'txt') # Simple mode: fast path without document generation pipeline + # Uses the same extraction + chunking pipeline as full mode to avoid oversized prompts if simpleMode: self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI (simple mode)") - context_parts = [] - paramContext = parameters.get("context") # already serialized above + simpleParts: Optional[List[ContentPart]] = contentParts + if not simpleParts and documentList and len(documentList.references) > 0: + from modules.datamodels.datamodelDocref import DocumentItemReference + fileIdRefs = [r for r in documentList.references if isinstance(r, DocumentItemReference)] + if fileIdRefs: + simpleParts = _resolve_file_refs_to_content_parts(self.services, fileIdRefs) + if not simpleParts: + try: + documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) + simpleParts = _action_docs_to_content_parts(self.services, [ + {"documentData": self.services.interfaceDbComponent.getFileData(doc.fileId), + "documentName": getattr(doc, 'fileName', ''), + "mimeType": getattr(doc, 'mimeType', 'application/octet-stream')} + for doc in documents if hasattr(doc, 'fileId') and doc.fileId + ]) + except Exception as e: + logger.warning(f"Error extracting content parts in simple mode: {e}") + + paramContext = parameters.get("context") + simplePrompt = aiPrompt if paramContext and isinstance(paramContext, str) and paramContext.strip(): - context_parts.append(paramContext.strip()) - if documentList and len(documentList.references) > 0: - try: - documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) - for doc in documents: - if hasattr(doc, 'fileId') and doc.fileId: - fileData = self.services.interfaceDbComponent.getFileData(doc.fileId) - if fileData: - if isinstance(fileData, bytes): - doc_text = fileData.decode('utf-8', errors='ignore') - else: - doc_text = str(fileData) - context_parts.append(doc_text) - except Exception as e: - logger.warning(f"Error extracting context from documents in simple mode: {e}") - context_text = "\n\n".join(context_parts) if context_parts else "" - + simplePrompt = f"{aiPrompt}\n\n--- DATA CONTEXT ---\n{paramContext.strip()}" + request = AiCallRequest( - prompt=aiPrompt, - context=context_text if context_text else None, + prompt=simplePrompt, + contentParts=simpleParts if simpleParts else None, + context=None, options=AiCallOptions( resultFormat=output_format_for_call, operationType=OperationTypeEnum.DATA_ANALYSE, diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index aacfacaa..55c9a40a 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -85,15 +85,6 @@ class MethodAi(MethodBase): default="", description="Additional context data (string or upstream-bound dict/list, e.g. accounting data) appended to the prompt. Non-string values are JSON-serialized." ), - "documentTheme": WorkflowActionParameter( - name="documentTheme", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"], - required=False, - default="general", - description="Named style preset for the document renderer (general/finance/legal/technical/hr/marketing). The agent forwards it to the renderDocument tool's documentTheme." - ), "resultType": WorkflowActionParameter( name="resultType", type="str", @@ -385,15 +376,6 @@ class MethodAi(MethodBase): required=False, description="Type of document (content hint for the model); used as title fallback when title is empty." ), - "documentTheme": WorkflowActionParameter( - name="documentTheme", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["general", "finance", "legal", "technical", "hr", "marketing"], - required=False, - default="general", - description="Named style preset applied by the renderer (colors, fonts, spacing): general, finance, legal, technical, hr, marketing." - ), "resultType": WorkflowActionParameter( name="resultType", type="str", -- 2.45.2 From 60bb771158099e7a1e1bb9577b90ffc7bbec0b8b Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 3 Jun 2026 17:02:18 +0200 Subject: [PATCH 2/3] fixes doc generation and renderers 2 --- .../serviceGeneration/test_style_resolver.py | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/tests/serviceGeneration/test_style_resolver.py b/tests/serviceGeneration/test_style_resolver.py index 23027efe..e7d629cd 100644 --- a/tests/serviceGeneration/test_style_resolver.py +++ b/tests/serviceGeneration/test_style_resolver.py @@ -3,9 +3,7 @@ import pytest from modules.serviceCenter.services.serviceGeneration.styleDefaults import ( resolveStyle, - resolveTheme, DEFAULT_STYLE, - THEME_PRESETS, ) @@ -49,45 +47,3 @@ def test_override_document_title_partial_merge(): assert result["documentTitle"]["sizePt"] == 32 assert result["documentTitle"]["align"] == "center" assert result["headings"]["h1"]["sizePt"] == DEFAULT_STYLE["headings"]["h1"]["sizePt"] - - -# ── Theme presets (A3) ───────────────────────────────────────────── - -def test_resolve_theme_unknown_is_empty(): - assert resolveTheme(None) == {} - assert resolveTheme("does-not-exist") == {} - - -def test_resolve_theme_case_insensitive(): - assert resolveTheme("FINANCE") == THEME_PRESETS["finance"] - - -def test_general_theme_equals_defaults(): - assert resolveStyle(None, "general") == DEFAULT_STYLE - - -def test_theme_applies_preset_over_defaults(): - result = resolveStyle(None, "legal") - # legal preset changes the primary font to a serif and justifies body text - assert result["fonts"]["primary"] == "Times New Roman" - assert result["paragraph"]["align"] == "justify" - # untouched keys still come from DEFAULT_STYLE - assert result["page"]["format"] == DEFAULT_STYLE["page"]["format"] - - -def test_explicit_style_overrides_theme(): - # theme sets finance green; explicit style must win - result = resolveStyle({"colors": {"primary": "#FF0000"}}, "finance") - assert result["colors"]["primary"] == "#FF0000" - # non-overridden theme key still applies - assert result["table"]["headerBg"] == THEME_PRESETS["finance"]["table"]["headerBg"] - - -def test_marketing_theme_enlarges_title_and_images(): - result = resolveStyle(None, "marketing") - assert result["documentTitle"]["sizePt"] == 34 - assert result["image"]["defaultWidthPt"] == 540 - - -def test_unknown_theme_falls_back_to_defaults(): - assert resolveStyle(None, "rainbow") == DEFAULT_STYLE -- 2.45.2 From b7503e0272162bd7a0710bfb9b268e5d21bf3d84 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 3 Jun 2026 17:15:20 +0200 Subject: [PATCH 3/3] fixes doc generation and renderers 3 --- .../serviceGeneration/renderers/rendererDocx.py | 4 +++- .../serviceGeneration/renderers/rendererPdf.py | 10 ++++------ .../serviceGeneration/renderers/rendererPptx.py | 3 ++- .../serviceGeneration/renderers/rendererXlsx.py | 4 +++- tests/unit/services/test_renderer_pdf_smoke.py | 3 ++- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py index 9d7b24ff..28e6fd65 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py @@ -120,9 +120,11 @@ class RendererDocx(BaseRenderer): doc = Document() self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s") - # Apply unified style (always provided by mainServiceGeneration.renderReport) style_start = time.time() self.logger.debug("_generateDocxFromJson: About to get style set") + if not unifiedStyle: + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle + unifiedStyle = resolveStyle(None) styleSet = self._convertUnifiedStyleToInternal(unifiedStyle) self._unifiedStyle = unifiedStyle self._styleSet = styleSet diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index fc6bd51b..425da644 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -229,11 +229,9 @@ class RendererPdf(BaseRenderer): # memory simultaneously. Collected here, deleted after the build. self._tempImageFiles = [] try: - self._unifiedStyle = unifiedStyle - if unifiedStyle: - styles = self._convertUnifiedStyleToInternal(unifiedStyle) - else: - styles = self._convertUnifiedStyleToInternal({}) + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle + self._unifiedStyle = unifiedStyle or resolveStyle(None) + styles = self._convertUnifiedStyleToInternal(self._unifiedStyle) for level in range(1, 7): hKey = f"heading{level}" if hKey not in styles: @@ -934,7 +932,7 @@ class RendererPdf(BaseRenderer): code = _normalizePdfMonospaceText(code) elements = [] fs = code_style_def.get("font_size", 9) - mono = code_style_def.get("font", "Courier") + mono = _resolveFontFamily(code_style_def.get("font", "Courier")) textColorFallback = styles.get("colors", {}).get("primary", "#24292e") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py index 7064ea1e..112f1bf0 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py @@ -92,7 +92,8 @@ class RendererPptx(BaseRenderer): import re if not style: - style = {} + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle + style = resolveStyle(None) internalStyle = self._convertUnifiedStyleToInternal(style) styles = internalStyle self._styles = styles diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py index 0b544dd8..44d491d7 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py @@ -136,7 +136,9 @@ class RendererXlsx(BaseRenderer): self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") - # Store unified style for use by inline-run helpers + if not style: + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle + style = resolveStyle(None) self._unifiedStyle = style # Convert unified style to internal format diff --git a/tests/unit/services/test_renderer_pdf_smoke.py b/tests/unit/services/test_renderer_pdf_smoke.py index 33324572..60c1a2ef 100644 --- a/tests/unit/services/test_renderer_pdf_smoke.py +++ b/tests/unit/services/test_renderer_pdf_smoke.py @@ -232,8 +232,9 @@ def test_normalize_pdf_monospace_replaces_box_drawing(): def test_pdf_heading_font_sizes_strictly_decrease(): """H3 must not fall back to H1 styles (previous bug: ## smaller than ###).""" + from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle renderer = RendererPdf(services=_fakeServices()) - styles = renderer._getDefaultStyleSet() + styles = renderer._convertUnifiedStyleToInternal(resolveStyle(None)) assert styles["heading1"]["font_size"] > styles["heading2"]["font_size"] > styles["heading3"]["font_size"] assert renderer._defaultHeadingStyleDef(2)["font_size"] > renderer._defaultHeadingStyleDef(3)["font_size"] if REPORTLAB_AVAILABLE: -- 2.45.2