diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index cfc10db2..786eea7d 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -162,6 +162,7 @@ class AiCallOptions(BaseModel): # Provider filtering (from UI multiselect or automation config) allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)") + allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). None/empty = all allowed.") class AiCallRequest(BaseModel): diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py index 784cc042..0228fbad 100644 --- a/modules/datamodels/datamodelJson.py +++ b/modules/datamodels/datamodelJson.py @@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren This defines a single canonical template and the supported section types. """ -from typing import List +from typing import List, Literal, TypedDict # Canonical list of supported section types across the system supportedSectionTypes: List[str] = [ @@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [ "image", ] +class InlineRun(TypedDict, total=False): + """Single inline content run. Every paragraph/cell/list-item is a List[InlineRun].""" + type: Literal["text", "image", "link", "bold", "italic", "code"] + value: str # text content (for text/bold/italic/code/link-label) + fileId: str # for type=image: reference to FileItem + base64Data: str # for type=image: resolved base64 (post-processing) + mimeType: str # for type=image: e.g. "image/png" + widthPt: int # for type=image: optional render width + href: str # for type=link: URL target + +supportedInlineRunTypes: List[str] = [ + "text", "image", "link", "bold", "italic", "code", +] + + # Canonical JSON template used for AI generation (documents array + sections) # This template is used for STRUCTURE generation - sections have empty elements arrays. # For content generation, elements arrays will be populated later. diff --git a/modules/features/graphicalEditor/nodeDefinitions/ai.py b/modules/features/graphicalEditor/nodeDefinitions/ai.py index 3273540a..0336e382 100644 --- a/modules/features/graphicalEditor/nodeDefinitions/ai.py +++ b/modules/features/graphicalEditor/nodeDefinitions/ai.py @@ -3,6 +3,15 @@ from modules.shared.i18nRegistry import t +_AI_COMMON_PARAMS = [ + {"name": "requireNeutralization", "type": "boolean", "required": False, + "frontendType": "checkbox", "default": False, + "description": t("Eingaben fuer diesen Call neutralisieren")}, + {"name": "allowedModels", "type": "array", "required": False, + "frontendType": "modelMultiSelect", "default": [], + "description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")}, +] + AI_NODES = [ { "id": "ai.prompt", @@ -19,7 +28,7 @@ AI_NODES = [ "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, {"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox", "description": t("Einfacher Modus"), "default": True}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": [ @@ -38,7 +47,7 @@ AI_NODES = [ "parameters": [ {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", "description": t("Recherche-Anfrage")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -58,7 +67,7 @@ AI_NODES = [ {"name": "summaryLength", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["brief", "medium", "detailed"]}, "description": t("Kurz, mittel oder ausführlich"), "default": "medium"}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -77,7 +86,7 @@ AI_NODES = [ "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, {"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text", "description": t("Zielsprache (z.B. de, en, French)")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -97,7 +106,7 @@ AI_NODES = [ {"name": "targetFormat", "type": "string", "required": True, "frontendType": "select", "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]}, "description": t("Zielformat")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, @@ -114,7 +123,7 @@ AI_NODES = [ "parameters": [ {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", "description": t("Generierungs-Prompt")}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -134,7 +143,7 @@ AI_NODES = [ {"name": "resultType", "type": "string", "required": False, "frontendType": "select", "frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]}, "description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["Transit"]}}, @@ -154,7 +163,7 @@ AI_NODES = [ "description": t("Konsolidierungsmodus"), "default": "summarize"}, {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea", "description": t("Optionaler Prompt für die Konsolidierung"), "default": ""}, - ], + ] + _AI_COMMON_PARAMS, "inputs": 1, "outputs": 1, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, diff --git a/modules/features/trustee/mainTrustee.py b/modules/features/trustee/mainTrustee.py index 05e01e8a..d8f7a804 100644 --- a/modules/features/trustee/mainTrustee.py +++ b/modules/features/trustee/mainTrustee.py @@ -361,6 +361,17 @@ QUICK_ACTIONS = [ # The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows. # --------------------------------------------------------------------------- +_FINANCE_STYLE_HINT = ( + "\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n" + "- Schriftart: Calibri\n" + "- Primaerfarbe: #1F3864 (Dunkelblau)\n" + "- Akzentfarbe: #2980B9\n" + "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n" + "- Konservatives, seriöses Layout\n" + "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen." +) + + def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: """Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt.""" return { @@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: "parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}}, {"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process", "parameters": { - "aiPrompt": prompt, + "aiPrompt": prompt + _FINANCE_STYLE_HINT, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, + "requireNeutralization": True, "simpleMode": False, }, "position": {"x": 500, "y": 0}}, ], @@ -454,10 +466,19 @@ TEMPLATE_WORKFLOWS = [ "Verwende die uebergebene Budget-Datei als Soll-Quelle und die im " "Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n" "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN " - "Uebersichts-Chart ueber alle Konten ist gewuenscht." + "Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n" + "Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, " + "verwende einen professionellen Finanz-Stil:\n" + "- Schriftart: Calibri\n" + "- Primaerfarbe: #1F3864 (Dunkelblau)\n" + "- Akzentfarbe: #2980B9\n" + "- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n" + "- Konservatives, seriöses Layout\n" + "Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen." ), "resultType": "xlsx", "documentTheme": "finance", + "requireNeutralization": True, "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "simpleMode": False, diff --git a/modules/features/workspace/datamodelFeatureWorkspace.py b/modules/features/workspace/datamodelFeatureWorkspace.py index b12d4b84..4e32702c 100644 --- a/modules/features/workspace/datamodelFeatureWorkspace.py +++ b/modules/features/workspace/datamodelFeatureWorkspace.py @@ -2,8 +2,8 @@ # All rights reserved. """Workspace feature data models — WorkspaceUserSettings.""" -from typing import Optional -from pydantic import BaseModel, Field +from typing import List, Optional +from pydantic import Field from modules.datamodels.datamodelBase import PowerOnModel from modules.shared.i18nRegistry import i18nModel import uuid @@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel): description="Max agent rounds override (None = instance default)", json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False}, ) + requireNeutralization: bool = Field( + default=False, + description="Default neutralization setting for this user", + json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False}, + ) + allowedProviders: List[str] = Field( + default_factory=list, + description="Allowed AI providers (empty = all permitted by RBAC)", + json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False}, + ) + allowedModels: List[str] = Field( + default_factory=list, + description="Allowed AI models (empty = all permitted)", + json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False}, + ) diff --git a/modules/features/workspace/routeFeatureWorkspace.py b/modules/features/workspace/routeFeatureWorkspace.py index 3e1a54b7..5b0d4d7a 100644 --- a/modules/features/workspace/routeFeatureWorkspace.py +++ b/modules/features/workspace/routeFeatureWorkspace.py @@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel): workflowId: Optional[str] = Field(default=None, description="Continue existing workflow") userLanguage: str = Field(default="en", description="User language code") allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers") + allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models") requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override") @@ -635,6 +636,7 @@ async def streamWorkspaceStart( userLanguage=userInput.userLanguage, instanceConfig=instanceConfig, allowedProviders=userInput.allowedProviders, + allowedModels=userInput.allowedModels, requireNeutralization=userInput.requireNeutralization, billingFeatureCode=wsBillingFeatureCode, ) @@ -692,6 +694,7 @@ async def _runWorkspaceAgent( userLanguage: str = "en", instanceConfig: Dict[str, Any] = None, allowedProviders: List[str] = None, + allowedModels: List[str] = None, requireNeutralization: Optional[bool] = None, billingFeatureCode: Optional[str] = None, ): @@ -715,6 +718,9 @@ async def _runWorkspaceAgent( logger.info(f"Workspace agent: allowedProviders={allowedProviders}") else: logger.debug("Workspace agent: no allowedProviders in request") + if allowedModels: + aiService.services.allowedModels = allowedModels + logger.info(f"Workspace agent: allowedModels={allowedModels}") if requireNeutralization is not None: ctx.requireNeutralization = requireNeutralization @@ -2139,6 +2145,76 @@ async def updateGeneralSettings( return await getGeneralSettings(request, instanceId, context) +# ========================================================================= +# User-level AI settings (neutralisation, providers, models) +# ========================================================================= + +@router.get("/{instanceId}/user-settings") +@limiter.limit("120/minute") +async def getWorkspaceUserSettings( + request: Request, + instanceId: str = Path(...), + context: RequestContext = Depends(getRequestContext), +): + """Get the current user's workspace AI settings (auto-creates with defaults if not exists).""" + _mandateId, _ = _validateInstanceAccess(instanceId, context) + wsInterface = _getWorkspaceInterface(context, instanceId) + userId = str(context.user.id) + + settings = wsInterface.getWorkspaceUserSettings(userId) + if settings: + return JSONResponse({ + "requireNeutralization": settings.requireNeutralization, + "allowedProviders": settings.allowedProviders, + "allowedModels": settings.allowedModels, + }) + + data = { + "userId": userId, + "mandateId": str(context.mandateId) if context.mandateId else "", + "featureInstanceId": instanceId, + } + created = wsInterface.saveWorkspaceUserSettings(data) + return JSONResponse({ + "requireNeutralization": created.requireNeutralization, + "allowedProviders": created.allowedProviders, + "allowedModels": created.allowedModels, + }) + + +@router.put("/{instanceId}/user-settings") +@limiter.limit("120/minute") +async def putWorkspaceUserSettings( + request: Request, + instanceId: str = Path(...), + body: dict = Body(...), + context: RequestContext = Depends(getRequestContext), +): + """Save the current user's workspace AI settings.""" + _mandateId, _ = _validateInstanceAccess(instanceId, context) + wsInterface = _getWorkspaceInterface(context, instanceId) + userId = str(context.user.id) + + data = { + "userId": userId, + "mandateId": str(context.mandateId) if context.mandateId else "", + "featureInstanceId": instanceId, + } + if "requireNeutralization" in body: + data["requireNeutralization"] = bool(body["requireNeutralization"]) + if "allowedProviders" in body: + data["allowedProviders"] = body["allowedProviders"] + if "allowedModels" in body: + data["allowedModels"] = body["allowedModels"] + + saved = wsInterface.saveWorkspaceUserSettings(data) + return JSONResponse({ + "requireNeutralization": saved.requireNeutralization, + "allowedProviders": saved.allowedProviders, + "allowedModels": saved.allowedModels, + }) + + # ========================================================================= # RAG / Knowledge — anonymised instance statistics (presentation / KPIs) # ========================================================================= diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index a859ffa7..dcf819cc 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -111,6 +111,19 @@ class AiObjects: processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, ) + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filteredModels = [m for m in availableModels if m.name in allowedModels] + if filteredModels: + availableModels = filteredModels + else: + errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}" + logger.error(errorMsg) + return AiCallResponse( + content=errorMsg, modelName="error", priceCHF=0.0, + processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, + ) + failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels) if not failoverModelList: @@ -364,6 +377,19 @@ class AiObjects: ) return + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filtered = [m for m in availableModels if m.name in allowedModels] + if filtered: + availableModels = filtered + else: + yield AiCallResponse( + content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}", + modelName="error", priceCHF=0.0, processingTime=0.0, + bytesSent=0, bytesReceived=0, errorCount=1, + ) + return + failoverModelList = modelSelector.getFailoverModelList( request.prompt, request.context or "", options, availableModels ) @@ -516,6 +542,14 @@ class AiObjects: else: logger.warning(f"No embedding models match allowedProviders {allowedProviders}") + allowedModels = getattr(options, 'allowedModels', None) if options else None + if allowedModels: + filtered = [m for m in availableModels if m.name in allowedModels] + if filtered: + availableModels = filtered + else: + logger.warning(f"No embedding models match allowedModels {allowedModels}") + failoverModelList = modelSelector.getFailoverModelList( combinedText, "", options, availableModels ) diff --git a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py index c2a4842b..7b071996 100644 --- a/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py +++ b/modules/serviceCenter/services/serviceAgent/coreTools/_mediaTools.py @@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services): # ---- Document rendering tool ---- def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: - """Convert markdown content to the standard document JSON format expected by renderers.""" - import re as _re - - sections = [] - order = 0 - lines = markdown.split("\n") - i = 0 - - def _nextId(): - nonlocal order - order += 1 - return f"s_{order}" - - while i < len(lines): - line = lines[i] - - # --- Headings --- - headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line) - if headingMatch: - level = len(headingMatch.group(1)) - text = headingMatch.group(2).strip() - sections.append({ - "id": _nextId(), "content_type": "heading", "order": order, - "elements": [{"content": {"text": text, "level": level}}], - }) - i += 1 - continue - - # --- Fenced code blocks --- - codeMatch = _re.match(r'^```(\w*)', line) - if codeMatch: - lang = codeMatch.group(1) or "text" - codeLines = [] - i += 1 - while i < len(lines) and not lines[i].startswith("```"): - codeLines.append(lines[i]) - i += 1 - i += 1 - sections.append({ - "id": _nextId(), "content_type": "code_block", "order": order, - "elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}], - }) - continue - - # --- Tables --- - tableMatch = _re.match(r'^\|(.+)\|$', line) - if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]): - headerCells = [c.strip() for c in tableMatch.group(1).split("|")] - i += 2 - rows = [] - while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]): - rowCells = [c.strip() for c in lines[i][1:-1].split("|")] - rows.append(rowCells) - i += 1 - sections.append({ - "id": _nextId(), "content_type": "table", "order": order, - "elements": [{"content": {"headers": headerCells, "rows": rows}}], - }) - continue - - # --- Bullet / numbered lists --- - listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line) - if listMatch: - isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2))) - items = [] - while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]): - m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]) - items.append({"text": m.group(3).strip()}) - i += 1 - sections.append({ - "id": _nextId(), "content_type": "bullet_list", "order": order, - "elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}], - }) - continue - - # --- Empty lines (skip) --- - if not line.strip(): - i += 1 - continue - - # --- Images: ![alt](file:fileId) or ![alt](url) --- - imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line) - if imgMatch: - altText = imgMatch.group(1).strip() or "Image" - src = imgMatch.group(2).strip() - fileId = "" - if src.startswith("file:"): - fileId = src[5:] - sections.append({ - "id": _nextId(), "content_type": "image", "order": order, - "elements": [{ - "content": { - "altText": altText, - "base64Data": "", - "_fileRef": fileId, - "_srcUrl": src if not fileId else "", - } - }], - }) - i += 1 - continue - - # --- Paragraph (collect consecutive non-empty lines) --- - paraLines = [] - while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]): - paraLines.append(lines[i]) - i += 1 - if paraLines: - sections.append({ - "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": " ".join(paraLines)}}], - }) - continue - - i += 1 - - if not sections: - sections.append({ - "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": markdown.strip() or "(empty)"}}], - }) - - return { - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "agent_rendering", - "title": title, - "language": language, - }, - "documents": [{ - "id": "doc_1", - "title": title, - "sections": sections, - }], - } + """Delegate to the consolidated parser in subDocumentUtility.""" + from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + result = markdownToDocumentJson(markdown, title, language) + result["metadata"]["extraction_method"] = "agent_rendering" + return result async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]): """Render agent-produced markdown content into any document format via the RendererRegistry.""" @@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services): except Exception as e: logger.warning(f"renderDocument: knowledge service unavailable: {e}") resolvedImages = 0 + + def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"): + """Resolve a single image reference dict to base64Data in-place.""" + nonlocal resolvedImages + fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "") + if not fileRef or targetObj.get("base64Data"): + return + if knowledgeService: + chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) + imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] + if imageChunks: + targetObj["base64Data"] = imageChunks[0].get("data", "") + chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") + targetObj["mimeType"] = chunkMime + resolvedImages += 1 + if not targetObj.get("base64Data"): + try: + rawBytes = services.chat.getFileData(fileRef) + if rawBytes: + import base64 as _b64 + targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") + targetObj["mimeType"] = "image/png" + resolvedImages += 1 + except Exception as e: + logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}") + targetObj.pop("_fileRef", None) + targetObj.pop("_srcUrl", None) + + def _resolveInlineRuns(runsList): + """Scan a list of inline runs and resolve any image runs with fileId.""" + for run in runsList: + if run.get("type") == "image" and run.get("fileId") and not run.get("base64Data"): + _resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId") + for doc in structuredContent.get("documents", []): for section in doc.get("sections", []): - if section.get("content_type") != "image": + cType = section.get("content_type") + # Block-level image sections + if cType == "image": + for element in section.get("elements", []): + contentObj = element.get("content", {}) + _resolveImageRef(contentObj) continue - for element in section.get("elements", []): - contentObj = element.get("content", {}) - fileRef = contentObj.get("_fileRef", "") - if not fileRef or contentObj.get("base64Data"): - continue - if knowledgeService: - chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) - imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] - if imageChunks: - contentObj["base64Data"] = imageChunks[0].get("data", "") - chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") - contentObj["mimeType"] = chunkMime - resolvedImages += 1 - if not contentObj.get("base64Data"): - try: - rawBytes = services.chat.getFileData(fileRef) - if rawBytes: - import base64 as _b64 - contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") - contentObj["mimeType"] = "image/png" - resolvedImages += 1 - except Exception as e: - logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}") - contentObj.pop("_fileRef", None) - contentObj.pop("_srcUrl", None) + # Paragraphs with inlineRuns + if cType == "paragraph": + for element in section.get("elements", []): + runs = element.get("content", {}).get("inlineRuns") + if runs: + _resolveInlineRuns(runs) + continue + # Bullet lists - items are List[List[InlineRun]] + if cType == "bullet_list": + for element in section.get("elements", []): + items = element.get("content", {}).get("items", []) + for item in items: + if isinstance(item, list): + _resolveInlineRuns(item) + continue + # Tables - headers and row cells are List[InlineRun] + if cType == "table": + for element in section.get("elements", []): + contentObj = element.get("content", {}) + for cell in contentObj.get("headers", []): + if isinstance(cell, list): + _resolveInlineRuns(cell) + for row in contentObj.get("rows", []): + for cell in row: + if isinstance(cell, list): + _resolveInlineRuns(cell) sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", [])) logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}") @@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services): language=language, title=title, userPrompt=content, + style=args.get("style"), ) if not documents: @@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services): "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, "title": {"type": "string", "description": "Document title", "default": "Document"}, "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, + "style": { + "type": "object", + "description": ( + "Optional style overrides for the rendered document. Supports nested keys: " + "fonts (primary, monospace), colors (primary, secondary, accent, background), " + "headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), " + "paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, " + "bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), " + "list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), " + "codeBlock (fontSizePt, background, borderColor), " + "page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). " + "Only provided keys override defaults; omitted keys keep their default values." + ), + }, }, }, readOnly=False, diff --git a/modules/serviceCenter/services/serviceAi/mainServiceAi.py b/modules/serviceCenter/services/serviceAi/mainServiceAi.py index 6428bed3..18ac46bc 100644 --- a/modules/serviceCenter/services/serviceAi/mainServiceAi.py +++ b/modules/serviceCenter/services/serviceAi/mainServiceAi.py @@ -86,7 +86,7 @@ class _ServicesAdapter: return getattr(w, "featureCode", None) if w else None def __getattr__(self, name: str): - if name in ("allowedProviders", "preferredProviders", "currentUserLanguage"): + if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"): return getattr(self.workflow, name, None) if self.workflow else None raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") @@ -177,6 +177,11 @@ class AiService: request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}") + # Calculate effective allowedModels: Workflow ∩ Request (node-level) + effectiveModels = self._calculateEffectiveModels(request) + if effectiveModels and request.options: + request.options = request.options.model_copy(update={'allowedModels': effectiveModels}) + # Neutralize prompt if enabled (before AI call) _wasNeutralized = False _excludedDocs: List[str] = [] @@ -225,6 +230,11 @@ class AiService: if effectiveProviders and request.options: request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) + # Calculate effective allowedModels: Workflow ∩ Request (node-level) + effectiveModels = self._calculateEffectiveModels(request) + if effectiveModels and request.options: + request.options = request.options.model_copy(update={'allowedModels': effectiveModels}) + # Neutralize prompt if enabled (before streaming) _wasNeutralized = False _excludedDocs: List[str] = [] @@ -1240,6 +1250,43 @@ detectedIntent-Werte: logger.warning(f"Error calculating effective providers: {e}") return None + def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]: + """ + Calculate effective allowed models: Workflow.allowedModels ∩ request.options.allowedModels. + + AND-logic intersection: + - If workflow specifies allowedModels, start with those. + - If request (node-level) also specifies allowedModels, intersect. + - Returns None if no model filtering is needed. + """ + try: + effectiveModels = None + + # Workflow-level allowedModels (from automation config) + workflowModels = getattr(self.services, 'allowedModels', None) + if workflowModels: + effectiveModels = list(workflowModels) + + # Request-level (node-level) allowedModels + requestModels = None + if request and request.options and request.options.allowedModels: + requestModels = request.options.allowedModels + + if requestModels: + if effectiveModels: + effectiveModels = [m for m in effectiveModels if m in requestModels] + else: + effectiveModels = list(requestModels) + + if effectiveModels: + logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={effectiveModels}") + + return effectiveModels if effectiveModels else None + + except Exception as e: + logger.warning(f"Error calculating effective models: {e}") + return None + async def ensureAiObjectsInitialized(self): """Ensure aiObjects is initialized and submodules are ready.""" if self.aiObjects is None: diff --git a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py index b9377404..6afcc0a8 100644 --- a/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/serviceCenter/services/serviceGeneration/mainServiceGeneration.py @@ -14,6 +14,7 @@ from .subDocumentUtility import ( detectMimeTypeFromData, convertDocumentDataToString ) +from .styleDefaults import resolveStyle logger = logging.getLogger(__name__) @@ -382,7 +383,7 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]: """ Render extracted JSON content to the specified output format. Processes EACH document separately and calls renderer for each. @@ -399,12 +400,14 @@ class GenerationService: userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation parentOperationId: Optional parent operation ID for hierarchical logging + style: Optional style overrides (deep-merged with DEFAULT_STYLE) Returns: List of RenderedDocument objects. Each RenderedDocument represents one rendered file (main document or supporting file) """ try: + resolvedStyle = resolveStyle(style) # Validate JSON input if not isinstance(extractedContent, dict): raise ValueError("extractedContent must be a JSON dictionary") @@ -469,7 +472,7 @@ class GenerationService: docTitle = doc.get("title", title) # Render this document (can return multiple files, e.g., HTML + images) - renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService) + renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle) allRenderedDocuments.extend(renderedDocs) logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py index b080ce88..583c423c 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/documentRendererBaseTemplate.py @@ -84,7 +84,7 @@ class BaseRenderer(ABC): return list(supportedSectionTypes) @abstractmethod - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render extracted JSON content to multiple documents. Each renderer must implement this method. @@ -95,6 +95,9 @@ class BaseRenderer(ABC): title: Report title userPrompt: Original user prompt for context aiService: AI service instance for additional processing + style: Fully-resolved unified style dict from styleDefaults.resolveStyle(). + When provided, renderers use these values instead of their + own defaults / AI-generated styles. Returns: List of RenderedDocument objects. @@ -102,6 +105,112 @@ class BaseRenderer(ABC): Even if only one document is returned, it must be wrapped in a list. """ pass + + def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]: + """Convert the unified resolvedStyle dict (from styleDefaults) into + the renderer-internal style-set format that all rendering methods already + consume. Override in subclasses for format-specific tweaks.""" + h1 = style["headings"]["h1"] + h2 = style["headings"]["h2"] + h3 = style["headings"].get("h3", h2) + h4 = style["headings"].get("h4", h3) + tbl = style["table"] + para = style["paragraph"] + lst = style["list"] + cb = style["codeBlock"] + return { + "title": { + "font_size": h1["sizePt"], "color": h1["color"], + "bold": h1.get("weight") == "bold", "align": "left", + }, + "heading1": { + "font_size": h1["sizePt"], "color": h1["color"], + "bold": h1.get("weight") == "bold", "align": "left", + }, + "heading2": { + "font_size": h2["sizePt"], "color": h2["color"], + "bold": h2.get("weight") == "bold", "align": "left", + }, + "heading3": { + "font_size": h3["sizePt"], "color": h3["color"], + "bold": h3.get("weight") == "bold", "align": "left", + }, + "heading4": { + "font_size": h4["sizePt"], "color": h4["color"], + "bold": h4.get("weight") == "bold", "align": "left", + }, + "paragraph": { + "font_size": para["sizePt"], "color": para["color"], + "bold": False, "align": "left", + }, + "table_header": { + "background": tbl["headerBg"], "text_color": tbl["headerFg"], + "bold": True, "align": "center", + }, + "table_cell": { + "background": tbl["rowBandingOdd"], "text_color": para["color"], + "bold": False, "align": "left", + }, + "table_border": { + "style": "grid", "color": tbl["borderColor"], + }, + "bullet_list": { + "font_size": lst["sizePt"], "color": para["color"], + "indent": lst["indentPt"], + }, + "code_block": { + "font": style["fonts"]["monospace"], + "font_size": cb["fontSizePt"], "color": para["color"], + "background": cb["background"], + }, + } + + @staticmethod + def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any: + """Extract inline runs from new-format content, falling back to old format. + + For paragraphs (itemsKey=None): + new: content["inlineRuns"] -> List[InlineRun] + old: content["text"] -> wrapped in [{"type":"text","value":text}] + + For list items (itemsKey="items"): + new: content["items"] is List[List[InlineRun]] + old: content["items"] is List[str] or List[{"text":…}] + Returns the items list (caller decides per-item conversion). + + For table headers/cells: + new: each header/cell is List[InlineRun] + old: each header/cell is a plain str + Caller handles per-cell. + """ + if itemsKey: + return content.get(itemsKey, []) + inlineRuns = content.get("inlineRuns") + if inlineRuns: + return inlineRuns + text = content.get("text", "") + if text: + return [{"type": "text", "value": text}] + return [] + + @staticmethod + def _inlineRunsForCell(cell) -> list: + """Normalize a single table header or cell value to List[InlineRun]. + Accepts either a plain string or an already-correct list of run dicts.""" + if isinstance(cell, list): + return cell + return [{"type": "text", "value": str(cell) if cell is not None else ""}] + + @staticmethod + def _inlineRunsForListItem(item) -> list: + """Normalize a single list item to List[InlineRun]. + Accepts a plain string, a dict with 'text', or an already-correct list of run dicts.""" + if isinstance(item, list): + return item + if isinstance(item, dict): + text = item.get("text", "") + return [{"type": "text", "value": text}] + return [{"type": "text", "value": str(item)}] def _determineFilename(self, title: str, mimeType: str) -> str: """Determine filename from title and mimeType.""" diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py index 7a1277ca..ab37f756 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererDocx.py @@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") try: if not DOCX_AVAILABLE: - # Fallback to HTML if python-docx not available from .rendererHtml import RendererHtml htmlRenderer = RendererHtml() - return await htmlRenderer.render(extractedContent, title, userPrompt, aiService) + return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate DOCX using AI-analyzed styling - docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) + docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer): ) ] - async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str: """Generate DOCX content from structured JSON document.""" import time start_time = time.time() try: self.logger.debug("_generateDocxFromJson: Starting document generation") - # Create new document doc = Document() self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s") - # Get style set: use styles from metadata if available, otherwise enhance with AI - template_from_metadata = None - if json_content and isinstance(json_content.get("metadata"), dict): - template_from_metadata = json_content["metadata"].get("templateName") + # Phase 3: prefer unified style when provided style_start = time.time() self.logger.debug("_generateDocxFromJson: About to get style set") - styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) + if unifiedStyle: + styleSet = self._convertUnifiedStyleToInternal(unifiedStyle) + self._unifiedStyle = unifiedStyle + else: + template_from_metadata = None + if json_content and isinstance(json_content.get("metadata"), dict): + template_from_metadata = json_content["metadata"].get("templateName") + styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) + self._unifiedStyle = None self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s") # Setup basic document styles and create all styles from style set @@ -298,11 +301,11 @@ class RendererDocx(BaseRenderer): def _setupBasicDocumentStyles(self, doc: Document) -> None: """Set up basic document styles.""" try: - # Set default font style = doc.styles['Normal'] font = style.font - font.name = 'Calibri' - font.size = Pt(11) + us = getattr(self, '_unifiedStyle', None) + font.name = us["fonts"]["primary"] if us else 'Calibri' + font.size = Pt(us["paragraph"]["sizePt"] if us else 11) except Exception as e: self.logger.warning(f"Could not set up basic document styles: {str(e)}") @@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer): def _addMarkdownInlineRuns(self, paragraph, text: str) -> None: """Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph.""" pos = 0 + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" for m in self._MD_INLINE_RE.finditer(text): if m.start() > pos: paragraph.add_run(text[pos:m.start()]) @@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer): paragraph.add_run(m.group(6)).italic = True elif m.group(7): run = paragraph.add_run(m.group(7)) - run.font.name = "Courier New" + run.font.name = monoFont run.font.size = Pt(9) pos = m.end() if pos < len(text): paragraph.add_run(text[pos:]) + def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None: + """Process a list of InlineRun dicts into python-docx Runs on a paragraph.""" + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" + for run in runs: + runType = run.get("type", "text") + value = run.get("value", "") + if runType == "text": + paragraph.add_run(value) + elif runType == "bold": + paragraph.add_run(value).bold = True + elif runType == "italic": + paragraph.add_run(value).italic = True + elif runType == "code": + r = paragraph.add_run(value) + r.font.name = monoFont + r.font.size = Pt(9) + elif runType == "link": + r = paragraph.add_run(value) + r.font.underline = True + r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9) + elif runType == "image": + b64 = run.get("base64Data", "") + if b64: + try: + imgBytes = base64.b64decode(b64) + imgStream = io.BytesIO(imgBytes) + paragraph.add_run().add_picture(imgStream, width=Inches(2)) + except Exception: + paragraph.add_run(f"[Image: {run.get('altText', '')}]") + else: + paragraph.add_run(value) + def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """ Render a JSON table to DOCX using AI-generated styles. @@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.error(f"Error rendering table: {str(e)}", exc_info=True) - def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None: + def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None: """ High-performance table rendering using direct XML manipulation. @@ -546,24 +584,34 @@ class RendererDocx(BaseRenderer): # Build all rows using fast XML rows_start = time.time() - # Header row - headerRow = self._createTableRowXml(headers, isHeader=True) + # Resolve header style colors + tableStyle = styles.get("table_header", {}) + headerBg = tableStyle.get("background", "") + headerFg = tableStyle.get("text_color", "") + + # Flatten inline-run headers to plain strings for fast XML path + flatHeaders = [] + for h in headers: + runs = self._inlineRunsForCell(h) + flatHeaders.append("".join(r.get("value", "") for r in runs)) + + headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None) tbl.append(headerRow) - + header_time = time.time() - rows_start self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s") - - # Data rows - batch process for performance + data_start = time.time() rowCount = len(rows) - + for idx, rowData in enumerate(rows): - # Convert all cells to strings - cellTexts = [str(cell) if cell is not None else '' for cell in rowData] - # Pad if needed - while len(cellTexts) < len(headers): + cellTexts = [] + for cell in rowData: + runs = self._inlineRunsForCell(cell) + cellTexts.append("".join(r.get("value", "") for r in runs)) + while len(cellTexts) < len(flatHeaders): cellTexts.append('') - + row = self._createTableRowXml(cellTexts, isHeader=False) tbl.append(row) @@ -641,74 +689,64 @@ class RendererDocx(BaseRenderer): return tblBorders - def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any: - """ - Create a table row XML element with cells. - - This is the core fast-path: builds the row XML directly without - going through python-docx's slow cell.text assignment. - """ + def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any: + """Create a table row XML element with cells. + Fast-path: builds row XML directly via lxml.""" from docx.oxml.shared import OxmlElement, qn - + + if headerBgHex is None: + us = getattr(self, '_unifiedStyle', None) + headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864' + else: + headerBgHex = headerBgHex.lstrip('#') + if headerFgHex is None: + us = getattr(self, '_unifiedStyle', None) + headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF' + else: + headerFgHex = headerFgHex.lstrip('#') + tr = OxmlElement('w:tr') - - # Row properties for header if isHeader: trPr = OxmlElement('w:trPr') - tblHeader = OxmlElement('w:tblHeader') - trPr.append(tblHeader) + trPr.append(OxmlElement('w:tblHeader')) tr.append(trPr) - + for cellText in cells: - # Create cell tc = OxmlElement('w:tc') - - # Cell properties tcPr = OxmlElement('w:tcPr') tcW = OxmlElement('w:tcW') tcW.set(qn('w:type'), 'auto') tcW.set(qn('w:w'), '0') tcPr.append(tcW) - - # Header cell styling - light blue background + if isHeader: shd = OxmlElement('w:shd') shd.set(qn('w:val'), 'clear') shd.set(qn('w:color'), 'auto') - shd.set(qn('w:fill'), '4472C4') # Professional blue + shd.set(qn('w:fill'), headerBgHex) tcPr.append(shd) - + tc.append(tcPr) - - # Paragraph with text p = OxmlElement('w:p') - - # Add run with text r = OxmlElement('w:r') - - # Header text styling - bold and white + if isHeader: rPr = OxmlElement('w:rPr') - b = OxmlElement('w:b') - rPr.append(b) - # White text color + rPr.append(OxmlElement('w:b')) color = OxmlElement('w:color') - color.set(qn('w:val'), 'FFFFFF') + color.set(qn('w:val'), headerFgHex) rPr.append(color) r.append(rPr) - - # Text element + t = OxmlElement('w:t') - # Preserve spaces if text starts/ends with whitespace if cellText and (cellText[0] == ' ' or cellText[-1] == ' '): t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') t.text = cellText r.append(t) - p.append(r) tc.append(p) tr.append(tc) - + return tr def _applyHorizontalBordersOnly(self, table) -> None: @@ -836,47 +874,37 @@ class RendererDocx(BaseRenderer): def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance.""" try: - # Extract from nested content structure content = list_data.get("content", {}) if not isinstance(content, dict): return items = content.get("items", []) bullet_style = styles.get("bullet_list", {}) - - # Pre-calculate and cache style objects to avoid repeated parsing - font_size_pt = None + + font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None text_color_rgb = None - if bullet_style: - if "font_size" in bullet_style: - font_size_pt = Pt(bullet_style["font_size"]) - if "color" in bullet_style: - color_hex = bullet_style["color"].lstrip('#') - text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) - + if bullet_style.get("color"): + color_hex = bullet_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + for item in items: - itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "") - if not itemText: + itemRuns = self._inlineRunsForListItem(item) + if not itemRuns or not any(r.get("value") for r in itemRuns): continue para = doc.add_paragraph(style='List Bullet') - self._addMarkdownInlineRuns(para, itemText) - - # Apply bullet list styling from style set - use cached objects - if bullet_style and para.runs: - # Use direct access instead of iterating - if len(para.runs) > 0: - run = para.runs[0] - if font_size_pt: - run.font.size = font_size_pt - if text_color_rgb: - run.font.color.rgb = text_color_rgb - else: - # Create run if none exists - run = para.add_run() - if font_size_pt: - run.font.size = font_size_pt - if text_color_rgb: - run.font.color.rgb = text_color_rgb - + isNewRunFormat = isinstance(item, list) + if isNewRunFormat: + self._renderInlineRuns(itemRuns, para, styles) + else: + itemText = "".join(r.get("value", "") for r in itemRuns) + self._addMarkdownInlineRuns(para, itemText) + + if bullet_style and para.runs and len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + if text_color_rgb: + run.font.color.rgb = text_color_rgb + except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") @@ -905,90 +933,79 @@ class RendererDocx(BaseRenderer): def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON paragraph to DOCX using AI-generated styles.""" try: - # Extract from nested content structure content = paragraph_data.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + inlineRuns = self._inlineRunsFromContent(content) elif isinstance(content, str): - text = content + inlineRuns = [{"type": "text", "value": content}] else: - text = "" - - # CRITICAL: Prevent rendering base64 image data as text - # Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG) - if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or - (len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))): - # This looks like base64 data - don't render as text - self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})") + inlineRuns = [] + + if not inlineRuns: + return + + plainText = "".join(r.get("value", "") for r in inlineRuns) + if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or + (len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))): + self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})") para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]") if para.runs: - para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error + para.runs[0].font.color.rgb = RGBColor(255, 0, 0) return - - if text: - para = doc.add_paragraph() - self._addMarkdownInlineRuns(para, text) - paragraph_style = styles.get("paragraph", {}) - if paragraph_style: - # Pre-calculate and cache style objects - font_size_pt = None - text_color_rgb = None - if "font_size" in paragraph_style: - font_size_pt = Pt(paragraph_style["font_size"]) - if "color" in paragraph_style: - color_hex = paragraph_style["color"].lstrip('#') - text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) - bold = paragraph_style.get("bold", False) - - # Use direct access instead of iterating - if len(para.runs) > 0: - run = para.runs[0] - if font_size_pt: - run.font.size = font_size_pt - run.font.bold = bold - if text_color_rgb: - run.font.color.rgb = text_color_rgb + + para = doc.add_paragraph() + hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None + if hasNewRuns: + self._renderInlineRuns(inlineRuns, para, styles) + else: + self._addMarkdownInlineRuns(para, plainText) + + paragraph_style = styles.get("paragraph", {}) + if paragraph_style: + font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None + text_color_rgb = None + if "color" in paragraph_style: + color_hex = paragraph_style["color"].lstrip('#') + text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) + bold = paragraph_style.get("bold", False) + if len(para.runs) > 0: + run = para.runs[0] + if font_size_pt: + run.font.size = font_size_pt + run.font.bold = bold + if text_color_rgb: + run.font.color.rgb = text_color_rgb + if "align" in paragraph_style: + align = paragraph_style["align"] + if align == "center": + para.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif align == "right": + para.alignment = WD_ALIGN_PARAGRAPH.RIGHT else: - # Create run if none exists - run = para.add_run() - if font_size_pt: - run.font.size = font_size_pt - run.font.bold = bold - if text_color_rgb: - run.font.color.rgb = text_color_rgb - - if "align" in paragraph_style: - align = paragraph_style["align"] - if align == "center": - para.alignment = WD_ALIGN_PARAGRAPH.CENTER - elif align == "right": - para.alignment = WD_ALIGN_PARAGRAPH.RIGHT - else: - para.alignment = WD_ALIGN_PARAGRAPH.LEFT - + para.alignment = WD_ALIGN_PARAGRAPH.LEFT + except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: - # Extract from nested content structure content = code_data.get("content", {}) if not isinstance(content, dict): return code = content.get("code", "") language = content.get("language", "") code_style = styles.get("code_block", {}) - + us = getattr(self, '_unifiedStyle', None) + if code: if language: lang_para = doc.add_paragraph(f"Code ({language}):") if len(lang_para.runs) > 0: lang_para.runs[0].bold = True - - # Pre-calculate and cache style objects - code_font_name = code_style.get("font", "Courier New") - code_font_size_pt = Pt(code_style.get("font_size", 9)) + + code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New") + code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9)) code_text_color_rgb = None if "color" in code_style: color_hex = code_style["color"].lstrip('#') diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py index 58143ac2..b39efd50 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererHtml.py @@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render HTML document with images as separate files. Returns list of documents: [HTML document, image1, image2, ...] @@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer): self._renderedImages = images # Generate HTML using AI-analyzed styling - htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) + htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style) # Replace base64 data URIs with relative file paths if images exist if images: @@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer): return resultDocuments - async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" try: - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + # Use unified style when provided, otherwise fall back to existing flow + if style: + styles = self._convertUnifiedStyleToInternal(style) + self._unifiedStyle = style + else: + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + self._unifiedStyle = None # Validate JSON structure if not self._validateJsonStructure(jsonContent): @@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer): def _generateCssStyles(self, styles: Dict[str, Any]) -> str: """Generate CSS from style definitions.""" + # When unified style is available, generate CSS directly from it + if getattr(self, "_unifiedStyle", None): + return self._generateCssFromUnifiedStyle(self._unifiedStyle) + css_parts = [] # Body styles @@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) + def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str: + """Generate CSS directly from unified style dict.""" + fonts = style.get("fonts", {}) + colors = style.get("colors", {}) + headings = style.get("headings", {}) + para = style.get("paragraph", {}) + tbl = style.get("table", {}) + lst = style.get("list", {}) + cb = style.get("codeBlock", {}) + page = style.get("page", {}) + + primaryFont = fonts.get("primary", "Arial, sans-serif") + monoFont = fonts.get("monospace", "Courier New, monospace") + bgColor = colors.get("background", "#FFFFFF") + primaryColor = colors.get("primary", "#1F3864") + paraColor = para.get("color", "#333333") + paraSizePt = para.get("sizePt", 11) + lineSpacing = para.get("lineSpacing", 1.15) + + css_parts = [] + + # Body + css_parts.append("body {") + css_parts.append(f" font-family: {primaryFont};") + css_parts.append(f" background: {bgColor};") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" font-size: {paraSizePt}pt;") + css_parts.append(f" line-height: {lineSpacing};") + margins = page.get("marginsPt", {}) + if margins: + css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;") + else: + css_parts.append(" margin: 0; padding: 20px;") + css_parts.append("}") + + # Document title (uses h1 style) + h1 = headings.get("h1", {}) + css_parts.append(".document-title {") + css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;") + css_parts.append(f" color: {h1.get('color', primaryColor)};") + css_parts.append(f" font-weight: {h1.get('weight', 'bold')};") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Headings h1-h4 + for level in range(1, 5): + key = f"h{level}" + h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {})) + css_parts.append(f"h{level} {{") + css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;") + css_parts.append(f" color: {h.get('color', primaryColor)};") + css_parts.append(f" font-weight: {h.get('weight', 'bold')};") + css_parts.append(f" margin: 1.2em 0 0.4em 0;") + css_parts.append("}") + + # Paragraphs + css_parts.append("p {") + css_parts.append(f" font-size: {paraSizePt}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" line-height: {lineSpacing};") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Tables + borderColor = tbl.get("borderColor", "#DEE2E6") + css_parts.append("table {") + css_parts.append(f" border-collapse: collapse;") + css_parts.append(f" width: 100%;") + css_parts.append(f" margin: 1em 0;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Table headers + css_parts.append("th {") + css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};") + css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};") + css_parts.append(" font-weight: bold;") + css_parts.append(" text-align: center;") + css_parts.append(f" padding: 10px;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Table cells + css_parts.append("td {") + css_parts.append(f" color: {paraColor};") + css_parts.append(" padding: 8px;") + css_parts.append(f" border: 1px solid {borderColor};") + css_parts.append("}") + + # Lists + css_parts.append("ul {") + css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;") + css_parts.append(" margin: 0 0 1em 0;") + css_parts.append("}") + + # Code blocks + css_parts.append("pre {") + css_parts.append(f" font-family: {monoFont};") + css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;") + css_parts.append(f" color: {paraColor};") + css_parts.append(f" background: {cb.get('background', '#F8F9FA')};") + css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};") + css_parts.append(" border-radius: 4px;") + css_parts.append(" padding: 1em;") + css_parts.append(" margin: 1em 0;") + css_parts.append(" overflow-x: auto;") + css_parts.append("}") + + # Images + css_parts.append("img {") + css_parts.append(" max-width: 100%;") + css_parts.append(" height: auto;") + css_parts.append(" margin: 1em 0;") + css_parts.append(" border-radius: 4px;") + css_parts.append("}") + + # Generated info + css_parts.append(".generated-info {") + css_parts.append(" font-size: 0.9em;") + css_parts.append(" color: #666;") + css_parts.append(" text-align: center;") + css_parts.append(" margin-top: 2em;") + css_parts.append(" padding-top: 1em;") + css_parts.append(" border-top: 1px solid #ddd;") + css_parts.append("}") + + return '\n'.join(css_parts) + + def _renderInlineRuns(self, runs: list) -> str: + """Convert inline runs to HTML markup.""" + import html as htmlLib + parts = [] + for run in runs: + runType = run.get("type", "text") + value = htmlLib.escape(run.get("value", "")) + if runType == "text": + parts.append(value) + elif runType == "bold": + parts.append(f"{value}") + elif runType == "italic": + parts.append(f"{value}") + elif runType == "code": + parts.append(f"{value}") + elif runType == "link": + href = htmlLib.escape(run.get("href", "")) + parts.append(f'{value}') + elif runType == "image": + b64 = run.get("base64Data", "") + mime = run.get("mimeType", "image/png") + alt = value + if b64: + parts.append(f'{alt}') + else: + parts.append(value) + return "".join(parts) + def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a single JSON section to HTML using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. @@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer): # Regular paragraph element - extract from nested content structure (standard JSON format) content = element.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + htmlParts.append(f'

{self._renderInlineRuns(inlineRuns)}

') + continue text = content.get("text", "") elif isinstance(content, str): text = content @@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer): # Table header htmlParts.append('') for header in headers: - htmlParts.append(f'{header}') + runs = self._inlineRunsForCell(header) + htmlParts.append(f'{self._renderInlineRuns(runs)}') htmlParts.append('') # Table body @@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer): for row in rows: htmlParts.append('') for cellData in row: - htmlParts.append(f'{cellData}') + runs = self._inlineRunsForCell(cellData) + htmlParts.append(f'{self._renderInlineRuns(runs)}') htmlParts.append('') htmlParts.append('') @@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer): htmlParts = ['') return '\n'.join(htmlParts) @@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer): if isinstance(el, dict): content = el.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + texts.append(self._renderInlineRuns(inlineRuns)) + continue text = content.get("text", "") elif isinstance(content, str): text = content @@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer): elif isinstance(el, str): texts.append(el) if texts: - # Join multiple paragraphs with

tags return '\n'.join(f'

{text}

' for text in texts) return "" elif isinstance(paragraphData, str): return f'

{paragraphData}

' elif isinstance(paragraphData, dict): - # Handle nested content structure: element.content vs element.text # Extract from nested content structure content = paragraphData.get("content", {}) if isinstance(content, dict): + # New format: inlineRuns + inlineRuns = content.get("inlineRuns") + if inlineRuns and isinstance(inlineRuns, list): + return f'

{self._renderInlineRuns(inlineRuns)}

' text = content.get("text", "") elif isinstance(content, str): text = content diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py index df2aff10..31537980 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPdf.py @@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to PDF format using AI-analyzed styling.""" try: if not REPORTLAB_AVAILABLE: # Fallback to HTML if reportlab not available from .rendererHtml import RendererHtml html_renderer = RendererHtml() - return await html_renderer.render(extractedContent, title, userPrompt, aiService) + return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate PDF using AI-analyzed styling - pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) + pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer): ) ] - async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str: """Generate PDF content from structured JSON document using AI-generated styling.""" try: - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(json_content, userPrompt, aiService) + # Get style set from unified style or legacy approach + if unifiedStyle: + styles = self._convertUnifiedStyleToInternal(unifiedStyle) + self._unifiedStyle = unifiedStyle + for level in range(1, 7): + hKey = f"heading{level}" + if hKey not in styles: + styles[hKey] = self._defaultHeadingStyleDef(level) + else: + styles[hKey].setdefault("space_after", 12) + styles[hKey].setdefault("space_before", 12) + styles["paragraph"].setdefault("space_after", 6) + styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2)) + styles["bullet_list"].setdefault("space_after", 3) + styles["code_block"].setdefault("space_after", 6) + styles["code_block"].setdefault("align", "left") + else: + styles = await self._getStyleSet(json_content, userPrompt, aiService) + self._unifiedStyle = None # Validate JSON structure if not self._validateJsonStructure(json_content): @@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer): # Create a buffer to hold the PDF buffer = io.BytesIO() - # Create PDF document - doc = SimpleDocTemplate( - buffer, - pagesize=A4, - rightMargin=72, - leftMargin=72, - topMargin=72, - bottomMargin=18 - ) + # Create PDF document with unified page margins or defaults + pageCfg = unifiedStyle["page"] if unifiedStyle else None + if pageCfg: + m = pageCfg["marginsPt"] + doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"]) + else: + doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18) # Build PDF content (no cover page — body starts on page 1; filename still uses `title`) story = [] @@ -609,6 +624,31 @@ class RendererPdf(BaseRenderer): .replace(">", ">") ) + def _renderInlineRunsToPdfXml(self, runs: list) -> str: + """Convert inline runs to ReportLab Paragraph XML.""" + parts = [] + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier" + for run in runs: + runType = run.get("type", "text") + value = self._escapeReportlabXml(run.get("value", "")) + if runType == "text": + parts.append(value) + elif runType == "bold": + parts.append(f"{value}") + elif runType == "italic": + parts.append(f"{value}") + elif runType == "code": + parts.append(f'{value}') + elif runType == "link": + href = self._escapeReportlabXml(run.get("href", "")) + parts.append(f'{value}') + elif runType == "image": + parts.append(f"[Image: {value}]") + else: + parts.append(value) + return "".join(parts) + def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str: """Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately).""" if not text: @@ -744,10 +784,10 @@ class RendererPdf(BaseRenderer): return [] headers = content.get("headers", []) rows = content.get("rows", []) - + if not headers or not rows: return [] - + numCols = len(headers) colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1) colWidths = [colWidth] * numCols @@ -755,8 +795,12 @@ class RendererPdf(BaseRenderer): hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header") cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell") - def _cellPara(val, ps): - return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps) + def _cellPara(cell, ps): + runs = self._inlineRunsForCell(cell) + if isinstance(cell, list): + xml = self._renderInlineRunsToPdfXml(runs) + return Paragraph(_wrapEmojiSpansInXml(xml), ps) + return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps) headerRow = [_cellPara(h, hdrPs) for h in headers] bodyRows = [] @@ -786,7 +830,7 @@ class RendererPdf(BaseRenderer): ] table.setStyle(TableStyle(table_style)) return [table, Spacer(1, 12)] - + except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return [] @@ -794,32 +838,29 @@ class RendererPdf(BaseRenderer): def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON bullet list to PDF elements using AI-generated styles.""" try: - # Extract from nested content structure content = list_data.get("content", {}) if not isinstance(content, dict): return [] items = content.get("items", []) - bullet_style_def = styles.get("bullet_list", {}) - + bulletStyleDef = styles.get("bullet_list", {}) + normalStyle = self._createNormalStyle(styles) + elements = [] for item in items: - if isinstance(item, str): - elements.append( - Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles)) - ) + runs = self._inlineRunsForListItem(item) + if isinstance(item, list): + xml = self._renderInlineRunsToPdfXml(runs) + elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle)) + elif isinstance(item, str): + elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle)) elif isinstance(item, dict) and "text" in item: - elements.append( - Paragraph( - f"• {self._markdownInlineToReportlabXml(item['text'])}", - self._createNormalStyle(styles), - ) - ) - + elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle)) + if elements: - elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) - + elements.append(Spacer(1, bulletStyleDef.get("space_after", 3))) + return elements - + except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") return [] @@ -848,20 +889,27 @@ class RendererPdf(BaseRenderer): def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON paragraph to PDF elements using AI-generated styles.""" try: - # Extract from nested content structure content = paragraph_data.get("content", {}) - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - + if isinstance(content, str): + content = {"text": content} + if not isinstance(content, dict): + return [] + + normalStyle = self._createNormalStyle(styles) + + if "inlineRuns" in content: + runs = self._inlineRunsFromContent(content) + xml = self._renderInlineRunsToPdfXml(runs) + if xml: + return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)] + return [] + + text = content.get("text", "") if text: - return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))] - + return [self._paragraphFromInlineMarkdown(text, normalStyle)] + return [] - + except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") return [] diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py index 3bdff7f1..49ee8048 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererPptx.py @@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """ Render content as PowerPoint presentation from JSON data. @@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer): title: Title for the presentation userPrompt: User prompt for AI styling aiService: AI service for styling - **kwargs: Additional rendering options + style: Unified style dict from pipeline (preferred over AI-generated styles) Returns: Base64-encoded PowerPoint presentation as string @@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer): from pptx.dml.color import RGBColor import re - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(extractedContent, userPrompt, aiService) + # Get style set: prefer unified style, then metadata, then AI-enhanced + if style: + internalStyle = self._convertUnifiedStyleToInternal(style) + defaultPptx = self._getDefaultStyleSet() + for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"): + internalStyle[key] = defaultPptx.get(key) + internalStyle["heading"] = internalStyle["heading1"] + internalStyle["subheading"] = internalStyle["heading2"] + styles = internalStyle + self._unifiedStyle = style + else: + styles = await self._getStyleSet(extractedContent, userPrompt, aiService) + self._unifiedStyle = None # Create new presentation prs = Presentation() @@ -910,15 +921,17 @@ JSON ONLY. NO OTHER TEXT.""" # Extract from nested content structure content = paragraph_data.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + if content.get("inlineRuns"): + text = "".join(r.get("value", "") for r in content["inlineRuns"]) + else: + text = content.get("text", "") elif isinstance(content, str): text = content else: text = "" if text: - # Limit paragraph length based on content density - max_length = 200 # Default limit + max_length = 200 if len(text) > max_length: text = text[:max_length] + "..." @@ -1303,6 +1316,32 @@ JSON ONLY. NO OTHER TEXT.""" r.text = text[pos:] _applyBase(r) + def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None): + """Process InlineRun dicts into pptx text runs.""" + from pptx.util import Pt + paragraph.text = "" + us = getattr(self, '_unifiedStyle', None) + monoFont = us["fonts"]["monospace"] if us else "Courier New" + for run in runs: + runType = run.get("type", "text") + value = run.get("value", "") + r = paragraph.add_run() + r.text = value + if fontSize: + r.font.size = fontSize + if fontColor: + r.font.color.rgb = fontColor + if runType == "bold": + r.font.bold = True + elif runType == "italic": + r.font.italic = True + elif runType == "code": + r.font.name = monoFont + if fontSize and hasattr(fontSize, 'pt'): + r.font.size = Pt(max(8, int(fontSize.pt * 0.85))) + elif runType == "link": + r.font.underline = True + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None: """Add a PowerPoint table to slide.""" try: @@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT.""" cell = table.cell(0, col_idx) # Clear existing text and set new text cell.text_frame.clear() - header_text = str(header) if header else "" + cellRuns = self._inlineRunsForCell(header) + header_text = "".join(r.get("value", "") for r in cellRuns) cell.text = header_text # Ensure paragraph exists @@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT.""" cell = table.cell(row_idx, col_idx) # Clear existing text and set new text cell.text_frame.clear() - cell_text = str(cell_data) if cell_data is not None else "" + cellRuns = self._inlineRunsForCell(cell_data) + cell_text = "".join(r.get("value", "") for r in cellRuns) cell.text = cell_text # Ensure paragraph exists @@ -1462,9 +1503,8 @@ JSON ONLY. NO OTHER TEXT.""" fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47)))) for item in items: - itemText = item.get("text", "") if isinstance(item, dict) else str(item) - if not itemText or not itemText.strip(): - continue + runs = self._inlineRunsForListItem(item) + isNewFormat = isinstance(item, list) p = text_frame.add_paragraph() p.level = 0 @@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT.""" p.space_before = Pt(2) p.space_after = Pt(2) - # Consistent bullet prefix - self._addMarkdownInlineRuns(p, f" • {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + if isNewFormat: + bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs + self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor) + else: + itemText = item.get("text", "") if isinstance(item, dict) else str(item) + if not itemText or not itemText.strip(): + continue + self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) - # Subitems + # Subitems (only for dict-style items) if isinstance(item, dict): for sub in item.get("subitems", []): - subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) - if not subText: - continue + subRuns = self._inlineRunsForListItem(sub) + isSubNew = isinstance(sub, list) sp = text_frame.add_paragraph() sp.level = 0 sp.alignment = PP_ALIGN.LEFT sp.space_before = Pt(1) sp.space_after = Pt(1) - self._addMarkdownInlineRuns(sp, f" – {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) + if isSubNew: + subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns + self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor) + else: + subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) + if not subText: + continue + self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") @@ -1540,42 +1592,53 @@ JSON ONLY. NO OTHER TEXT.""" # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): + inlineRuns = self._inlineRunsFromContent(content) + hasInlineRuns = content.get("inlineRuns") is not None text = content.get("text", "") elif isinstance(content, str): text = content + inlineRuns = [{"type": "text", "value": text}] if text else [] + hasInlineRuns = False else: text = "" + inlineRuns = [] + hasInlineRuns = False - if text: - p = text_frame.add_paragraph() - p.level = 0 - - try: - if hasattr(p, 'paragraph_format'): - p.paragraph_format.bullet.type = None - except (AttributeError, TypeError): - pass - - paragraph_style = styles.get("paragraph", {}) - base_font_size = paragraph_style.get("font_size", 14) - calculated_size = max(10, int(base_font_size * font_size_multiplier)) - fSize = Pt(calculated_size) - fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) - fBold = paragraph_style.get("bold", False) + if not inlineRuns and not text: + return + + p = text_frame.add_paragraph() + p.level = 0 + + try: + if hasattr(p, 'paragraph_format'): + p.paragraph_format.bullet.type = None + except (AttributeError, TypeError): + pass + + paragraph_style = styles.get("paragraph", {}) + base_font_size = paragraph_style.get("font_size", 14) + calculated_size = max(10, int(base_font_size * font_size_multiplier)) + fSize = Pt(calculated_size) + fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + fBold = paragraph_style.get("bold", False) + + if hasInlineRuns: + self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor) + else: self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold) - - # Add proper spacing - p.space_before = Pt(6) # Space before paragraph - p.space_after = Pt(6) # Space after paragraph - p.line_spacing = 1.2 # Line spacing for readability - - align = paragraph_style.get("align", "left") - if align == "center": - p.alignment = PP_ALIGN.CENTER - elif align == "right": - p.alignment = PP_ALIGN.RIGHT - else: - p.alignment = PP_ALIGN.LEFT + + p.space_before = Pt(6) + p.space_after = Pt(6) + p.line_spacing = 1.2 + + align = paragraph_style.get("align", "left") + if align == "center": + p.alignment = PP_ALIGN.CENTER + elif align == "right": + p.alignment = PP_ALIGN.RIGHT + else: + p.alignment = PP_ALIGN.LEFT except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") diff --git a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py index 79f5688c..3c6fdd5e 100644 --- a/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/serviceCenter/services/serviceGeneration/renderers/rendererXlsx.py @@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer): from modules.datamodels.datamodelJson import supportedSectionTypes return list(supportedSectionTypes) - async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]: """Render extracted JSON content to Excel format using AI-analyzed styling.""" try: if not OPENPYXL_AVAILABLE: # Fallback to CSV if openpyxl not available from .rendererCsv import RendererCsv csvRenderer = RendererCsv() - return await csvRenderer.render(extractedContent, title, userPrompt, aiService) + return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style) # Generate Excel using AI-analyzed styling - excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) + excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService, style=style) # Extract metadata for document type and other info metadata = extractedContent.get("metadata", {}) if extractedContent else {} @@ -298,15 +298,22 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate analysis sheet: {str(e)}") - async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: + async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str: """Generate Excel content from structured JSON document using AI-generated styling.""" try: # Debug output self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") - # Get style set: use styles from metadata if available, otherwise enhance with AI - styles = await self._getStyleSet(jsonContent, userPrompt, aiService) + # Store unified style for use by inline-run helpers + self._unifiedStyle = style + + # Get style set: prefer unified style, fall back to legacy approach + if style: + styles = self._convertUnifiedStyleToInternal(style) + styles = self._convertColorsFormat(styles) + else: + styles = await self._getStyleSet(jsonContent, userPrompt, aiService) # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) if not self._validateJsonStructure(jsonContent): @@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"} } + def _renderInlineRuns(self, runs: list) -> str: + """Flatten inline runs to plain text for Excel cells.""" + return "".join(r.get("value", "") for r in runs) + async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper Excel color conversion.""" if not aiService: @@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer): # Add headers with formatting - OPTIMIZED: use cached style objects for col, header in enumerate(headers, 1): - sanitized_header = self._sanitizeCellValue(header) + runs = self._inlineRunsForCell(header) + headerText = self._renderInlineRuns(runs) + sanitized_header = self._sanitizeCellValue(headerText) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) # Apply styling with fallbacks - use pre-calculated objects @@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer): cell_values = cell_values[:header_count] for col, cell_value in enumerate(cell_values, 1): - sanitized_value = self._sanitizeCellValue(cell_value) + runs = self._inlineRunsForCell(cell_value) + cellText = self._renderInlineRuns(runs) + sanitized_value = self._sanitizeCellValue(cellText) cell = sheet.cell(row=startRow, column=col, value=sanitized_value) # Apply styling with fallbacks - use pre-calculated objects @@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer): def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a list element to Excel sheet. Expects nested content structure.""" try: - # Extract from nested content structure content = element.get("content", {}) if not isinstance(content, dict): return startRow - list_items = content.get("items") or [] - # Ensure list_items is a list - if not isinstance(list_items, list): - list_items = [] + listItems = content.get("items") or [] + if not isinstance(listItems, list): + listItems = [] - list_style = styles.get("bullet_list", {}) - for item in list_items: - sheet.cell(row=startRow, column=1, value=f"• {item}") - if list_style.get("color"): - sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"])) + listStyle = styles.get("bullet_list", {}) + for item in listItems: + runs = self._inlineRunsForListItem(item) + text = self._renderInlineRuns(runs) + sheet.cell(row=startRow, column=1, value=f"\u2022 {text}") + if listStyle.get("color"): + sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"])) startRow += 1 return startRow @@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer): def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a paragraph element to Excel sheet. Expects nested content structure.""" try: - # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): - text = content.get("text", "") + runs = self._inlineRunsFromContent(content) + text = self._renderInlineRuns(runs) elif isinstance(content, str): text = content else: diff --git a/modules/serviceCenter/services/serviceGeneration/styleDefaults.py b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py new file mode 100644 index 00000000..b5a92641 --- /dev/null +++ b/modules/serviceCenter/services/serviceGeneration/styleDefaults.py @@ -0,0 +1,75 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Default style definitions and style resolution for document rendering.""" + +from typing import Any, Dict + + +DEFAULT_STYLE: Dict[str, Any] = { + "fonts": { + "primary": "Calibri", + "monospace": "Consolas", + }, + "colors": { + "primary": "#1F3864", + "secondary": "#2C3E50", + "accent": "#2980B9", + "background": "#FFFFFF", + }, + "headings": { + "h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6}, + "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4}, + "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3}, + "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2}, + }, + "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"}, + "table": { + "headerBg": "#1F3864", + "headerFg": "#FFFFFF", + "headerSizePt": 10, + "bodySizePt": 10, + "rowBandingEven": "#F2F6FC", + "rowBandingOdd": "#FFFFFF", + "borderColor": "#CBD5E1", + "borderWidthPt": 0.5, + }, + "list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11}, + "image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"}, + "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"}, + "page": { + "format": "A4", + "marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60}, + "showPageNumbers": True, + "headerHeight": 30, + "footerHeight": 30, + "headerLogo": None, + "headerText": "", + "footerText": "", + }, +} + + +def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: + """Recursively merge override into base. Both dicts left unchanged; returns new dict.""" + result = {} + for key in base: + if key in override: + baseVal = base[key] + overVal = override[key] + if isinstance(baseVal, dict) and isinstance(overVal, dict): + result[key] = _deepMerge(baseVal, overVal) + else: + result[key] = overVal + else: + result[key] = base[key] + for key in override: + if key not in base: + result[key] = override[key] + return result + + +def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]: + """Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict.""" + if not agentStyle: + return dict(DEFAULT_STYLE) + return _deepMerge(DEFAULT_STYLE, agentStyle) diff --git a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py index 8a3e7cea..594fbe02 100644 --- a/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py +++ b/modules/serviceCenter/services/serviceGeneration/subDocumentUtility.py @@ -9,11 +9,70 @@ from typing import Any, Dict logger = logging.getLogger(__name__) +def _parseInlineRuns(text: str) -> list: + """ + Parse inline markdown formatting into a list of InlineRun dicts. + Handles: images, links, bold, italic, inline code, plain text. + Uses a regex-based tokenizer that processes tokens left-to-right. + """ + if not text: + return [{"type": "text", "value": ""}] + + # Pattern order matters: images before links, bold before italic + _TOKEN_RE = re.compile( + r'!\[(?P[^\]]*)\]\((?P[^)"]+)(?:\s+"(?P\d+)pt")?\)' # image + r'|\[(?P[^\]]+)\]\((?P[^)]+)\)' # link + r'|`(?P[^`]+)`' # inline code + r'|\*\*(?P.+?)\*\*' # bold + r'|(?.+?)\*(?!\w)' # italic *x* + r'|(?.+?)_(?!\w)' # italic _x_ + ) + + runs = [] + lastEnd = 0 + + for m in _TOKEN_RE.finditer(text): + # Plain text before this match + if m.start() > lastEnd: + runs.append({"type": "text", "value": text[lastEnd:m.start()]}) + + if m.group("imgAlt") is not None or m.group("imgSrc") is not None: + alt = (m.group("imgAlt") or "").strip() or "Image" + src = (m.group("imgSrc") or "").strip() + widthStr = m.group("imgWidth") + run = {"type": "image", "value": alt} + if src.startswith("file:"): + run["fileId"] = src[5:] + else: + run["href"] = src + if widthStr: + run["widthPt"] = int(widthStr) + runs.append(run) + elif m.group("linkText") is not None: + runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")}) + elif m.group("code") is not None: + runs.append({"type": "code", "value": m.group("code")}) + elif m.group("bold") is not None: + runs.append({"type": "bold", "value": m.group("bold")}) + elif m.group("italic1") is not None: + runs.append({"type": "italic", "value": m.group("italic1")}) + elif m.group("italic2") is not None: + runs.append({"type": "italic", "value": m.group("italic2")}) + + lastEnd = m.end() + + # Trailing plain text + if lastEnd < len(text): + runs.append({"type": "text", "value": text[lastEnd:]}) + + return runs if runs else [{"type": "text", "value": text}] + + def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: """ - Convert markdown content to the standard document JSON format expected by renderReport. - Supports headings, code blocks, tables, lists, images (file: refs), paragraphs. - For plain text: wraps entire content in a single paragraph section. + Convert markdown content to the standard document JSON format with Inline-Run model. + Sections use inlineRuns (list of run dicts) instead of plain text strings. + Supports headings, code blocks, tables, lists, images, paragraphs. """ if not isinstance(markdown, str): markdown = str(markdown) if markdown else "" @@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D while i < len(lines): line = lines[i] - # Headings + # Headings (plain text, no inline formatting) headingMatch = re.match(r"^(#{1,6})\s+(.+)", line) if headingMatch: level = len(headingMatch.group(1)) @@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D i += 1 continue - # Fenced code blocks + # Fenced code blocks (no inline formatting) codeMatch = re.match(r"^```(\w*)", line) if codeMatch: lang = codeMatch.group(1) or "text" @@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D }) continue - # Tables + # Tables - cells are List[InlineRun] tableMatch = re.match(r"^\|(.+)\|$", line) if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]): - headerCells = [c.strip() for c in tableMatch.group(1).split("|")] + headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")] i += 2 rows = [] while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]): - rowCells = [c.strip() for c in lines[i][1:-1].split("|")] + rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")] rows.append(rowCells) i += 1 sections.append({ @@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D }) continue - # Bullet / numbered lists + # Bullet / numbered lists - items are List[List[InlineRun]] listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line) if listMatch: isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2))) items = [] while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]): m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]) - items.append({"text": m.group(3).strip()}) + items.append(_parseInlineRuns(m.group(3).strip())) i += 1 sections.append({ "id": _nextId(), "content_type": "bullet_list", "order": order, @@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D i += 1 continue - # Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store) - imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line) + # Standalone image on its own line -> block-level image section + imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line) if imgMatch: altText = imgMatch.group(1).strip() or "Image" src = imgMatch.group(2).strip() + widthStr = imgMatch.group(3) fileId = src[5:] if src.startswith("file:") else "" + content = { + "altText": altText, + "base64Data": "", + "_fileRef": fileId, + "_srcUrl": src if not fileId else "", + } + if widthStr: + content["widthPt"] = int(widthStr) sections.append({ "id": _nextId(), "content_type": "image", "order": order, - "elements": [{ - "content": { - "altText": altText, - "base64Data": "", - "_fileRef": fileId, - "_srcUrl": src if not fileId else "", - } - }], + "elements": [{"content": content}], }) i += 1 continue - # Paragraph + # Paragraph - produces inlineRuns paraLines = [] while i < len(lines) and lines[i].strip() and not re.match( - r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i] + r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i] ): paraLines.append(lines[i]) i += 1 if paraLines: + combinedText = " ".join(paraLines) sections.append({ "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": " ".join(paraLines)}}], + "elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}], }) continue i += 1 if not sections: + fallbackText = markdown.strip() or "(empty)" sections.append({ "id": _nextId(), "content_type": "paragraph", "order": order, - "elements": [{"content": {"text": markdown.strip() or "(empty)"}}], + "elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}], }) return { diff --git a/modules/workflows/methods/methodAi/_common.py b/modules/workflows/methods/methodAi/_common.py new file mode 100644 index 00000000..9e77d431 --- /dev/null +++ b/modules/workflows/methods/methodAi/_common.py @@ -0,0 +1,18 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. + +"""Shared helpers for AI workflow actions.""" + + +def applyCommonAiParams(parameters: dict, request) -> None: + """Apply common AI parameters (requireNeutralization, allowedModels) from node to request.""" + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + request.requireNeutralization = bool(requireNeutralization) + + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + if not request.options: + from modules.datamodels.datamodelAi import AiCallOptions + request.options = AiCallOptions() + request.options.allowedModels = allowedModels diff --git a/modules/workflows/methods/methodAi/actions/consolidate.py b/modules/workflows/methods/methodAi/actions/consolidate.py index fa622507..7483507e 100644 --- a/modules/workflows/methods/methodAi/actions/consolidate.py +++ b/modules/workflows/methods/methodAi/actions/consolidate.py @@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult: prompt=prompt, options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE), ) + from modules.workflows.methods.methodAi._common import applyCommonAiParams + applyCommonAiParams(parameters, req) resp = await ai_service.callAi(req) except (SubscriptionInactiveException, BillingContextError): raise diff --git a/modules/workflows/methods/methodAi/actions/convertDocument.py b/modules/workflows/methods/methodAi/actions/convertDocument.py index 39d6e16f..b2ed908b 100644 --- a/modules/workflows/methods/methodAi/actions/convertDocument.py +++ b/modules/workflows/methods/methodAi/actions/convertDocument.py @@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult: } if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/modules/workflows/methods/methodAi/actions/generateCode.py b/modules/workflows/methods/methodAi/actions/generateCode.py index 313057a0..5ec6b51d 100644 --- a/modules/workflows/methods/methodAi/actions/generateCode.py +++ b/modules/workflows/methods/methodAi/actions/generateCode.py @@ -55,6 +55,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult: processingMode=ProcessingModeEnum.DETAILED ) + # Apply node-level AI params + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # outputFormat: Optional - if None, formats determined from prompt by AI aiResponse: AiResponse = await self.services.ai.callAiContent( prompt=prompt, diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 0709b924..18c158c1 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -59,6 +59,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: compressContext=False ) + # Apply node-level AI params + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # outputFormat: Optional - if None, formats determined from prompt by AI aiResponse: AiResponse = await self.services.ai.callAiContent( prompt=prompt, diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 63e0f33e..d82ac4f7 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -212,6 +212,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: ) ) + from modules.workflows.methods.methodAi._common import applyCommonAiParams + applyCommonAiParams(parameters, request) + aiResponse_obj = await self.services.ai.callAi(request) # Convert AiCallResponse to AiResponse format @@ -243,6 +246,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE ) + # Apply node-level AI params (allowedModels, requireNeutralization) + allowedModels = parameters.get("allowedModels") + if allowedModels and isinstance(allowedModels, list): + options.allowedModels = allowedModels + requireNeutralization = parameters.get("requireNeutralization") + if requireNeutralization is not None: + _ctx = getattr(self.services, '_context', None) + if _ctx: + _ctx.requireNeutralization = bool(requireNeutralization) + # Get generationIntent from parameters (required for DATA_GENERATE) # Default to "document" if not provided (most common use case) # For code generation, use ai.generateCode action or explicitly pass generationIntent="code" diff --git a/modules/workflows/methods/methodAi/actions/summarizeDocument.py b/modules/workflows/methods/methodAi/actions/summarizeDocument.py index e32c1965..4c2bb2bc 100644 --- a/modules/workflows/methods/methodAi/actions/summarizeDocument.py +++ b/modules/workflows/methods/methodAi/actions/summarizeDocument.py @@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult: } if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/modules/workflows/methods/methodAi/actions/translateDocument.py b/modules/workflows/methods/methodAi/actions/translateDocument.py index bb6f8437..dc0533a9 100644 --- a/modules/workflows/methods/methodAi/actions/translateDocument.py +++ b/modules/workflows/methods/methodAi/actions/translateDocument.py @@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult: processParams["resultType"] = resultType if parentOperationId: processParams["parentOperationId"] = parentOperationId + if parameters.get("allowedModels"): + processParams["allowedModels"] = parameters["allowedModels"] + if parameters.get("requireNeutralization") is not None: + processParams["requireNeutralization"] = parameters["requireNeutralization"] return await self.process(processParams) diff --git a/tests/serviceAi/__init__.py b/tests/serviceAi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/serviceAi/test_allowed_models_whitelist.py b/tests/serviceAi/test_allowed_models_whitelist.py new file mode 100644 index 00000000..4593afd9 --- /dev/null +++ b/tests/serviceAi/test_allowed_models_whitelist.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.datamodels.datamodelAi import AiCallOptions + + +def test_allowed_models_field_exists(): + opts = AiCallOptions(allowedModels=["gpt-5-mini", "claude-4-7-opus"]) + assert opts.allowedModels == ["gpt-5-mini", "claude-4-7-opus"] + + +def test_allowed_models_default_none(): + opts = AiCallOptions() + assert opts.allowedModels is None diff --git a/tests/serviceGeneration/__init__.py b/tests/serviceGeneration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/serviceGeneration/test_inline_image_paragraph.py b/tests/serviceGeneration/test_inline_image_paragraph.py new file mode 100644 index 00000000..be0c5d19 --- /dev/null +++ b/tests/serviceGeneration/test_inline_image_paragraph.py @@ -0,0 +1,23 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + + +def test_inline_image_in_paragraph(): + md = "Results show ![chart](file:abc \"200pt\") clearly." + result = markdownToDocumentJson(md, "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + types = [r["type"] for r in runs] + assert "text" in types + assert "image" in types + imgRun = next(r for r in runs if r["type"] == "image") + assert imgRun.get("fileId") == "abc" + + +def test_multiple_inline_images(): + md = "A ![x](file:1) B ![y](file:2) C" + result = markdownToDocumentJson(md, "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + images = [r for r in runs if r["type"] == "image"] + assert len(images) == 2 diff --git a/tests/serviceGeneration/test_md_to_json_consolidation.py b/tests/serviceGeneration/test_md_to_json_consolidation.py new file mode 100644 index 00000000..83118374 --- /dev/null +++ b/tests/serviceGeneration/test_md_to_json_consolidation.py @@ -0,0 +1,71 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson + + +def test_basic_paragraph(): + result = markdownToDocumentJson("Hello world", "Test") + doc = result["documents"][0] + section = doc["sections"][0] + assert section["content_type"] == "paragraph" + assert section["elements"][0]["content"]["inlineRuns"][0] == {"type": "text", "value": "Hello world"} + + +def test_inline_bold(): + result = markdownToDocumentJson("This is **bold** text", "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "bold" and r["value"] == "bold" for r in runs) + + +def test_inline_image(): + result = markdownToDocumentJson("Text ![logo](file:abc123) more", "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "image" and r.get("fileId") == "abc123" for r in runs) + + +def test_inline_link(): + result = markdownToDocumentJson("Click [here](https://example.com)", "Test") + runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"] + assert any(r["type"] == "link" and r.get("href") == "https://example.com" for r in runs) + + +def test_table_cells_are_inline_runs(): + md = "| A | B |\n| --- | --- |\n| **x** | y |" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "table" + rows = section["elements"][0]["content"]["rows"] + assert isinstance(rows[0][0], list) + + +def test_bullet_list_inline_runs(): + md = "- Item **one**\n- Item two" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "bullet_list" + items = section["elements"][0]["content"]["items"] + assert isinstance(items[0], list) + + +def test_standalone_image_block(): + md = "![Big chart](file:chart123)" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "image" + + +def test_heading_unchanged(): + result = markdownToDocumentJson("# Title", "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "heading" + assert section["elements"][0]["content"]["text"] == "Title" + assert section["elements"][0]["content"]["level"] == 1 + + +def test_code_block_unchanged(): + md = "```python\nprint('hi')\n```" + result = markdownToDocumentJson(md, "Test") + section = result["documents"][0]["sections"][0] + assert section["content_type"] == "code_block" + assert section["elements"][0]["content"]["code"] == "print('hi')" diff --git a/tests/serviceGeneration/test_style_resolver.py b/tests/serviceGeneration/test_style_resolver.py new file mode 100644 index 00000000..6b2b649a --- /dev/null +++ b/tests/serviceGeneration/test_style_resolver.py @@ -0,0 +1,39 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +import pytest +from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE + + +def test_resolve_none_returns_defaults(): + result = resolveStyle(None) + assert result == DEFAULT_STYLE + + +def test_resolve_empty_returns_defaults(): + result = resolveStyle({}) + assert result == DEFAULT_STYLE + + +def test_override_single_color(): + result = resolveStyle({"colors": {"primary": "#FF0000"}}) + assert result["colors"]["primary"] == "#FF0000" + assert result["colors"]["secondary"] == DEFAULT_STYLE["colors"]["secondary"] + + +def test_override_nested_heading(): + result = resolveStyle({"headings": {"h1": {"sizePt": 30}}}) + assert result["headings"]["h1"]["sizePt"] == 30 + assert result["headings"]["h1"]["weight"] == "bold" + + +def test_override_font(): + result = resolveStyle({"fonts": {"primary": "Arial"}}) + assert result["fonts"]["primary"] == "Arial" + assert result["fonts"]["monospace"] == "Consolas" + + +def test_full_style_passthrough(): + custom = {"fonts": {"primary": "Helvetica", "monospace": "Monaco"}} + result = resolveStyle(custom) + assert result["fonts"]["primary"] == "Helvetica" + assert result["fonts"]["monospace"] == "Monaco"