plan d implemented - generationn styles

This commit is contained in:
ValueOn AG 2026-04-29 23:12:46 +02:00
parent b12671bbb5
commit afd7e9d941
32 changed files with 1411 additions and 485 deletions

View file

@ -162,6 +162,7 @@ class AiCallOptions(BaseModel):
# Provider filtering (from UI multiselect or automation config) # Provider filtering (from UI multiselect or automation config)
allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)") allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)")
allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). None/empty = all allowed.")
class AiCallRequest(BaseModel): class AiCallRequest(BaseModel):

View file

@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren
This defines a single canonical template and the supported section types. This defines a single canonical template and the supported section types.
""" """
from typing import List from typing import List, Literal, TypedDict
# Canonical list of supported section types across the system # Canonical list of supported section types across the system
supportedSectionTypes: List[str] = [ supportedSectionTypes: List[str] = [
@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [
"image", "image",
] ]
class InlineRun(TypedDict, total=False):
"""Single inline content run. Every paragraph/cell/list-item is a List[InlineRun]."""
type: Literal["text", "image", "link", "bold", "italic", "code"]
value: str # text content (for text/bold/italic/code/link-label)
fileId: str # for type=image: reference to FileItem
base64Data: str # for type=image: resolved base64 (post-processing)
mimeType: str # for type=image: e.g. "image/png"
widthPt: int # for type=image: optional render width
href: str # for type=link: URL target
supportedInlineRunTypes: List[str] = [
"text", "image", "link", "bold", "italic", "code",
]
# Canonical JSON template used for AI generation (documents array + sections) # Canonical JSON template used for AI generation (documents array + sections)
# This template is used for STRUCTURE generation - sections have empty elements arrays. # This template is used for STRUCTURE generation - sections have empty elements arrays.
# For content generation, elements arrays will be populated later. # For content generation, elements arrays will be populated later.

View file

@ -3,6 +3,15 @@
from modules.shared.i18nRegistry import t from modules.shared.i18nRegistry import t
_AI_COMMON_PARAMS = [
{"name": "requireNeutralization", "type": "boolean", "required": False,
"frontendType": "checkbox", "default": False,
"description": t("Eingaben fuer diesen Call neutralisieren")},
{"name": "allowedModels", "type": "array", "required": False,
"frontendType": "modelMultiSelect", "default": [],
"description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")},
]
AI_NODES = [ AI_NODES = [
{ {
"id": "ai.prompt", "id": "ai.prompt",
@ -19,7 +28,7 @@ AI_NODES = [
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox", {"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox",
"description": t("Einfacher Modus"), "default": True}, "description": t("Einfacher Modus"), "default": True},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": [ "inputPorts": {0: {"accepts": [
@ -38,7 +47,7 @@ AI_NODES = [
"parameters": [ "parameters": [
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
"description": t("Recherche-Anfrage")}, "description": t("Recherche-Anfrage")},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
@ -58,7 +67,7 @@ AI_NODES = [
{"name": "summaryLength", "type": "string", "required": False, "frontendType": "select", {"name": "summaryLength", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["brief", "medium", "detailed"]}, "frontendOptions": {"options": ["brief", "medium", "detailed"]},
"description": t("Kurz, mittel oder ausführlich"), "default": "medium"}, "description": t("Kurz, mittel oder ausführlich"), "default": "medium"},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -77,7 +86,7 @@ AI_NODES = [
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""}, "description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
{"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text", {"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text",
"description": t("Zielsprache (z.B. de, en, French)")}, "description": t("Zielsprache (z.B. de, en, French)")},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -97,7 +106,7 @@ AI_NODES = [
{"name": "targetFormat", "type": "string", "required": True, "frontendType": "select", {"name": "targetFormat", "type": "string", "required": True, "frontendType": "select",
"frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]}, "frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]},
"description": t("Zielformat")}, "description": t("Zielformat")},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}}, "inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
@ -114,7 +123,7 @@ AI_NODES = [
"parameters": [ "parameters": [
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea", {"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
"description": t("Generierungs-Prompt")}, "description": t("Generierungs-Prompt")},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
@ -134,7 +143,7 @@ AI_NODES = [
{"name": "resultType", "type": "string", "required": False, "frontendType": "select", {"name": "resultType", "type": "string", "required": False, "frontendType": "select",
"frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]}, "frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]},
"description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"}, "description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["Transit"]}}, "inputPorts": {0: {"accepts": ["Transit"]}},
@ -154,7 +163,7 @@ AI_NODES = [
"description": t("Konsolidierungsmodus"), "default": "summarize"}, "description": t("Konsolidierungsmodus"), "default": "summarize"},
{"name": "prompt", "type": "string", "required": False, "frontendType": "textarea", {"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
"description": t("Optionaler Prompt für die Konsolidierung"), "default": ""}, "description": t("Optionaler Prompt für die Konsolidierung"), "default": ""},
], ] + _AI_COMMON_PARAMS,
"inputs": 1, "inputs": 1,
"outputs": 1, "outputs": 1,
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}}, "inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},

View file

@ -361,6 +361,17 @@ QUICK_ACTIONS = [
# The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows. # The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows.
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_FINANCE_STYLE_HINT = (
"\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n"
"- Schriftart: Calibri\n"
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
"- Akzentfarbe: #2980B9\n"
"- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
"- Konservatives, seriöses Layout\n"
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
)
def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]: def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
"""Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt.""" """Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt."""
return { return {
@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
"parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}}, "parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}},
{"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process", {"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process",
"parameters": { "parameters": {
"aiPrompt": prompt, "aiPrompt": prompt + _FINANCE_STYLE_HINT,
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
"requireNeutralization": True,
"simpleMode": False, "simpleMode": False,
}, "position": {"x": 500, "y": 0}}, }, "position": {"x": 500, "y": 0}},
], ],
@ -454,10 +466,19 @@ TEMPLATE_WORKFLOWS = [
"Verwende die uebergebene Budget-Datei als Soll-Quelle und die im " "Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
"Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n" "Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN " "WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
"Uebersichts-Chart ueber alle Konten ist gewuenscht." "Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
"verwende einen professionellen Finanz-Stil:\n"
"- Schriftart: Calibri\n"
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
"- Akzentfarbe: #2980B9\n"
"- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
"- Konservatives, seriöses Layout\n"
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
), ),
"resultType": "xlsx", "resultType": "xlsx",
"documentTheme": "finance", "documentTheme": "finance",
"requireNeutralization": True,
"documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]}, "documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]},
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]}, "context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
"simpleMode": False, "simpleMode": False,

View file

@ -2,8 +2,8 @@
# All rights reserved. # All rights reserved.
"""Workspace feature data models — WorkspaceUserSettings.""" """Workspace feature data models — WorkspaceUserSettings."""
from typing import Optional from typing import List, Optional
from pydantic import BaseModel, Field from pydantic import Field
from modules.datamodels.datamodelBase import PowerOnModel from modules.datamodels.datamodelBase import PowerOnModel
from modules.shared.i18nRegistry import i18nModel from modules.shared.i18nRegistry import i18nModel
import uuid import uuid
@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel):
description="Max agent rounds override (None = instance default)", description="Max agent rounds override (None = instance default)",
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False}, json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
) )
requireNeutralization: bool = Field(
default=False,
description="Default neutralization setting for this user",
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
)
allowedProviders: List[str] = Field(
default_factory=list,
description="Allowed AI providers (empty = all permitted by RBAC)",
json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False},
)
allowedModels: List[str] = Field(
default_factory=list,
description="Allowed AI models (empty = all permitted)",
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
)

View file

@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel):
workflowId: Optional[str] = Field(default=None, description="Continue existing workflow") workflowId: Optional[str] = Field(default=None, description="Continue existing workflow")
userLanguage: str = Field(default="en", description="User language code") userLanguage: str = Field(default="en", description="User language code")
allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers") allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers")
allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models")
requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override") requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override")
@ -635,6 +636,7 @@ async def streamWorkspaceStart(
userLanguage=userInput.userLanguage, userLanguage=userInput.userLanguage,
instanceConfig=instanceConfig, instanceConfig=instanceConfig,
allowedProviders=userInput.allowedProviders, allowedProviders=userInput.allowedProviders,
allowedModels=userInput.allowedModels,
requireNeutralization=userInput.requireNeutralization, requireNeutralization=userInput.requireNeutralization,
billingFeatureCode=wsBillingFeatureCode, billingFeatureCode=wsBillingFeatureCode,
) )
@ -692,6 +694,7 @@ async def _runWorkspaceAgent(
userLanguage: str = "en", userLanguage: str = "en",
instanceConfig: Dict[str, Any] = None, instanceConfig: Dict[str, Any] = None,
allowedProviders: List[str] = None, allowedProviders: List[str] = None,
allowedModels: List[str] = None,
requireNeutralization: Optional[bool] = None, requireNeutralization: Optional[bool] = None,
billingFeatureCode: Optional[str] = None, billingFeatureCode: Optional[str] = None,
): ):
@ -715,6 +718,9 @@ async def _runWorkspaceAgent(
logger.info(f"Workspace agent: allowedProviders={allowedProviders}") logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
else: else:
logger.debug("Workspace agent: no allowedProviders in request") logger.debug("Workspace agent: no allowedProviders in request")
if allowedModels:
aiService.services.allowedModels = allowedModels
logger.info(f"Workspace agent: allowedModels={allowedModels}")
if requireNeutralization is not None: if requireNeutralization is not None:
ctx.requireNeutralization = requireNeutralization ctx.requireNeutralization = requireNeutralization
@ -2139,6 +2145,76 @@ async def updateGeneralSettings(
return await getGeneralSettings(request, instanceId, context) return await getGeneralSettings(request, instanceId, context)
# =========================================================================
# User-level AI settings (neutralisation, providers, models)
# =========================================================================
@router.get("/{instanceId}/user-settings")
@limiter.limit("120/minute")
async def getWorkspaceUserSettings(
request: Request,
instanceId: str = Path(...),
context: RequestContext = Depends(getRequestContext),
):
"""Get the current user's workspace AI settings (auto-creates with defaults if not exists)."""
_mandateId, _ = _validateInstanceAccess(instanceId, context)
wsInterface = _getWorkspaceInterface(context, instanceId)
userId = str(context.user.id)
settings = wsInterface.getWorkspaceUserSettings(userId)
if settings:
return JSONResponse({
"requireNeutralization": settings.requireNeutralization,
"allowedProviders": settings.allowedProviders,
"allowedModels": settings.allowedModels,
})
data = {
"userId": userId,
"mandateId": str(context.mandateId) if context.mandateId else "",
"featureInstanceId": instanceId,
}
created = wsInterface.saveWorkspaceUserSettings(data)
return JSONResponse({
"requireNeutralization": created.requireNeutralization,
"allowedProviders": created.allowedProviders,
"allowedModels": created.allowedModels,
})
@router.put("/{instanceId}/user-settings")
@limiter.limit("120/minute")
async def putWorkspaceUserSettings(
request: Request,
instanceId: str = Path(...),
body: dict = Body(...),
context: RequestContext = Depends(getRequestContext),
):
"""Save the current user's workspace AI settings."""
_mandateId, _ = _validateInstanceAccess(instanceId, context)
wsInterface = _getWorkspaceInterface(context, instanceId)
userId = str(context.user.id)
data = {
"userId": userId,
"mandateId": str(context.mandateId) if context.mandateId else "",
"featureInstanceId": instanceId,
}
if "requireNeutralization" in body:
data["requireNeutralization"] = bool(body["requireNeutralization"])
if "allowedProviders" in body:
data["allowedProviders"] = body["allowedProviders"]
if "allowedModels" in body:
data["allowedModels"] = body["allowedModels"]
saved = wsInterface.saveWorkspaceUserSettings(data)
return JSONResponse({
"requireNeutralization": saved.requireNeutralization,
"allowedProviders": saved.allowedProviders,
"allowedModels": saved.allowedModels,
})
# ========================================================================= # =========================================================================
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs) # RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
# ========================================================================= # =========================================================================

View file

@ -111,6 +111,19 @@ class AiObjects:
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1, processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
) )
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filteredModels = [m for m in availableModels if m.name in allowedModels]
if filteredModels:
availableModels = filteredModels
else:
errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}"
logger.error(errorMsg)
return AiCallResponse(
content=errorMsg, modelName="error", priceCHF=0.0,
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
)
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels) failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
if not failoverModelList: if not failoverModelList:
@ -364,6 +377,19 @@ class AiObjects:
) )
return return
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filtered = [m for m in availableModels if m.name in allowedModels]
if filtered:
availableModels = filtered
else:
yield AiCallResponse(
content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}",
modelName="error", priceCHF=0.0, processingTime=0.0,
bytesSent=0, bytesReceived=0, errorCount=1,
)
return
failoverModelList = modelSelector.getFailoverModelList( failoverModelList = modelSelector.getFailoverModelList(
request.prompt, request.context or "", options, availableModels request.prompt, request.context or "", options, availableModels
) )
@ -516,6 +542,14 @@ class AiObjects:
else: else:
logger.warning(f"No embedding models match allowedProviders {allowedProviders}") logger.warning(f"No embedding models match allowedProviders {allowedProviders}")
allowedModels = getattr(options, 'allowedModels', None) if options else None
if allowedModels:
filtered = [m for m in availableModels if m.name in allowedModels]
if filtered:
availableModels = filtered
else:
logger.warning(f"No embedding models match allowedModels {allowedModels}")
failoverModelList = modelSelector.getFailoverModelList( failoverModelList = modelSelector.getFailoverModelList(
combinedText, "", options, availableModels combinedText, "", options, availableModels
) )

View file

@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services):
# ---- Document rendering tool ---- # ---- Document rendering tool ----
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
"""Convert markdown content to the standard document JSON format expected by renderers.""" """Delegate to the consolidated parser in subDocumentUtility."""
import re as _re from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
result = markdownToDocumentJson(markdown, title, language)
sections = [] result["metadata"]["extraction_method"] = "agent_rendering"
order = 0 return result
lines = markdown.split("\n")
i = 0
def _nextId():
nonlocal order
order += 1
return f"s_{order}"
while i < len(lines):
line = lines[i]
# --- Headings ---
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
if headingMatch:
level = len(headingMatch.group(1))
text = headingMatch.group(2).strip()
sections.append({
"id": _nextId(), "content_type": "heading", "order": order,
"elements": [{"content": {"text": text, "level": level}}],
})
i += 1
continue
# --- Fenced code blocks ---
codeMatch = _re.match(r'^```(\w*)', line)
if codeMatch:
lang = codeMatch.group(1) or "text"
codeLines = []
i += 1
while i < len(lines) and not lines[i].startswith("```"):
codeLines.append(lines[i])
i += 1
i += 1
sections.append({
"id": _nextId(), "content_type": "code_block", "order": order,
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
})
continue
# --- Tables ---
tableMatch = _re.match(r'^\|(.+)\|$', line)
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
i += 2
rows = []
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
rows.append(rowCells)
i += 1
sections.append({
"id": _nextId(), "content_type": "table", "order": order,
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
})
continue
# --- Bullet / numbered lists ---
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
if listMatch:
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
items = []
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
items.append({"text": m.group(3).strip()})
i += 1
sections.append({
"id": _nextId(), "content_type": "bullet_list", "order": order,
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
})
continue
# --- Empty lines (skip) ---
if not line.strip():
i += 1
continue
# --- Images: ![alt](file:fileId) or ![alt](url) ---
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
if imgMatch:
altText = imgMatch.group(1).strip() or "Image"
src = imgMatch.group(2).strip()
fileId = ""
if src.startswith("file:"):
fileId = src[5:]
sections.append({
"id": _nextId(), "content_type": "image", "order": order,
"elements": [{
"content": {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
}],
})
i += 1
continue
# --- Paragraph (collect consecutive non-empty lines) ---
paraLines = []
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
paraLines.append(lines[i])
i += 1
if paraLines:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": " ".join(paraLines)}}],
})
continue
i += 1
if not sections:
sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
})
return {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "agent_rendering",
"title": title,
"language": language,
},
"documents": [{
"id": "doc_1",
"title": title,
"sections": sections,
}],
}
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]): async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
"""Render agent-produced markdown content into any document format via the RendererRegistry.""" """Render agent-produced markdown content into any document format via the RendererRegistry."""
@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services):
except Exception as e: except Exception as e:
logger.warning(f"renderDocument: knowledge service unavailable: {e}") logger.warning(f"renderDocument: knowledge service unavailable: {e}")
resolvedImages = 0 resolvedImages = 0
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
"""Resolve a single image reference dict to base64Data in-place."""
nonlocal resolvedImages
fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "")
if not fileRef or targetObj.get("base64Data"):
return
if knowledgeService:
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
if imageChunks:
targetObj["base64Data"] = imageChunks[0].get("data", "")
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
targetObj["mimeType"] = chunkMime
resolvedImages += 1
if not targetObj.get("base64Data"):
try:
rawBytes = services.chat.getFileData(fileRef)
if rawBytes:
import base64 as _b64
targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
targetObj["mimeType"] = "image/png"
resolvedImages += 1
except Exception as e:
logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
targetObj.pop("_fileRef", None)
targetObj.pop("_srcUrl", None)
def _resolveInlineRuns(runsList):
"""Scan a list of inline runs and resolve any image runs with fileId."""
for run in runsList:
if run.get("type") == "image" and run.get("fileId") and not run.get("base64Data"):
_resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId")
for doc in structuredContent.get("documents", []): for doc in structuredContent.get("documents", []):
for section in doc.get("sections", []): for section in doc.get("sections", []):
if section.get("content_type") != "image": cType = section.get("content_type")
# Block-level image sections
if cType == "image":
for element in section.get("elements", []):
contentObj = element.get("content", {})
_resolveImageRef(contentObj)
continue continue
for element in section.get("elements", []): # Paragraphs with inlineRuns
contentObj = element.get("content", {}) if cType == "paragraph":
fileRef = contentObj.get("_fileRef", "") for element in section.get("elements", []):
if not fileRef or contentObj.get("base64Data"): runs = element.get("content", {}).get("inlineRuns")
continue if runs:
if knowledgeService: _resolveInlineRuns(runs)
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef) continue
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"] # Bullet lists - items are List[List[InlineRun]]
if imageChunks: if cType == "bullet_list":
contentObj["base64Data"] = imageChunks[0].get("data", "") for element in section.get("elements", []):
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png") items = element.get("content", {}).get("items", [])
contentObj["mimeType"] = chunkMime for item in items:
resolvedImages += 1 if isinstance(item, list):
if not contentObj.get("base64Data"): _resolveInlineRuns(item)
try: continue
rawBytes = services.chat.getFileData(fileRef) # Tables - headers and row cells are List[InlineRun]
if rawBytes: if cType == "table":
import base64 as _b64 for element in section.get("elements", []):
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii") contentObj = element.get("content", {})
contentObj["mimeType"] = "image/png" for cell in contentObj.get("headers", []):
resolvedImages += 1 if isinstance(cell, list):
except Exception as e: _resolveInlineRuns(cell)
logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}") for row in contentObj.get("rows", []):
contentObj.pop("_fileRef", None) for cell in row:
contentObj.pop("_srcUrl", None) if isinstance(cell, list):
_resolveInlineRuns(cell)
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", [])) sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}") logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
language=language, language=language,
title=title, title=title,
userPrompt=content, userPrompt=content,
style=args.get("style"),
) )
if not documents: if not documents:
@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services):
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"}, "outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
"title": {"type": "string", "description": "Document title", "default": "Document"}, "title": {"type": "string", "description": "Document title", "default": "Document"},
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"}, "language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
"style": {
"type": "object",
"description": (
"Optional style overrides for the rendered document. Supports nested keys: "
"fonts (primary, monospace), colors (primary, secondary, accent, background), "
"headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), "
"paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, "
"bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), "
"list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), "
"codeBlock (fontSizePt, background, borderColor), "
"page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). "
"Only provided keys override defaults; omitted keys keep their default values."
),
},
}, },
}, },
readOnly=False, readOnly=False,

View file

@ -86,7 +86,7 @@ class _ServicesAdapter:
return getattr(w, "featureCode", None) if w else None return getattr(w, "featureCode", None) if w else None
def __getattr__(self, name: str): def __getattr__(self, name: str):
if name in ("allowedProviders", "preferredProviders", "currentUserLanguage"): if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"):
return getattr(self.workflow, name, None) if self.workflow else None return getattr(self.workflow, name, None) if self.workflow else None
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
@ -177,6 +177,11 @@ class AiService:
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}") logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}")
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
effectiveModels = self._calculateEffectiveModels(request)
if effectiveModels and request.options:
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
# Neutralize prompt if enabled (before AI call) # Neutralize prompt if enabled (before AI call)
_wasNeutralized = False _wasNeutralized = False
_excludedDocs: List[str] = [] _excludedDocs: List[str] = []
@ -225,6 +230,11 @@ class AiService:
if effectiveProviders and request.options: if effectiveProviders and request.options:
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders}) request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
effectiveModels = self._calculateEffectiveModels(request)
if effectiveModels and request.options:
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
# Neutralize prompt if enabled (before streaming) # Neutralize prompt if enabled (before streaming)
_wasNeutralized = False _wasNeutralized = False
_excludedDocs: List[str] = [] _excludedDocs: List[str] = []
@ -1240,6 +1250,43 @@ detectedIntent-Werte:
logger.warning(f"Error calculating effective providers: {e}") logger.warning(f"Error calculating effective providers: {e}")
return None return None
def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]:
"""
Calculate effective allowed models: Workflow.allowedModels request.options.allowedModels.
AND-logic intersection:
- If workflow specifies allowedModels, start with those.
- If request (node-level) also specifies allowedModels, intersect.
- Returns None if no model filtering is needed.
"""
try:
effectiveModels = None
# Workflow-level allowedModels (from automation config)
workflowModels = getattr(self.services, 'allowedModels', None)
if workflowModels:
effectiveModels = list(workflowModels)
# Request-level (node-level) allowedModels
requestModels = None
if request and request.options and request.options.allowedModels:
requestModels = request.options.allowedModels
if requestModels:
if effectiveModels:
effectiveModels = [m for m in effectiveModels if m in requestModels]
else:
effectiveModels = list(requestModels)
if effectiveModels:
logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={effectiveModels}")
return effectiveModels if effectiveModels else None
except Exception as e:
logger.warning(f"Error calculating effective models: {e}")
return None
async def ensureAiObjectsInitialized(self): async def ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized and submodules are ready.""" """Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None: if self.aiObjects is None:

View file

@ -14,6 +14,7 @@ from .subDocumentUtility import (
detectMimeTypeFromData, detectMimeTypeFromData,
convertDocumentDataToString convertDocumentDataToString
) )
from .styleDefaults import resolveStyle
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -382,7 +383,7 @@ class GenerationService:
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]:
""" """
Render extracted JSON content to the specified output format. Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each. Processes EACH document separately and calls renderer for each.
@ -399,12 +400,14 @@ class GenerationService:
userPrompt: User's original prompt for report generation userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging parentOperationId: Optional parent operation ID for hierarchical logging
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
Returns: Returns:
List of RenderedDocument objects. List of RenderedDocument objects.
Each RenderedDocument represents one rendered file (main document or supporting file) Each RenderedDocument represents one rendered file (main document or supporting file)
""" """
try: try:
resolvedStyle = resolveStyle(style)
# Validate JSON input # Validate JSON input
if not isinstance(extractedContent, dict): if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary") raise ValueError("extractedContent must be a JSON dictionary")
@ -469,7 +472,7 @@ class GenerationService:
docTitle = doc.get("title", title) docTitle = doc.get("title", title)
# Render this document (can return multiple files, e.g., HTML + images) # Render this document (can return multiple files, e.g., HTML + images)
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService) renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle)
allRenderedDocuments.extend(renderedDocs) allRenderedDocuments.extend(renderedDocs)
logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)") logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")

View file

@ -84,7 +84,7 @@ class BaseRenderer(ABC):
return list(supportedSectionTypes) return list(supportedSectionTypes)
@abstractmethod @abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
""" """
Render extracted JSON content to multiple documents. Render extracted JSON content to multiple documents.
Each renderer must implement this method. Each renderer must implement this method.
@ -95,6 +95,9 @@ class BaseRenderer(ABC):
title: Report title title: Report title
userPrompt: Original user prompt for context userPrompt: Original user prompt for context
aiService: AI service instance for additional processing aiService: AI service instance for additional processing
style: Fully-resolved unified style dict from styleDefaults.resolveStyle().
When provided, renderers use these values instead of their
own defaults / AI-generated styles.
Returns: Returns:
List of RenderedDocument objects. List of RenderedDocument objects.
@ -102,6 +105,112 @@ class BaseRenderer(ABC):
Even if only one document is returned, it must be wrapped in a list. Even if only one document is returned, it must be wrapped in a list.
""" """
pass pass
def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]:
"""Convert the unified resolvedStyle dict (from styleDefaults) into
the renderer-internal style-set format that all rendering methods already
consume. Override in subclasses for format-specific tweaks."""
h1 = style["headings"]["h1"]
h2 = style["headings"]["h2"]
h3 = style["headings"].get("h3", h2)
h4 = style["headings"].get("h4", h3)
tbl = style["table"]
para = style["paragraph"]
lst = style["list"]
cb = style["codeBlock"]
return {
"title": {
"font_size": h1["sizePt"], "color": h1["color"],
"bold": h1.get("weight") == "bold", "align": "left",
},
"heading1": {
"font_size": h1["sizePt"], "color": h1["color"],
"bold": h1.get("weight") == "bold", "align": "left",
},
"heading2": {
"font_size": h2["sizePt"], "color": h2["color"],
"bold": h2.get("weight") == "bold", "align": "left",
},
"heading3": {
"font_size": h3["sizePt"], "color": h3["color"],
"bold": h3.get("weight") == "bold", "align": "left",
},
"heading4": {
"font_size": h4["sizePt"], "color": h4["color"],
"bold": h4.get("weight") == "bold", "align": "left",
},
"paragraph": {
"font_size": para["sizePt"], "color": para["color"],
"bold": False, "align": "left",
},
"table_header": {
"background": tbl["headerBg"], "text_color": tbl["headerFg"],
"bold": True, "align": "center",
},
"table_cell": {
"background": tbl["rowBandingOdd"], "text_color": para["color"],
"bold": False, "align": "left",
},
"table_border": {
"style": "grid", "color": tbl["borderColor"],
},
"bullet_list": {
"font_size": lst["sizePt"], "color": para["color"],
"indent": lst["indentPt"],
},
"code_block": {
"font": style["fonts"]["monospace"],
"font_size": cb["fontSizePt"], "color": para["color"],
"background": cb["background"],
},
}
@staticmethod
def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any:
"""Extract inline runs from new-format content, falling back to old format.
For paragraphs (itemsKey=None):
new: content["inlineRuns"] -> List[InlineRun]
old: content["text"] -> wrapped in [{"type":"text","value":text}]
For list items (itemsKey="items"):
new: content["items"] is List[List[InlineRun]]
old: content["items"] is List[str] or List[{"text":}]
Returns the items list (caller decides per-item conversion).
For table headers/cells:
new: each header/cell is List[InlineRun]
old: each header/cell is a plain str
Caller handles per-cell.
"""
if itemsKey:
return content.get(itemsKey, [])
inlineRuns = content.get("inlineRuns")
if inlineRuns:
return inlineRuns
text = content.get("text", "")
if text:
return [{"type": "text", "value": text}]
return []
@staticmethod
def _inlineRunsForCell(cell) -> list:
"""Normalize a single table header or cell value to List[InlineRun].
Accepts either a plain string or an already-correct list of run dicts."""
if isinstance(cell, list):
return cell
return [{"type": "text", "value": str(cell) if cell is not None else ""}]
@staticmethod
def _inlineRunsForListItem(item) -> list:
"""Normalize a single list item to List[InlineRun].
Accepts a plain string, a dict with 'text', or an already-correct list of run dicts."""
if isinstance(item, list):
return item
if isinstance(item, dict):
text = item.get("text", "")
return [{"type": "text", "value": text}]
return [{"type": "text", "value": str(item)}]
def _determineFilename(self, title: str, mimeType: str) -> str: def _determineFilename(self, title: str, mimeType: str) -> str:
"""Determine filename from title and mimeType.""" """Determine filename from title and mimeType."""

View file

@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes) return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling.""" """Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try: try:
if not DOCX_AVAILABLE: if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml from .rendererHtml import RendererHtml
htmlRenderer = RendererHtml() htmlRenderer = RendererHtml()
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService) return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate DOCX using AI-analyzed styling # Generate DOCX using AI-analyzed styling
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
# Extract metadata for document type and other info # Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {} metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer):
) )
] ]
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str:
"""Generate DOCX content from structured JSON document.""" """Generate DOCX content from structured JSON document."""
import time import time
start_time = time.time() start_time = time.time()
try: try:
self.logger.debug("_generateDocxFromJson: Starting document generation") self.logger.debug("_generateDocxFromJson: Starting document generation")
# Create new document
doc = Document() doc = Document()
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s") self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
# Get style set: use styles from metadata if available, otherwise enhance with AI # Phase 3: prefer unified style when provided
template_from_metadata = None
if json_content and isinstance(json_content.get("metadata"), dict):
template_from_metadata = json_content["metadata"].get("templateName")
style_start = time.time() style_start = time.time()
self.logger.debug("_generateDocxFromJson: About to get style set") self.logger.debug("_generateDocxFromJson: About to get style set")
styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata) if unifiedStyle:
styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
self._unifiedStyle = unifiedStyle
else:
template_from_metadata = None
if json_content and isinstance(json_content.get("metadata"), dict):
template_from_metadata = json_content["metadata"].get("templateName")
styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
self._unifiedStyle = None
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s") self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
# Setup basic document styles and create all styles from style set # Setup basic document styles and create all styles from style set
@ -298,11 +301,11 @@ class RendererDocx(BaseRenderer):
def _setupBasicDocumentStyles(self, doc: Document) -> None: def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles.""" """Set up basic document styles."""
try: try:
# Set default font
style = doc.styles['Normal'] style = doc.styles['Normal']
font = style.font font = style.font
font.name = 'Calibri' us = getattr(self, '_unifiedStyle', None)
font.size = Pt(11) font.name = us["fonts"]["primary"] if us else 'Calibri'
font.size = Pt(us["paragraph"]["sizePt"] if us else 11)
except Exception as e: except Exception as e:
self.logger.warning(f"Could not set up basic document styles: {str(e)}") self.logger.warning(f"Could not set up basic document styles: {str(e)}")
@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer):
def _addMarkdownInlineRuns(self, paragraph, text: str) -> None: def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
"""Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph.""" """Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph."""
pos = 0 pos = 0
us = getattr(self, '_unifiedStyle', None)
monoFont = us["fonts"]["monospace"] if us else "Courier New"
for m in self._MD_INLINE_RE.finditer(text): for m in self._MD_INLINE_RE.finditer(text):
if m.start() > pos: if m.start() > pos:
paragraph.add_run(text[pos:m.start()]) paragraph.add_run(text[pos:m.start()])
@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer):
paragraph.add_run(m.group(6)).italic = True paragraph.add_run(m.group(6)).italic = True
elif m.group(7): elif m.group(7):
run = paragraph.add_run(m.group(7)) run = paragraph.add_run(m.group(7))
run.font.name = "Courier New" run.font.name = monoFont
run.font.size = Pt(9) run.font.size = Pt(9)
pos = m.end() pos = m.end()
if pos < len(text): if pos < len(text):
paragraph.add_run(text[pos:]) paragraph.add_run(text[pos:])
def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None:
"""Process a list of InlineRun dicts into python-docx Runs on a paragraph."""
us = getattr(self, '_unifiedStyle', None)
monoFont = us["fonts"]["monospace"] if us else "Courier New"
for run in runs:
runType = run.get("type", "text")
value = run.get("value", "")
if runType == "text":
paragraph.add_run(value)
elif runType == "bold":
paragraph.add_run(value).bold = True
elif runType == "italic":
paragraph.add_run(value).italic = True
elif runType == "code":
r = paragraph.add_run(value)
r.font.name = monoFont
r.font.size = Pt(9)
elif runType == "link":
r = paragraph.add_run(value)
r.font.underline = True
r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9)
elif runType == "image":
b64 = run.get("base64Data", "")
if b64:
try:
imgBytes = base64.b64decode(b64)
imgStream = io.BytesIO(imgBytes)
paragraph.add_run().add_picture(imgStream, width=Inches(2))
except Exception:
paragraph.add_run(f"[Image: {run.get('altText', '')}]")
else:
paragraph.add_run(value)
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
""" """
Render a JSON table to DOCX using AI-generated styles. Render a JSON table to DOCX using AI-generated styles.
@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True) self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None: def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None:
""" """
High-performance table rendering using direct XML manipulation. High-performance table rendering using direct XML manipulation.
@ -546,24 +584,34 @@ class RendererDocx(BaseRenderer):
# Build all rows using fast XML # Build all rows using fast XML
rows_start = time.time() rows_start = time.time()
# Header row # Resolve header style colors
headerRow = self._createTableRowXml(headers, isHeader=True) tableStyle = styles.get("table_header", {})
headerBg = tableStyle.get("background", "")
headerFg = tableStyle.get("text_color", "")
# Flatten inline-run headers to plain strings for fast XML path
flatHeaders = []
for h in headers:
runs = self._inlineRunsForCell(h)
flatHeaders.append("".join(r.get("value", "") for r in runs))
headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None)
tbl.append(headerRow) tbl.append(headerRow)
header_time = time.time() - rows_start header_time = time.time() - rows_start
self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s") self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
# Data rows - batch process for performance
data_start = time.time() data_start = time.time()
rowCount = len(rows) rowCount = len(rows)
for idx, rowData in enumerate(rows): for idx, rowData in enumerate(rows):
# Convert all cells to strings cellTexts = []
cellTexts = [str(cell) if cell is not None else '' for cell in rowData] for cell in rowData:
# Pad if needed runs = self._inlineRunsForCell(cell)
while len(cellTexts) < len(headers): cellTexts.append("".join(r.get("value", "") for r in runs))
while len(cellTexts) < len(flatHeaders):
cellTexts.append('') cellTexts.append('')
row = self._createTableRowXml(cellTexts, isHeader=False) row = self._createTableRowXml(cellTexts, isHeader=False)
tbl.append(row) tbl.append(row)
@ -641,74 +689,64 @@ class RendererDocx(BaseRenderer):
return tblBorders return tblBorders
def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any: def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any:
""" """Create a table row XML element with cells.
Create a table row XML element with cells. Fast-path: builds row XML directly via lxml."""
This is the core fast-path: builds the row XML directly without
going through python-docx's slow cell.text assignment.
"""
from docx.oxml.shared import OxmlElement, qn from docx.oxml.shared import OxmlElement, qn
if headerBgHex is None:
us = getattr(self, '_unifiedStyle', None)
headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864'
else:
headerBgHex = headerBgHex.lstrip('#')
if headerFgHex is None:
us = getattr(self, '_unifiedStyle', None)
headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF'
else:
headerFgHex = headerFgHex.lstrip('#')
tr = OxmlElement('w:tr') tr = OxmlElement('w:tr')
# Row properties for header
if isHeader: if isHeader:
trPr = OxmlElement('w:trPr') trPr = OxmlElement('w:trPr')
tblHeader = OxmlElement('w:tblHeader') trPr.append(OxmlElement('w:tblHeader'))
trPr.append(tblHeader)
tr.append(trPr) tr.append(trPr)
for cellText in cells: for cellText in cells:
# Create cell
tc = OxmlElement('w:tc') tc = OxmlElement('w:tc')
# Cell properties
tcPr = OxmlElement('w:tcPr') tcPr = OxmlElement('w:tcPr')
tcW = OxmlElement('w:tcW') tcW = OxmlElement('w:tcW')
tcW.set(qn('w:type'), 'auto') tcW.set(qn('w:type'), 'auto')
tcW.set(qn('w:w'), '0') tcW.set(qn('w:w'), '0')
tcPr.append(tcW) tcPr.append(tcW)
# Header cell styling - light blue background
if isHeader: if isHeader:
shd = OxmlElement('w:shd') shd = OxmlElement('w:shd')
shd.set(qn('w:val'), 'clear') shd.set(qn('w:val'), 'clear')
shd.set(qn('w:color'), 'auto') shd.set(qn('w:color'), 'auto')
shd.set(qn('w:fill'), '4472C4') # Professional blue shd.set(qn('w:fill'), headerBgHex)
tcPr.append(shd) tcPr.append(shd)
tc.append(tcPr) tc.append(tcPr)
# Paragraph with text
p = OxmlElement('w:p') p = OxmlElement('w:p')
# Add run with text
r = OxmlElement('w:r') r = OxmlElement('w:r')
# Header text styling - bold and white
if isHeader: if isHeader:
rPr = OxmlElement('w:rPr') rPr = OxmlElement('w:rPr')
b = OxmlElement('w:b') rPr.append(OxmlElement('w:b'))
rPr.append(b)
# White text color
color = OxmlElement('w:color') color = OxmlElement('w:color')
color.set(qn('w:val'), 'FFFFFF') color.set(qn('w:val'), headerFgHex)
rPr.append(color) rPr.append(color)
r.append(rPr) r.append(rPr)
# Text element
t = OxmlElement('w:t') t = OxmlElement('w:t')
# Preserve spaces if text starts/ends with whitespace
if cellText and (cellText[0] == ' ' or cellText[-1] == ' '): if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
t.text = cellText t.text = cellText
r.append(t) r.append(t)
p.append(r) p.append(r)
tc.append(p) tc.append(p)
tr.append(tc) tr.append(tc)
return tr return tr
def _applyHorizontalBordersOnly(self, table) -> None: def _applyHorizontalBordersOnly(self, table) -> None:
@ -836,47 +874,37 @@ class RendererDocx(BaseRenderer):
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance.""" """Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
try: try:
# Extract from nested content structure
content = list_data.get("content", {}) content = list_data.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return return
items = content.get("items", []) items = content.get("items", [])
bullet_style = styles.get("bullet_list", {}) bullet_style = styles.get("bullet_list", {})
# Pre-calculate and cache style objects to avoid repeated parsing font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None
font_size_pt = None
text_color_rgb = None text_color_rgb = None
if bullet_style: if bullet_style.get("color"):
if "font_size" in bullet_style: color_hex = bullet_style["color"].lstrip('#')
font_size_pt = Pt(bullet_style["font_size"]) text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
if "color" in bullet_style:
color_hex = bullet_style["color"].lstrip('#')
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
for item in items: for item in items:
itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "") itemRuns = self._inlineRunsForListItem(item)
if not itemText: if not itemRuns or not any(r.get("value") for r in itemRuns):
continue continue
para = doc.add_paragraph(style='List Bullet') para = doc.add_paragraph(style='List Bullet')
self._addMarkdownInlineRuns(para, itemText) isNewRunFormat = isinstance(item, list)
if isNewRunFormat:
# Apply bullet list styling from style set - use cached objects self._renderInlineRuns(itemRuns, para, styles)
if bullet_style and para.runs: else:
# Use direct access instead of iterating itemText = "".join(r.get("value", "") for r in itemRuns)
if len(para.runs) > 0: self._addMarkdownInlineRuns(para, itemText)
run = para.runs[0]
if font_size_pt: if bullet_style and para.runs and len(para.runs) > 0:
run.font.size = font_size_pt run = para.runs[0]
if text_color_rgb: if font_size_pt:
run.font.color.rgb = text_color_rgb run.font.size = font_size_pt
else: if text_color_rgb:
# Create run if none exists run.font.color.rgb = text_color_rgb
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
if text_color_rgb:
run.font.color.rgb = text_color_rgb
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
@ -905,90 +933,79 @@ class RendererDocx(BaseRenderer):
def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles.""" """Render a JSON paragraph to DOCX using AI-generated styles."""
try: try:
# Extract from nested content structure
content = paragraph_data.get("content", {}) content = paragraph_data.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
text = content.get("text", "") inlineRuns = self._inlineRunsFromContent(content)
elif isinstance(content, str): elif isinstance(content, str):
text = content inlineRuns = [{"type": "text", "value": content}]
else: else:
text = "" inlineRuns = []
# CRITICAL: Prevent rendering base64 image data as text if not inlineRuns:
# Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG) return
if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or
(len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))): plainText = "".join(r.get("value", "") for r in inlineRuns)
# This looks like base64 data - don't render as text if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})") (len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))):
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})")
para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]") para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
if para.runs: if para.runs:
para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error para.runs[0].font.color.rgb = RGBColor(255, 0, 0)
return return
if text: para = doc.add_paragraph()
para = doc.add_paragraph() hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None
self._addMarkdownInlineRuns(para, text) if hasNewRuns:
paragraph_style = styles.get("paragraph", {}) self._renderInlineRuns(inlineRuns, para, styles)
if paragraph_style: else:
# Pre-calculate and cache style objects self._addMarkdownInlineRuns(para, plainText)
font_size_pt = None
text_color_rgb = None paragraph_style = styles.get("paragraph", {})
if "font_size" in paragraph_style: if paragraph_style:
font_size_pt = Pt(paragraph_style["font_size"]) font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None
if "color" in paragraph_style: text_color_rgb = None
color_hex = paragraph_style["color"].lstrip('#') if "color" in paragraph_style:
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)) color_hex = paragraph_style["color"].lstrip('#')
bold = paragraph_style.get("bold", False) text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
bold = paragraph_style.get("bold", False)
# Use direct access instead of iterating if len(para.runs) > 0:
if len(para.runs) > 0: run = para.runs[0]
run = para.runs[0] if font_size_pt:
if font_size_pt: run.font.size = font_size_pt
run.font.size = font_size_pt run.font.bold = bold
run.font.bold = bold if text_color_rgb:
if text_color_rgb: run.font.color.rgb = text_color_rgb
run.font.color.rgb = text_color_rgb if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
elif align == "right":
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
else: else:
# Create run if none exists para.alignment = WD_ALIGN_PARAGRAPH.LEFT
run = para.add_run()
if font_size_pt:
run.font.size = font_size_pt
run.font.bold = bold
if text_color_rgb:
run.font.color.rgb = text_color_rgb
if "align" in paragraph_style:
align = paragraph_style["align"]
if align == "center":
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
elif align == "right":
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
else:
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles.""" """Render a JSON code block to DOCX using AI-generated styles."""
try: try:
# Extract from nested content structure
content = code_data.get("content", {}) content = code_data.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return return
code = content.get("code", "") code = content.get("code", "")
language = content.get("language", "") language = content.get("language", "")
code_style = styles.get("code_block", {}) code_style = styles.get("code_block", {})
us = getattr(self, '_unifiedStyle', None)
if code: if code:
if language: if language:
lang_para = doc.add_paragraph(f"Code ({language}):") lang_para = doc.add_paragraph(f"Code ({language}):")
if len(lang_para.runs) > 0: if len(lang_para.runs) > 0:
lang_para.runs[0].bold = True lang_para.runs[0].bold = True
# Pre-calculate and cache style objects code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New")
code_font_name = code_style.get("font", "Courier New") code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9))
code_font_size_pt = Pt(code_style.get("font_size", 9))
code_text_color_rgb = None code_text_color_rgb = None
if "color" in code_style: if "color" in code_style:
color_hex = code_style["color"].lstrip('#') color_hex = code_style["color"].lstrip('#')

View file

@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes) return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
""" """
Render HTML document with images as separate files. Render HTML document with images as separate files.
Returns list of documents: [HTML document, image1, image2, ...] Returns list of documents: [HTML document, image1, image2, ...]
@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer):
self._renderedImages = images self._renderedImages = images
# Generate HTML using AI-analyzed styling # Generate HTML using AI-analyzed styling
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style)
# Replace base64 data URIs with relative file paths if images exist # Replace base64 data URIs with relative file paths if images exist
if images: if images:
@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer):
return resultDocuments return resultDocuments
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling.""" """Generate HTML content from structured JSON document using AI-generated styling."""
try: try:
# Get style set: use styles from metadata if available, otherwise enhance with AI # Use unified style when provided, otherwise fall back to existing flow
styles = await self._getStyleSet(jsonContent, userPrompt, aiService) if style:
styles = self._convertUnifiedStyleToInternal(style)
self._unifiedStyle = style
else:
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
self._unifiedStyle = None
# Validate JSON structure # Validate JSON structure
if not self._validateJsonStructure(jsonContent): if not self._validateJsonStructure(jsonContent):
@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer):
def _generateCssStyles(self, styles: Dict[str, Any]) -> str: def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions.""" """Generate CSS from style definitions."""
# When unified style is available, generate CSS directly from it
if getattr(self, "_unifiedStyle", None):
return self._generateCssFromUnifiedStyle(self._unifiedStyle)
css_parts = [] css_parts = []
# Body styles # Body styles
@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts) return '\n'.join(css_parts)
def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str:
"""Generate CSS directly from unified style dict."""
fonts = style.get("fonts", {})
colors = style.get("colors", {})
headings = style.get("headings", {})
para = style.get("paragraph", {})
tbl = style.get("table", {})
lst = style.get("list", {})
cb = style.get("codeBlock", {})
page = style.get("page", {})
primaryFont = fonts.get("primary", "Arial, sans-serif")
monoFont = fonts.get("monospace", "Courier New, monospace")
bgColor = colors.get("background", "#FFFFFF")
primaryColor = colors.get("primary", "#1F3864")
paraColor = para.get("color", "#333333")
paraSizePt = para.get("sizePt", 11)
lineSpacing = para.get("lineSpacing", 1.15)
css_parts = []
# Body
css_parts.append("body {")
css_parts.append(f" font-family: {primaryFont};")
css_parts.append(f" background: {bgColor};")
css_parts.append(f" color: {paraColor};")
css_parts.append(f" font-size: {paraSizePt}pt;")
css_parts.append(f" line-height: {lineSpacing};")
margins = page.get("marginsPt", {})
if margins:
css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;")
else:
css_parts.append(" margin: 0; padding: 20px;")
css_parts.append("}")
# Document title (uses h1 style)
h1 = headings.get("h1", {})
css_parts.append(".document-title {")
css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;")
css_parts.append(f" color: {h1.get('color', primaryColor)};")
css_parts.append(f" font-weight: {h1.get('weight', 'bold')};")
css_parts.append(" margin: 0 0 1em 0;")
css_parts.append("}")
# Headings h1-h4
for level in range(1, 5):
key = f"h{level}"
h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {}))
css_parts.append(f"h{level} {{")
css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;")
css_parts.append(f" color: {h.get('color', primaryColor)};")
css_parts.append(f" font-weight: {h.get('weight', 'bold')};")
css_parts.append(f" margin: 1.2em 0 0.4em 0;")
css_parts.append("}")
# Paragraphs
css_parts.append("p {")
css_parts.append(f" font-size: {paraSizePt}pt;")
css_parts.append(f" color: {paraColor};")
css_parts.append(f" line-height: {lineSpacing};")
css_parts.append(" margin: 0 0 1em 0;")
css_parts.append("}")
# Tables
borderColor = tbl.get("borderColor", "#DEE2E6")
css_parts.append("table {")
css_parts.append(f" border-collapse: collapse;")
css_parts.append(f" width: 100%;")
css_parts.append(f" margin: 1em 0;")
css_parts.append(f" border: 1px solid {borderColor};")
css_parts.append("}")
# Table headers
css_parts.append("th {")
css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};")
css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};")
css_parts.append(" font-weight: bold;")
css_parts.append(" text-align: center;")
css_parts.append(f" padding: 10px;")
css_parts.append(f" border: 1px solid {borderColor};")
css_parts.append("}")
# Table cells
css_parts.append("td {")
css_parts.append(f" color: {paraColor};")
css_parts.append(" padding: 8px;")
css_parts.append(f" border: 1px solid {borderColor};")
css_parts.append("}")
# Lists
css_parts.append("ul {")
css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;")
css_parts.append(f" color: {paraColor};")
css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;")
css_parts.append(" margin: 0 0 1em 0;")
css_parts.append("}")
# Code blocks
css_parts.append("pre {")
css_parts.append(f" font-family: {monoFont};")
css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;")
css_parts.append(f" color: {paraColor};")
css_parts.append(f" background: {cb.get('background', '#F8F9FA')};")
css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};")
css_parts.append(" border-radius: 4px;")
css_parts.append(" padding: 1em;")
css_parts.append(" margin: 1em 0;")
css_parts.append(" overflow-x: auto;")
css_parts.append("}")
# Images
css_parts.append("img {")
css_parts.append(" max-width: 100%;")
css_parts.append(" height: auto;")
css_parts.append(" margin: 1em 0;")
css_parts.append(" border-radius: 4px;")
css_parts.append("}")
# Generated info
css_parts.append(".generated-info {")
css_parts.append(" font-size: 0.9em;")
css_parts.append(" color: #666;")
css_parts.append(" text-align: center;")
css_parts.append(" margin-top: 2em;")
css_parts.append(" padding-top: 1em;")
css_parts.append(" border-top: 1px solid #ddd;")
css_parts.append("}")
return '\n'.join(css_parts)
def _renderInlineRuns(self, runs: list) -> str:
"""Convert inline runs to HTML markup."""
import html as htmlLib
parts = []
for run in runs:
runType = run.get("type", "text")
value = htmlLib.escape(run.get("value", ""))
if runType == "text":
parts.append(value)
elif runType == "bold":
parts.append(f"<strong>{value}</strong>")
elif runType == "italic":
parts.append(f"<em>{value}</em>")
elif runType == "code":
parts.append(f"<code>{value}</code>")
elif runType == "link":
href = htmlLib.escape(run.get("href", ""))
parts.append(f'<a href="{href}">{value}</a>')
elif runType == "image":
b64 = run.get("base64Data", "")
mime = run.get("mimeType", "image/png")
alt = value
if b64:
parts.append(f'<img src="data:{mime};base64,{b64}" alt="{alt}" style="max-width:100%;height:auto;">')
else:
parts.append(value)
return "".join(parts)
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles. """Render a single JSON section to HTML using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text. Supports three content formats: reference, object (base64), extracted_text.
@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer):
# Regular paragraph element - extract from nested content structure (standard JSON format) # Regular paragraph element - extract from nested content structure (standard JSON format)
content = element.get("content", {}) content = element.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
htmlParts.append(f'<p>{self._renderInlineRuns(inlineRuns)}</p>')
continue
text = content.get("text", "") text = content.get("text", "")
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content
@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer):
# Table header # Table header
htmlParts.append('<thead><tr>') htmlParts.append('<thead><tr>')
for header in headers: for header in headers:
htmlParts.append(f'<th>{header}</th>') runs = self._inlineRunsForCell(header)
htmlParts.append(f'<th>{self._renderInlineRuns(runs)}</th>')
htmlParts.append('</tr></thead>') htmlParts.append('</tr></thead>')
# Table body # Table body
@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer):
for row in rows: for row in rows:
htmlParts.append('<tr>') htmlParts.append('<tr>')
for cellData in row: for cellData in row:
htmlParts.append(f'<td>{cellData}</td>') runs = self._inlineRunsForCell(cellData)
htmlParts.append(f'<td>{self._renderInlineRuns(runs)}</td>')
htmlParts.append('</tr>') htmlParts.append('</tr>')
htmlParts.append('</tbody>') htmlParts.append('</tbody>')
@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer):
htmlParts = ['<ul>'] htmlParts = ['<ul>']
for item in items: for item in items:
if isinstance(item, str): runs = self._inlineRunsForListItem(item)
htmlParts.append(f'<li>{item}</li>') htmlParts.append(f'<li>{self._renderInlineRuns(runs)}</li>')
elif isinstance(item, dict) and "text" in item:
htmlParts.append(f'<li>{item["text"]}</li>')
htmlParts.append('</ul>') htmlParts.append('</ul>')
return '\n'.join(htmlParts) return '\n'.join(htmlParts)
@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer):
if isinstance(el, dict): if isinstance(el, dict):
content = el.get("content", {}) content = el.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
texts.append(self._renderInlineRuns(inlineRuns))
continue
text = content.get("text", "") text = content.get("text", "")
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content
@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer):
elif isinstance(el, str): elif isinstance(el, str):
texts.append(el) texts.append(el)
if texts: if texts:
# Join multiple paragraphs with <p> tags
return '\n'.join(f'<p>{text}</p>' for text in texts) return '\n'.join(f'<p>{text}</p>' for text in texts)
return "" return ""
elif isinstance(paragraphData, str): elif isinstance(paragraphData, str):
return f'<p>{paragraphData}</p>' return f'<p>{paragraphData}</p>'
elif isinstance(paragraphData, dict): elif isinstance(paragraphData, dict):
# Handle nested content structure: element.content vs element.text
# Extract from nested content structure # Extract from nested content structure
content = paragraphData.get("content", {}) content = paragraphData.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
# New format: inlineRuns
inlineRuns = content.get("inlineRuns")
if inlineRuns and isinstance(inlineRuns, list):
return f'<p>{self._renderInlineRuns(inlineRuns)}</p>'
text = content.get("text", "") text = content.get("text", "")
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content

View file

@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes) return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling.""" """Render extracted JSON content to PDF format using AI-analyzed styling."""
try: try:
if not REPORTLAB_AVAILABLE: if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available # Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml from .rendererHtml import RendererHtml
html_renderer = RendererHtml() html_renderer = RendererHtml()
return await html_renderer.render(extractedContent, title, userPrompt, aiService) return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate PDF using AI-analyzed styling # Generate PDF using AI-analyzed styling
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
# Extract metadata for document type and other info # Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {} metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer):
) )
] ]
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling.""" """Generate PDF content from structured JSON document using AI-generated styling."""
try: try:
# Get style set: use styles from metadata if available, otherwise enhance with AI # Get style set from unified style or legacy approach
styles = await self._getStyleSet(json_content, userPrompt, aiService) if unifiedStyle:
styles = self._convertUnifiedStyleToInternal(unifiedStyle)
self._unifiedStyle = unifiedStyle
for level in range(1, 7):
hKey = f"heading{level}"
if hKey not in styles:
styles[hKey] = self._defaultHeadingStyleDef(level)
else:
styles[hKey].setdefault("space_after", 12)
styles[hKey].setdefault("space_before", 12)
styles["paragraph"].setdefault("space_after", 6)
styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
styles["bullet_list"].setdefault("space_after", 3)
styles["code_block"].setdefault("space_after", 6)
styles["code_block"].setdefault("align", "left")
else:
styles = await self._getStyleSet(json_content, userPrompt, aiService)
self._unifiedStyle = None
# Validate JSON structure # Validate JSON structure
if not self._validateJsonStructure(json_content): if not self._validateJsonStructure(json_content):
@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer):
# Create a buffer to hold the PDF # Create a buffer to hold the PDF
buffer = io.BytesIO() buffer = io.BytesIO()
# Create PDF document # Create PDF document with unified page margins or defaults
doc = SimpleDocTemplate( pageCfg = unifiedStyle["page"] if unifiedStyle else None
buffer, if pageCfg:
pagesize=A4, m = pageCfg["marginsPt"]
rightMargin=72, doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"])
leftMargin=72, else:
topMargin=72, doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
bottomMargin=18
)
# Build PDF content (no cover page — body starts on page 1; filename still uses `title`) # Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
story = [] story = []
@ -609,6 +624,31 @@ class RendererPdf(BaseRenderer):
.replace(">", "&gt;") .replace(">", "&gt;")
) )
def _renderInlineRunsToPdfXml(self, runs: list) -> str:
"""Convert inline runs to ReportLab Paragraph XML."""
parts = []
us = getattr(self, '_unifiedStyle', None)
monoFont = us["fonts"]["monospace"] if us else "Courier"
for run in runs:
runType = run.get("type", "text")
value = self._escapeReportlabXml(run.get("value", ""))
if runType == "text":
parts.append(value)
elif runType == "bold":
parts.append(f"<b>{value}</b>")
elif runType == "italic":
parts.append(f"<i>{value}</i>")
elif runType == "code":
parts.append(f'<font name="{monoFont}">{value}</font>')
elif runType == "link":
href = self._escapeReportlabXml(run.get("href", ""))
parts.append(f'<a href="{href}">{value}</a>')
elif runType == "image":
parts.append(f"[Image: {value}]")
else:
parts.append(value)
return "".join(parts)
def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str: def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
"""Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately).""" """Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
if not text: if not text:
@ -744,10 +784,10 @@ class RendererPdf(BaseRenderer):
return [] return []
headers = content.get("headers", []) headers = content.get("headers", [])
rows = content.get("rows", []) rows = content.get("rows", [])
if not headers or not rows: if not headers or not rows:
return [] return []
numCols = len(headers) numCols = len(headers)
colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1) colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
colWidths = [colWidth] * numCols colWidths = [colWidth] * numCols
@ -755,8 +795,12 @@ class RendererPdf(BaseRenderer):
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header") hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell") cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
def _cellPara(val, ps): def _cellPara(cell, ps):
return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps) runs = self._inlineRunsForCell(cell)
if isinstance(cell, list):
xml = self._renderInlineRunsToPdfXml(runs)
return Paragraph(_wrapEmojiSpansInXml(xml), ps)
return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps)
headerRow = [_cellPara(h, hdrPs) for h in headers] headerRow = [_cellPara(h, hdrPs) for h in headers]
bodyRows = [] bodyRows = []
@ -786,7 +830,7 @@ class RendererPdf(BaseRenderer):
] ]
table.setStyle(TableStyle(table_style)) table.setStyle(TableStyle(table_style))
return [table, Spacer(1, 12)] return [table, Spacer(1, 12)]
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return [] return []
@ -794,32 +838,29 @@ class RendererPdf(BaseRenderer):
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles.""" """Render a JSON bullet list to PDF elements using AI-generated styles."""
try: try:
# Extract from nested content structure
content = list_data.get("content", {}) content = list_data.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return [] return []
items = content.get("items", []) items = content.get("items", [])
bullet_style_def = styles.get("bullet_list", {}) bulletStyleDef = styles.get("bullet_list", {})
normalStyle = self._createNormalStyle(styles)
elements = [] elements = []
for item in items: for item in items:
if isinstance(item, str): runs = self._inlineRunsForListItem(item)
elements.append( if isinstance(item, list):
Paragraph(f"{self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles)) xml = self._renderInlineRunsToPdfXml(runs)
) elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle))
elif isinstance(item, str):
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle))
elif isinstance(item, dict) and "text" in item: elif isinstance(item, dict) and "text" in item:
elements.append( elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle))
Paragraph(
f"{self._markdownInlineToReportlabXml(item['text'])}",
self._createNormalStyle(styles),
)
)
if elements: if elements:
elements.append(Spacer(1, bullet_style_def.get("space_after", 3))) elements.append(Spacer(1, bulletStyleDef.get("space_after", 3)))
return elements return elements
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
return [] return []
@ -848,20 +889,27 @@ class RendererPdf(BaseRenderer):
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles.""" """Render a JSON paragraph to PDF elements using AI-generated styles."""
try: try:
# Extract from nested content structure
content = paragraph_data.get("content", {}) content = paragraph_data.get("content", {})
if isinstance(content, dict): if isinstance(content, str):
text = content.get("text", "") content = {"text": content}
elif isinstance(content, str): if not isinstance(content, dict):
text = content return []
else:
text = "" normalStyle = self._createNormalStyle(styles)
if "inlineRuns" in content:
runs = self._inlineRunsFromContent(content)
xml = self._renderInlineRunsToPdfXml(runs)
if xml:
return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)]
return []
text = content.get("text", "")
if text: if text:
return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))] return [self._paragraphFromInlineMarkdown(text, normalStyle)]
return [] return []
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return [] return []

View file

@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes) return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
""" """
Render content as PowerPoint presentation from JSON data. Render content as PowerPoint presentation from JSON data.
@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer):
title: Title for the presentation title: Title for the presentation
userPrompt: User prompt for AI styling userPrompt: User prompt for AI styling
aiService: AI service for styling aiService: AI service for styling
**kwargs: Additional rendering options style: Unified style dict from pipeline (preferred over AI-generated styles)
Returns: Returns:
Base64-encoded PowerPoint presentation as string Base64-encoded PowerPoint presentation as string
@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer):
from pptx.dml.color import RGBColor from pptx.dml.color import RGBColor
import re import re
# Get style set: use styles from metadata if available, otherwise enhance with AI # Get style set: prefer unified style, then metadata, then AI-enhanced
styles = await self._getStyleSet(extractedContent, userPrompt, aiService) if style:
internalStyle = self._convertUnifiedStyleToInternal(style)
defaultPptx = self._getDefaultStyleSet()
for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"):
internalStyle[key] = defaultPptx.get(key)
internalStyle["heading"] = internalStyle["heading1"]
internalStyle["subheading"] = internalStyle["heading2"]
styles = internalStyle
self._unifiedStyle = style
else:
styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
self._unifiedStyle = None
# Create new presentation # Create new presentation
prs = Presentation() prs = Presentation()
@ -910,15 +921,17 @@ JSON ONLY. NO OTHER TEXT."""
# Extract from nested content structure # Extract from nested content structure
content = paragraph_data.get("content", {}) content = paragraph_data.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
text = content.get("text", "") if content.get("inlineRuns"):
text = "".join(r.get("value", "") for r in content["inlineRuns"])
else:
text = content.get("text", "")
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content
else: else:
text = "" text = ""
if text: if text:
# Limit paragraph length based on content density max_length = 200
max_length = 200 # Default limit
if len(text) > max_length: if len(text) > max_length:
text = text[:max_length] + "..." text = text[:max_length] + "..."
@ -1303,6 +1316,32 @@ JSON ONLY. NO OTHER TEXT."""
r.text = text[pos:] r.text = text[pos:]
_applyBase(r) _applyBase(r)
def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None):
"""Process InlineRun dicts into pptx text runs."""
from pptx.util import Pt
paragraph.text = ""
us = getattr(self, '_unifiedStyle', None)
monoFont = us["fonts"]["monospace"] if us else "Courier New"
for run in runs:
runType = run.get("type", "text")
value = run.get("value", "")
r = paragraph.add_run()
r.text = value
if fontSize:
r.font.size = fontSize
if fontColor:
r.font.color.rgb = fontColor
if runType == "bold":
r.font.bold = True
elif runType == "italic":
r.font.italic = True
elif runType == "code":
r.font.name = monoFont
if fontSize and hasattr(fontSize, 'pt'):
r.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
elif runType == "link":
r.font.underline = True
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None: def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None:
"""Add a PowerPoint table to slide.""" """Add a PowerPoint table to slide."""
try: try:
@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT."""
cell = table.cell(0, col_idx) cell = table.cell(0, col_idx)
# Clear existing text and set new text # Clear existing text and set new text
cell.text_frame.clear() cell.text_frame.clear()
header_text = str(header) if header else "" cellRuns = self._inlineRunsForCell(header)
header_text = "".join(r.get("value", "") for r in cellRuns)
cell.text = header_text cell.text = header_text
# Ensure paragraph exists # Ensure paragraph exists
@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT."""
cell = table.cell(row_idx, col_idx) cell = table.cell(row_idx, col_idx)
# Clear existing text and set new text # Clear existing text and set new text
cell.text_frame.clear() cell.text_frame.clear()
cell_text = str(cell_data) if cell_data is not None else "" cellRuns = self._inlineRunsForCell(cell_data)
cell_text = "".join(r.get("value", "") for r in cellRuns)
cell.text = cell_text cell.text = cell_text
# Ensure paragraph exists # Ensure paragraph exists
@ -1462,9 +1503,8 @@ JSON ONLY. NO OTHER TEXT."""
fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47)))) fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))
for item in items: for item in items:
itemText = item.get("text", "") if isinstance(item, dict) else str(item) runs = self._inlineRunsForListItem(item)
if not itemText or not itemText.strip(): isNewFormat = isinstance(item, list)
continue
p = text_frame.add_paragraph() p = text_frame.add_paragraph()
p.level = 0 p.level = 0
@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT."""
p.space_before = Pt(2) p.space_before = Pt(2)
p.space_after = Pt(2) p.space_after = Pt(2)
# Consistent bullet prefix if isNewFormat:
self._addMarkdownInlineRuns(p, f"{itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs
self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor)
else:
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
if not itemText or not itemText.strip():
continue
self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
# Subitems # Subitems (only for dict-style items)
if isinstance(item, dict): if isinstance(item, dict):
for sub in item.get("subitems", []): for sub in item.get("subitems", []):
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub) subRuns = self._inlineRunsForListItem(sub)
if not subText: isSubNew = isinstance(sub, list)
continue
sp = text_frame.add_paragraph() sp = text_frame.add_paragraph()
sp.level = 0 sp.level = 0
sp.alignment = PP_ALIGN.LEFT sp.alignment = PP_ALIGN.LEFT
sp.space_before = Pt(1) sp.space_before = Pt(1)
sp.space_after = Pt(1) sp.space_after = Pt(1)
self._addMarkdownInlineRuns(sp, f" {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False) if isSubNew:
subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns
self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor)
else:
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
if not subText:
continue
self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
except Exception as e: except Exception as e:
logger.warning(f"Error adding bullet list to slide: {str(e)}") logger.warning(f"Error adding bullet list to slide: {str(e)}")
@ -1540,42 +1592,53 @@ JSON ONLY. NO OTHER TEXT."""
# Extract from nested content structure # Extract from nested content structure
content = element.get("content", {}) content = element.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
inlineRuns = self._inlineRunsFromContent(content)
hasInlineRuns = content.get("inlineRuns") is not None
text = content.get("text", "") text = content.get("text", "")
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content
inlineRuns = [{"type": "text", "value": text}] if text else []
hasInlineRuns = False
else: else:
text = "" text = ""
inlineRuns = []
hasInlineRuns = False
if text: if not inlineRuns and not text:
p = text_frame.add_paragraph() return
p.level = 0
p = text_frame.add_paragraph()
try: p.level = 0
if hasattr(p, 'paragraph_format'):
p.paragraph_format.bullet.type = None try:
except (AttributeError, TypeError): if hasattr(p, 'paragraph_format'):
pass p.paragraph_format.bullet.type = None
except (AttributeError, TypeError):
paragraph_style = styles.get("paragraph", {}) pass
base_font_size = paragraph_style.get("font_size", 14)
calculated_size = max(10, int(base_font_size * font_size_multiplier)) paragraph_style = styles.get("paragraph", {})
fSize = Pt(calculated_size) base_font_size = paragraph_style.get("font_size", 14)
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) calculated_size = max(10, int(base_font_size * font_size_multiplier))
fBold = paragraph_style.get("bold", False) fSize = Pt(calculated_size)
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
fBold = paragraph_style.get("bold", False)
if hasInlineRuns:
self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor)
else:
self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold) self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)
# Add proper spacing p.space_before = Pt(6)
p.space_before = Pt(6) # Space before paragraph p.space_after = Pt(6)
p.space_after = Pt(6) # Space after paragraph p.line_spacing = 1.2
p.line_spacing = 1.2 # Line spacing for readability
align = paragraph_style.get("align", "left")
align = paragraph_style.get("align", "left") if align == "center":
if align == "center": p.alignment = PP_ALIGN.CENTER
p.alignment = PP_ALIGN.CENTER elif align == "right":
elif align == "right": p.alignment = PP_ALIGN.RIGHT
p.alignment = PP_ALIGN.RIGHT else:
else: p.alignment = PP_ALIGN.LEFT
p.alignment = PP_ALIGN.LEFT
except Exception as e: except Exception as e:
logger.warning(f"Error adding paragraph to slide: {str(e)}") logger.warning(f"Error adding paragraph to slide: {str(e)}")

View file

@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer):
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
return list(supportedSectionTypes) return list(supportedSectionTypes)
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling.""" """Render extracted JSON content to Excel format using AI-analyzed styling."""
try: try:
if not OPENPYXL_AVAILABLE: if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available # Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv from .rendererCsv import RendererCsv
csvRenderer = RendererCsv() csvRenderer = RendererCsv()
return await csvRenderer.render(extractedContent, title, userPrompt, aiService) return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
# Generate Excel using AI-analyzed styling # Generate Excel using AI-analyzed styling
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService, style=style)
# Extract metadata for document type and other info # Extract metadata for document type and other info
metadata = extractedContent.get("metadata", {}) if extractedContent else {} metadata = extractedContent.get("metadata", {}) if extractedContent else {}
@ -298,15 +298,22 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate analysis sheet: {str(e)}") self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling.""" """Generate Excel content from structured JSON document using AI-generated styling."""
try: try:
# Debug output # Debug output
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
# Get style set: use styles from metadata if available, otherwise enhance with AI # Store unified style for use by inline-run helpers
styles = await self._getStyleSet(jsonContent, userPrompt, aiService) self._unifiedStyle = style
# Get style set: prefer unified style, fall back to legacy approach
if style:
styles = self._convertUnifiedStyleToInternal(style)
styles = self._convertColorsFormat(styles)
else:
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]}) # Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
if not self._validateJsonStructure(jsonContent): if not self._validateJsonStructure(jsonContent):
@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"} "code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
} }
def _renderInlineRuns(self, runs: list) -> str:
"""Flatten inline runs to plain text for Excel cells."""
return "".join(r.get("value", "") for r in runs)
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion.""" """Get AI styles with proper Excel color conversion."""
if not aiService: if not aiService:
@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer):
# Add headers with formatting - OPTIMIZED: use cached style objects # Add headers with formatting - OPTIMIZED: use cached style objects
for col, header in enumerate(headers, 1): for col, header in enumerate(headers, 1):
sanitized_header = self._sanitizeCellValue(header) runs = self._inlineRunsForCell(header)
headerText = self._renderInlineRuns(runs)
sanitized_header = self._sanitizeCellValue(headerText)
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
# Apply styling with fallbacks - use pre-calculated objects # Apply styling with fallbacks - use pre-calculated objects
@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer):
cell_values = cell_values[:header_count] cell_values = cell_values[:header_count]
for col, cell_value in enumerate(cell_values, 1): for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value) runs = self._inlineRunsForCell(cell_value)
cellText = self._renderInlineRuns(runs)
sanitized_value = self._sanitizeCellValue(cellText)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value) cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
# Apply styling with fallbacks - use pre-calculated objects # Apply styling with fallbacks - use pre-calculated objects
@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer):
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet. Expects nested content structure.""" """Add a list element to Excel sheet. Expects nested content structure."""
try: try:
# Extract from nested content structure
content = element.get("content", {}) content = element.get("content", {})
if not isinstance(content, dict): if not isinstance(content, dict):
return startRow return startRow
list_items = content.get("items") or [] listItems = content.get("items") or []
# Ensure list_items is a list if not isinstance(listItems, list):
if not isinstance(list_items, list): listItems = []
list_items = []
list_style = styles.get("bullet_list", {}) listStyle = styles.get("bullet_list", {})
for item in list_items: for item in listItems:
sheet.cell(row=startRow, column=1, value=f"{item}") runs = self._inlineRunsForListItem(item)
if list_style.get("color"): text = self._renderInlineRuns(runs)
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"])) sheet.cell(row=startRow, column=1, value=f"\u2022 {text}")
if listStyle.get("color"):
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"]))
startRow += 1 startRow += 1
return startRow return startRow
@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer):
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet. Expects nested content structure.""" """Add a paragraph element to Excel sheet. Expects nested content structure."""
try: try:
# Extract from nested content structure
content = element.get("content", {}) content = element.get("content", {})
if isinstance(content, dict): if isinstance(content, dict):
text = content.get("text", "") runs = self._inlineRunsFromContent(content)
text = self._renderInlineRuns(runs)
elif isinstance(content, str): elif isinstance(content, str):
text = content text = content
else: else:

View file

@ -0,0 +1,75 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Default style definitions and style resolution for document rendering."""
from typing import Any, Dict
# Canonical fallback style used when an agent supplies no (or only partial)
# style overrides; resolveStyle() deep-merges agent values over this dict.
# Conventions: sizes/spacing are in points ("...Pt"), colors are "#RRGGBB" hex.
DEFAULT_STYLE: Dict[str, Any] = {
    "fonts": {
        "primary": "Calibri",      # body text and headings
        "monospace": "Consolas",   # code blocks / inline code
    },
    "colors": {
        "primary": "#1F3864",
        "secondary": "#2C3E50",
        "accent": "#2980B9",
        "background": "#FFFFFF",
    },
    # Per-level heading typography; h1 is the largest.
    "headings": {
        "h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6},
        "h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4},
        "h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3},
        "h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2},
    },
    "paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"},
    "table": {
        "headerBg": "#1F3864",     # header row background
        "headerFg": "#FFFFFF",     # header row text color
        "headerSizePt": 10,
        "bodySizePt": 10,
        # Alternating row backgrounds (zebra striping).
        "rowBandingEven": "#F2F6FC",
        "rowBandingOdd": "#FFFFFF",
        "borderColor": "#CBD5E1",
        "borderWidthPt": 0.5,
    },
    "list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11},
    "image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"},
    "codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"},
    # Page geometry and header/footer content for paginated renderers.
    "page": {
        "format": "A4",
        "marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60},
        "showPageNumbers": True,
        "headerHeight": 30,
        "footerHeight": 30,
        "headerLogo": None,        # optional logo reference; None = no logo
        "headerText": "",
        "footerText": "",
    },
}
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
result = {}
for key in base:
if key in override:
baseVal = base[key]
overVal = override[key]
if isinstance(baseVal, dict) and isinstance(overVal, dict):
result[key] = _deepMerge(baseVal, overVal)
else:
result[key] = overVal
else:
result[key] = base[key]
for key in override:
if key not in base:
result[key] = override[key]
return result
def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]:
    """Resolve the effective render style.

    Deep-merges ``agentStyle`` over ``DEFAULT_STYLE`` and returns a fully
    independent copy of the result.

    Args:
        agentStyle: Partial style overrides from the agent, or None/empty
            to use the defaults unchanged.

    Returns:
        A fully resolved style dict containing every DEFAULT_STYLE key.
    """
    import copy

    if not agentStyle:
        # deepcopy (not dict()) so nested dicts are not shared with
        # DEFAULT_STYLE -- a caller mutating e.g. style["headings"]["h1"]
        # must not corrupt the module-level defaults for later calls.
        return copy.deepcopy(DEFAULT_STYLE)
    # _deepMerge leaves un-overridden subtrees aliased to DEFAULT_STYLE;
    # deep-copy the merged result for the same isolation guarantee.
    return copy.deepcopy(_deepMerge(DEFAULT_STYLE, agentStyle))

View file

@ -9,11 +9,70 @@ from typing import Any, Dict
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _parseInlineRuns(text: str) -> list:
"""
Parse inline markdown formatting into a list of InlineRun dicts.
Handles: images, links, bold, italic, inline code, plain text.
Uses a regex-based tokenizer that processes tokens left-to-right.
"""
if not text:
return [{"type": "text", "value": ""}]
# Pattern order matters: images before links, bold before italic
_TOKEN_RE = re.compile(
r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)' # image
r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)' # link
r'|`(?P<code>[^`]+)`' # inline code
r'|\*\*(?P<bold>.+?)\*\*' # bold
r'|(?<!\w)\*(?P<italic1>.+?)\*(?!\w)' # italic *x*
r'|(?<!\w)_(?P<italic2>.+?)_(?!\w)' # italic _x_
)
runs = []
lastEnd = 0
for m in _TOKEN_RE.finditer(text):
# Plain text before this match
if m.start() > lastEnd:
runs.append({"type": "text", "value": text[lastEnd:m.start()]})
if m.group("imgAlt") is not None or m.group("imgSrc") is not None:
alt = (m.group("imgAlt") or "").strip() or "Image"
src = (m.group("imgSrc") or "").strip()
widthStr = m.group("imgWidth")
run = {"type": "image", "value": alt}
if src.startswith("file:"):
run["fileId"] = src[5:]
else:
run["href"] = src
if widthStr:
run["widthPt"] = int(widthStr)
runs.append(run)
elif m.group("linkText") is not None:
runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")})
elif m.group("code") is not None:
runs.append({"type": "code", "value": m.group("code")})
elif m.group("bold") is not None:
runs.append({"type": "bold", "value": m.group("bold")})
elif m.group("italic1") is not None:
runs.append({"type": "italic", "value": m.group("italic1")})
elif m.group("italic2") is not None:
runs.append({"type": "italic", "value": m.group("italic2")})
lastEnd = m.end()
# Trailing plain text
if lastEnd < len(text):
runs.append({"type": "text", "value": text[lastEnd:]})
return runs if runs else [{"type": "text", "value": text}]
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]: def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
""" """
Convert markdown content to the standard document JSON format expected by renderReport. Convert markdown content to the standard document JSON format with Inline-Run model.
Supports headings, code blocks, tables, lists, images (file: refs), paragraphs. Sections use inlineRuns (list of run dicts) instead of plain text strings.
For plain text: wraps entire content in a single paragraph section. Supports headings, code blocks, tables, lists, images, paragraphs.
""" """
if not isinstance(markdown, str): if not isinstance(markdown, str):
markdown = str(markdown) if markdown else "" markdown = str(markdown) if markdown else ""
@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
while i < len(lines): while i < len(lines):
line = lines[i] line = lines[i]
# Headings # Headings (plain text, no inline formatting)
headingMatch = re.match(r"^(#{1,6})\s+(.+)", line) headingMatch = re.match(r"^(#{1,6})\s+(.+)", line)
if headingMatch: if headingMatch:
level = len(headingMatch.group(1)) level = len(headingMatch.group(1))
@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
i += 1 i += 1
continue continue
# Fenced code blocks # Fenced code blocks (no inline formatting)
codeMatch = re.match(r"^```(\w*)", line) codeMatch = re.match(r"^```(\w*)", line)
if codeMatch: if codeMatch:
lang = codeMatch.group(1) or "text" lang = codeMatch.group(1) or "text"
@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
}) })
continue continue
# Tables # Tables - cells are List[InlineRun]
tableMatch = re.match(r"^\|(.+)\|$", line) tableMatch = re.match(r"^\|(.+)\|$", line)
if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]): if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]):
headerCells = [c.strip() for c in tableMatch.group(1).split("|")] headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")]
i += 2 i += 2
rows = [] rows = []
while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]): while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]):
rowCells = [c.strip() for c in lines[i][1:-1].split("|")] rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")]
rows.append(rowCells) rows.append(rowCells)
i += 1 i += 1
sections.append({ sections.append({
@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
}) })
continue continue
# Bullet / numbered lists # Bullet / numbered lists - items are List[List[InlineRun]]
listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line) listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line)
if listMatch: if listMatch:
isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2))) isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2)))
items = [] items = []
while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]): while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]):
m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]) m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i])
items.append({"text": m.group(3).strip()}) items.append(_parseInlineRuns(m.group(3).strip()))
i += 1 i += 1
sections.append({ sections.append({
"id": _nextId(), "content_type": "bullet_list", "order": order, "id": _nextId(), "content_type": "bullet_list", "order": order,
@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
i += 1 i += 1
continue continue
# Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store) # Standalone image on its own line -> block-level image section
imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line) imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line)
if imgMatch: if imgMatch:
altText = imgMatch.group(1).strip() or "Image" altText = imgMatch.group(1).strip() or "Image"
src = imgMatch.group(2).strip() src = imgMatch.group(2).strip()
widthStr = imgMatch.group(3)
fileId = src[5:] if src.startswith("file:") else "" fileId = src[5:] if src.startswith("file:") else ""
content = {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
if widthStr:
content["widthPt"] = int(widthStr)
sections.append({ sections.append({
"id": _nextId(), "content_type": "image", "order": order, "id": _nextId(), "content_type": "image", "order": order,
"elements": [{ "elements": [{"content": content}],
"content": {
"altText": altText,
"base64Data": "",
"_fileRef": fileId,
"_srcUrl": src if not fileId else "",
}
}],
}) })
i += 1 i += 1
continue continue
# Paragraph # Paragraph - produces inlineRuns
paraLines = [] paraLines = []
while i < len(lines) and lines[i].strip() and not re.match( while i < len(lines) and lines[i].strip() and not re.match(
r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i] r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i]
): ):
paraLines.append(lines[i]) paraLines.append(lines[i])
i += 1 i += 1
if paraLines: if paraLines:
combinedText = " ".join(paraLines)
sections.append({ sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order, "id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": " ".join(paraLines)}}], "elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}],
}) })
continue continue
i += 1 i += 1
if not sections: if not sections:
fallbackText = markdown.strip() or "(empty)"
sections.append({ sections.append({
"id": _nextId(), "content_type": "paragraph", "order": order, "id": _nextId(), "content_type": "paragraph", "order": order,
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}], "elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}],
}) })
return { return {

View file

@ -0,0 +1,18 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Shared helpers for AI workflow actions."""
def applyCommonAiParams(parameters: dict, request) -> None:
    """Copy common node-level AI parameters onto an AI request.

    Applies ``requireNeutralization`` (coerced to bool) and the
    ``allowedModels`` whitelist, creating ``request.options`` on demand.
    Mutates *request* in place; returns None.
    """
    neutralizationFlag = parameters.get("requireNeutralization")
    if neutralizationFlag is not None:
        request.requireNeutralization = bool(neutralizationFlag)

    modelWhitelist = parameters.get("allowedModels")
    if isinstance(modelWhitelist, list) and modelWhitelist:
        if not request.options:
            # Lazy import avoids a module-level dependency cycle.
            from modules.datamodels.datamodelAi import AiCallOptions
            request.options = AiCallOptions()
        request.options.allowedModels = modelWhitelist

View file

@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult:
prompt=prompt, prompt=prompt,
options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE), options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE),
) )
from modules.workflows.methods.methodAi._common import applyCommonAiParams
applyCommonAiParams(parameters, req)
resp = await ai_service.callAi(req) resp = await ai_service.callAi(req)
except (SubscriptionInactiveException, BillingContextError): except (SubscriptionInactiveException, BillingContextError):
raise raise

View file

@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
} }
if parentOperationId: if parentOperationId:
processParams["parentOperationId"] = parentOperationId processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams) return await self.process(processParams)

View file

@ -55,6 +55,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
processingMode=ProcessingModeEnum.DETAILED processingMode=ProcessingModeEnum.DETAILED
) )
# Apply node-level AI params
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# outputFormat: Optional - if None, formats determined from prompt by AI # outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent( aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt, prompt=prompt,

View file

@ -59,6 +59,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
compressContext=False compressContext=False
) )
# Apply node-level AI params
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# outputFormat: Optional - if None, formats determined from prompt by AI # outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent( aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt, prompt=prompt,

View file

@ -212,6 +212,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
) )
) )
from modules.workflows.methods.methodAi._common import applyCommonAiParams
applyCommonAiParams(parameters, request)
aiResponse_obj = await self.services.ai.callAi(request) aiResponse_obj = await self.services.ai.callAi(request)
# Convert AiCallResponse to AiResponse format # Convert AiCallResponse to AiResponse format
@ -243,6 +246,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
) )
# Apply node-level AI params (allowedModels, requireNeutralization)
allowedModels = parameters.get("allowedModels")
if allowedModels and isinstance(allowedModels, list):
options.allowedModels = allowedModels
requireNeutralization = parameters.get("requireNeutralization")
if requireNeutralization is not None:
_ctx = getattr(self.services, '_context', None)
if _ctx:
_ctx.requireNeutralization = bool(requireNeutralization)
# Get generationIntent from parameters (required for DATA_GENERATE) # Get generationIntent from parameters (required for DATA_GENERATE)
# Default to "document" if not provided (most common use case) # Default to "document" if not provided (most common use case)
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code" # For code generation, use ai.generateCode action or explicitly pass generationIntent="code"

View file

@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
} }
if parentOperationId: if parentOperationId:
processParams["parentOperationId"] = parentOperationId processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams) return await self.process(processParams)

View file

@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
processParams["resultType"] = resultType processParams["resultType"] = resultType
if parentOperationId: if parentOperationId:
processParams["parentOperationId"] = parentOperationId processParams["parentOperationId"] = parentOperationId
if parameters.get("allowedModels"):
processParams["allowedModels"] = parameters["allowedModels"]
if parameters.get("requireNeutralization") is not None:
processParams["requireNeutralization"] = parameters["requireNeutralization"]
return await self.process(processParams) return await self.process(processParams)

View file

View file

@ -0,0 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.datamodels.datamodelAi import AiCallOptions
def test_allowed_models_field_exists():
    """allowedModels accepts an explicit model whitelist and stores it verbatim."""
    opts = AiCallOptions(allowedModels=["gpt-5-mini", "claude-4-7-opus"])
    assert opts.allowedModels == ["gpt-5-mini", "claude-4-7-opus"]
def test_allowed_models_default_none():
    """When omitted, allowedModels defaults to None (no model filtering)."""
    opts = AiCallOptions()
    assert opts.allowedModels is None

View file

View file

@ -0,0 +1,23 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
def test_inline_image_in_paragraph():
    """An image token inside a sentence yields mixed text+image inline runs."""
    md = "Results show ![chart](file:abc \"200pt\") clearly."
    result = markdownToDocumentJson(md, "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    types = [r["type"] for r in runs]
    assert "text" in types
    assert "image" in types
    imgRun = next(r for r in runs if r["type"] == "image")
    assert imgRun.get("fileId") == "abc"
def test_multiple_inline_images():
    """Several file: images in one paragraph each become their own image run."""
    md = "A ![x](file:1) B ![y](file:2) C"
    result = markdownToDocumentJson(md, "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    images = [r for r in runs if r["type"] == "image"]
    assert len(images) == 2

View file

@ -0,0 +1,71 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
def test_basic_paragraph():
    """Plain text becomes a single paragraph section with one text run."""
    result = markdownToDocumentJson("Hello world", "Test")
    doc = result["documents"][0]
    section = doc["sections"][0]
    assert section["content_type"] == "paragraph"
    assert section["elements"][0]["content"]["inlineRuns"][0] == {"type": "text", "value": "Hello world"}
def test_inline_bold():
    """**bold** markdown produces a bold-typed inline run."""
    result = markdownToDocumentJson("This is **bold** text", "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    assert any(r["type"] == "bold" and r["value"] == "bold" for r in runs)
def test_inline_image():
    """A file: image inside a paragraph becomes an image run with fileId."""
    result = markdownToDocumentJson("Text ![logo](file:abc123) more", "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    assert any(r["type"] == "image" and r.get("fileId") == "abc123" for r in runs)
def test_inline_link():
    """[text](url) markdown produces a link run carrying the href."""
    result = markdownToDocumentJson("Click [here](https://example.com)", "Test")
    runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
    assert any(r["type"] == "link" and r.get("href") == "https://example.com" for r in runs)
def test_table_cells_are_inline_runs():
    """Table cells are stored as lists of inline runs, not plain strings."""
    md = "| A | B |\n| --- | --- |\n| **x** | y |"
    result = markdownToDocumentJson(md, "Test")
    section = result["documents"][0]["sections"][0]
    assert section["content_type"] == "table"
    rows = section["elements"][0]["content"]["rows"]
    assert isinstance(rows[0][0], list)
def test_bullet_list_inline_runs():
    """List items are stored as lists of inline runs (new format)."""
    md = "- Item **one**\n- Item two"
    result = markdownToDocumentJson(md, "Test")
    section = result["documents"][0]["sections"][0]
    assert section["content_type"] == "bullet_list"
    items = section["elements"][0]["content"]["items"]
    assert isinstance(items[0], list)
def test_standalone_image_block():
    """An image alone on a line becomes a block-level image section."""
    md = "![Big chart](file:chart123)"
    result = markdownToDocumentJson(md, "Test")
    section = result["documents"][0]["sections"][0]
    assert section["content_type"] == "image"
def test_heading_unchanged():
    """Headings keep the legacy plain-text content shape (no inline runs)."""
    result = markdownToDocumentJson("# Title", "Test")
    section = result["documents"][0]["sections"][0]
    assert section["content_type"] == "heading"
    assert section["elements"][0]["content"]["text"] == "Title"
    assert section["elements"][0]["content"]["level"] == 1
def test_code_block_unchanged():
    """Fenced code blocks keep the legacy plain-code content shape."""
    md = "```python\nprint('hi')\n```"
    result = markdownToDocumentJson(md, "Test")
    section = result["documents"][0]["sections"][0]
    assert section["content_type"] == "code_block"
    assert section["elements"][0]["content"]["code"] == "print('hi')"

View file

@ -0,0 +1,39 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import pytest
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE
def test_resolve_none_returns_defaults():
    """resolveStyle(None) falls back to the full DEFAULT_STYLE."""
    result = resolveStyle(None)
    assert result == DEFAULT_STYLE
def test_resolve_empty_returns_defaults():
    """An empty override dict behaves like no override at all."""
    result = resolveStyle({})
    assert result == DEFAULT_STYLE
def test_override_single_color():
    """Overriding one color leaves sibling colors at their defaults."""
    result = resolveStyle({"colors": {"primary": "#FF0000"}})
    assert result["colors"]["primary"] == "#FF0000"
    assert result["colors"]["secondary"] == DEFAULT_STYLE["colors"]["secondary"]
def test_override_nested_heading():
    """Deep merge keeps unspecified nested heading attributes intact."""
    result = resolveStyle({"headings": {"h1": {"sizePt": 30}}})
    assert result["headings"]["h1"]["sizePt"] == 30
    assert result["headings"]["h1"]["weight"] == "bold"
def test_override_font():
    """Overriding the primary font does not touch the monospace font."""
    result = resolveStyle({"fonts": {"primary": "Arial"}})
    assert result["fonts"]["primary"] == "Arial"
    assert result["fonts"]["monospace"] == "Consolas"
def test_full_style_passthrough():
    """A fully specified subtree passes through unchanged."""
    custom = {"fonts": {"primary": "Helvetica", "monospace": "Monaco"}}
    result = resolveStyle(custom)
    assert result["fonts"]["primary"] == "Helvetica"
    assert result["fonts"]["monospace"] == "Monaco"