Plan D implemented - generation styles
This commit is contained in:
parent
b12671bbb5
commit
afd7e9d941
32 changed files with 1411 additions and 485 deletions
|
|
@ -162,6 +162,7 @@ class AiCallOptions(BaseModel):
|
|||
|
||||
# Provider filtering (from UI multiselect or automation config)
|
||||
allowedProviders: Optional[List[str]] = Field(default=None, description="List of allowed AI providers to use (empty = all RBAC-permitted)")
|
||||
allowedModels: Optional[List[str]] = Field(default=None, description="Whitelist of allowed model names (AND-filter with allowedProviders). None/empty = all allowed.")
|
||||
|
||||
|
||||
class AiCallRequest(BaseModel):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ Unified JSON document schema and helpers used by both generation prompts and ren
|
|||
This defines a single canonical template and the supported section types.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from typing import List, Literal, TypedDict
|
||||
|
||||
# Canonical list of supported section types across the system
|
||||
supportedSectionTypes: List[str] = [
|
||||
|
|
@ -18,6 +18,21 @@ supportedSectionTypes: List[str] = [
|
|||
"image",
|
||||
]
|
||||
|
||||
class InlineRun(TypedDict, total=False):
    """Single inline content run. Every paragraph/cell/list-item is a List[InlineRun].

    All keys are optional (``total=False``); which keys are meaningful
    depends on the run's ``type`` discriminator.
    """
    # Discriminator selecting the run kind.
    type: Literal["text", "image", "link", "bold", "italic", "code"]
    value: str  # text content (for text/bold/italic/code/link-label)
    fileId: str  # for type=image: reference to FileItem
    base64Data: str  # for type=image: resolved base64 (post-processing)
    mimeType: str  # for type=image: e.g. "image/png"
    widthPt: int  # for type=image: optional render width
    href: str  # for type=link: URL target

# Runtime mirror of the Literal in InlineRun.type — keep the two in sync.
supportedInlineRunTypes: List[str] = [
    "text", "image", "link", "bold", "italic", "code",
]
|
||||
|
||||
|
||||
# Canonical JSON template used for AI generation (documents array + sections)
|
||||
# This template is used for STRUCTURE generation - sections have empty elements arrays.
|
||||
# For content generation, elements arrays will be populated later.
|
||||
|
|
|
|||
|
|
@ -3,6 +3,15 @@
|
|||
|
||||
from modules.shared.i18nRegistry import t
|
||||
|
||||
# Parameters shared by every AI node definition; each node appends this list
# to its own "parameters" via `] + _AI_COMMON_PARAMS` in AI_NODES below.
_AI_COMMON_PARAMS = [
    # Per-call toggle: neutralize the inputs of this AI call (checkbox).
    {"name": "requireNeutralization", "type": "boolean", "required": False,
     "frontendType": "checkbox", "default": False,
     "description": t("Eingaben fuer diesen Call neutralisieren")},
    # Node-level model whitelist; an empty list means "all permitted models".
    {"name": "allowedModels", "type": "array", "required": False,
     "frontendType": "modelMultiSelect", "default": [],
     "description": t("Erlaubte LLM-Modelle (leer = alle erlaubten)")},
]
|
||||
|
||||
AI_NODES = [
|
||||
{
|
||||
"id": "ai.prompt",
|
||||
|
|
@ -19,7 +28,7 @@ AI_NODES = [
|
|||
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
|
||||
{"name": "simpleMode", "type": "boolean", "required": False, "frontendType": "checkbox",
|
||||
"description": t("Einfacher Modus"), "default": True},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": [
|
||||
|
|
@ -38,7 +47,7 @@ AI_NODES = [
|
|||
"parameters": [
|
||||
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
|
||||
"description": t("Recherche-Anfrage")},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["Transit"]}},
|
||||
|
|
@ -58,7 +67,7 @@ AI_NODES = [
|
|||
{"name": "summaryLength", "type": "string", "required": False, "frontendType": "select",
|
||||
"frontendOptions": {"options": ["brief", "medium", "detailed"]},
|
||||
"description": t("Kurz, mittel oder ausführlich"), "default": "medium"},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
|
||||
|
|
@ -77,7 +86,7 @@ AI_NODES = [
|
|||
"description": t("Dokumentenliste (via Wire oder DataRef)"), "default": ""},
|
||||
{"name": "targetLanguage", "type": "string", "required": True, "frontendType": "text",
|
||||
"description": t("Zielsprache (z.B. de, en, French)")},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
|
||||
|
|
@ -97,7 +106,7 @@ AI_NODES = [
|
|||
{"name": "targetFormat", "type": "string", "required": True, "frontendType": "select",
|
||||
"frontendOptions": {"options": ["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"]},
|
||||
"description": t("Zielformat")},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["DocumentList", "Transit"]}},
|
||||
|
|
@ -114,7 +123,7 @@ AI_NODES = [
|
|||
"parameters": [
|
||||
{"name": "prompt", "type": "string", "required": True, "frontendType": "textarea",
|
||||
"description": t("Generierungs-Prompt")},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["Transit"]}},
|
||||
|
|
@ -134,7 +143,7 @@ AI_NODES = [
|
|||
{"name": "resultType", "type": "string", "required": False, "frontendType": "select",
|
||||
"frontendOptions": {"options": ["py", "js", "ts", "html", "java", "cpp", "txt", "json", "csv", "xml"]},
|
||||
"description": t("Datei-Endung der erzeugten Code-Datei"), "default": "py"},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["Transit"]}},
|
||||
|
|
@ -154,7 +163,7 @@ AI_NODES = [
|
|||
"description": t("Konsolidierungsmodus"), "default": "summarize"},
|
||||
{"name": "prompt", "type": "string", "required": False, "frontendType": "textarea",
|
||||
"description": t("Optionaler Prompt für die Konsolidierung"), "default": ""},
|
||||
],
|
||||
] + _AI_COMMON_PARAMS,
|
||||
"inputs": 1,
|
||||
"outputs": 1,
|
||||
"inputPorts": {0: {"accepts": ["AggregateResult", "Transit"]}},
|
||||
|
|
|
|||
|
|
@ -361,6 +361,17 @@ QUICK_ACTIONS = [
|
|||
# The placeholder {{featureInstanceId}} is replaced by _copyTemplateWorkflows.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_FINANCE_STYLE_HINT = (
|
||||
"\n\nWenn du ein Dokument erstellst, verwende einen professionellen Finanz-Stil:\n"
|
||||
"- Schriftart: Calibri\n"
|
||||
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
|
||||
"- Akzentfarbe: #2980B9\n"
|
||||
"- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
|
||||
"- Konservatives, seriöses Layout\n"
|
||||
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
|
||||
)
|
||||
|
||||
|
||||
def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
|
||||
"""Build a standard analysis graph: trigger -> refreshAccountingData -> ai.prompt."""
|
||||
return {
|
||||
|
|
@ -370,8 +381,9 @@ def _buildAnalysisWorkflowGraph(prompt: str) -> Dict[str, Any]:
|
|||
"parameters": {"featureInstanceId": "{{featureInstanceId}}", "forceRefresh": False}, "position": {"x": 250, "y": 0}},
|
||||
{"id": "analyse", "type": "ai.prompt", "label": "Analyse", "_method": "ai", "_action": "process",
|
||||
"parameters": {
|
||||
"aiPrompt": prompt,
|
||||
"aiPrompt": prompt + _FINANCE_STYLE_HINT,
|
||||
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
|
||||
"requireNeutralization": True,
|
||||
"simpleMode": False,
|
||||
}, "position": {"x": 500, "y": 0}},
|
||||
],
|
||||
|
|
@ -454,10 +466,19 @@ TEMPLATE_WORKFLOWS = [
|
|||
"Verwende die uebergebene Budget-Datei als Soll-Quelle und die im "
|
||||
"Kontext bereitgestellten Buchhaltungsdaten als Ist-Quelle.\n"
|
||||
"WICHTIG: Erstelle KEINEN separaten Chart pro Konto. Nur EIN "
|
||||
"Uebersichts-Chart ueber alle Konten ist gewuenscht."
|
||||
"Uebersichts-Chart ueber alle Konten ist gewuenscht.\n\n"
|
||||
"Hinweis: Das documentTheme ist 'finance'. Wenn du ein Dokument erstellst, "
|
||||
"verwende einen professionellen Finanz-Stil:\n"
|
||||
"- Schriftart: Calibri\n"
|
||||
"- Primaerfarbe: #1F3864 (Dunkelblau)\n"
|
||||
"- Akzentfarbe: #2980B9\n"
|
||||
"- Tabellen mit dunklem Header (#1F3864, weisse Schrift)\n"
|
||||
"- Konservatives, seriöses Layout\n"
|
||||
"Nutze den style-Parameter von renderDocument um diese Vorgaben umzusetzen."
|
||||
),
|
||||
"resultType": "xlsx",
|
||||
"documentTheme": "finance",
|
||||
"requireNeutralization": True,
|
||||
"documentList": {"type": "ref", "nodeId": "trigger", "path": ["payload", "documentList"]},
|
||||
"context": {"type": "ref", "nodeId": "refresh", "path": ["data", "accountingData"]},
|
||||
"simpleMode": False,
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
# All rights reserved.
|
||||
"""Workspace feature data models — WorkspaceUserSettings."""
|
||||
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
from pydantic import Field
|
||||
from modules.datamodels.datamodelBase import PowerOnModel
|
||||
from modules.shared.i18nRegistry import i18nModel
|
||||
import uuid
|
||||
|
|
@ -52,3 +52,18 @@ class WorkspaceUserSettings(PowerOnModel):
|
|||
description="Max agent rounds override (None = instance default)",
|
||||
json_schema_extra={"label": "Max. Agenten-Runden", "frontend_type": "number", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
requireNeutralization: bool = Field(
|
||||
default=False,
|
||||
description="Default neutralization setting for this user",
|
||||
json_schema_extra={"label": "Neutralisierung", "frontend_type": "checkbox", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
allowedProviders: List[str] = Field(
|
||||
default_factory=list,
|
||||
description="Allowed AI providers (empty = all permitted by RBAC)",
|
||||
json_schema_extra={"label": "Erlaubte Provider", "frontend_type": "multiselect", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
allowedModels: List[str] = Field(
|
||||
default_factory=list,
|
||||
description="Allowed AI models (empty = all permitted)",
|
||||
json_schema_extra={"label": "Erlaubte Modelle", "frontend_type": "modelMultiSelect", "frontend_readonly": False, "frontend_required": False},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ class WorkspaceInputRequest(BaseModel):
|
|||
workflowId: Optional[str] = Field(default=None, description="Continue existing workflow")
|
||||
userLanguage: str = Field(default="en", description="User language code")
|
||||
allowedProviders: List[str] = Field(default_factory=list, description="Restrict AI to these providers")
|
||||
allowedModels: List[str] = Field(default_factory=list, description="Restrict AI to these models")
|
||||
requireNeutralization: Optional[bool] = Field(default=None, description="Per-request neutralization override")
|
||||
|
||||
|
||||
|
|
@ -635,6 +636,7 @@ async def streamWorkspaceStart(
|
|||
userLanguage=userInput.userLanguage,
|
||||
instanceConfig=instanceConfig,
|
||||
allowedProviders=userInput.allowedProviders,
|
||||
allowedModels=userInput.allowedModels,
|
||||
requireNeutralization=userInput.requireNeutralization,
|
||||
billingFeatureCode=wsBillingFeatureCode,
|
||||
)
|
||||
|
|
@ -692,6 +694,7 @@ async def _runWorkspaceAgent(
|
|||
userLanguage: str = "en",
|
||||
instanceConfig: Dict[str, Any] = None,
|
||||
allowedProviders: List[str] = None,
|
||||
allowedModels: List[str] = None,
|
||||
requireNeutralization: Optional[bool] = None,
|
||||
billingFeatureCode: Optional[str] = None,
|
||||
):
|
||||
|
|
@ -715,6 +718,9 @@ async def _runWorkspaceAgent(
|
|||
logger.info(f"Workspace agent: allowedProviders={allowedProviders}")
|
||||
else:
|
||||
logger.debug("Workspace agent: no allowedProviders in request")
|
||||
if allowedModels:
|
||||
aiService.services.allowedModels = allowedModels
|
||||
logger.info(f"Workspace agent: allowedModels={allowedModels}")
|
||||
if requireNeutralization is not None:
|
||||
ctx.requireNeutralization = requireNeutralization
|
||||
|
||||
|
|
@ -2139,6 +2145,76 @@ async def updateGeneralSettings(
|
|||
return await getGeneralSettings(request, instanceId, context)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# User-level AI settings (neutralisation, providers, models)
|
||||
# =========================================================================
|
||||
|
||||
@router.get("/{instanceId}/user-settings")
|
||||
@limiter.limit("120/minute")
|
||||
async def getWorkspaceUserSettings(
|
||||
request: Request,
|
||||
instanceId: str = Path(...),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
):
|
||||
"""Get the current user's workspace AI settings (auto-creates with defaults if not exists)."""
|
||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||
wsInterface = _getWorkspaceInterface(context, instanceId)
|
||||
userId = str(context.user.id)
|
||||
|
||||
settings = wsInterface.getWorkspaceUserSettings(userId)
|
||||
if settings:
|
||||
return JSONResponse({
|
||||
"requireNeutralization": settings.requireNeutralization,
|
||||
"allowedProviders": settings.allowedProviders,
|
||||
"allowedModels": settings.allowedModels,
|
||||
})
|
||||
|
||||
data = {
|
||||
"userId": userId,
|
||||
"mandateId": str(context.mandateId) if context.mandateId else "",
|
||||
"featureInstanceId": instanceId,
|
||||
}
|
||||
created = wsInterface.saveWorkspaceUserSettings(data)
|
||||
return JSONResponse({
|
||||
"requireNeutralization": created.requireNeutralization,
|
||||
"allowedProviders": created.allowedProviders,
|
||||
"allowedModels": created.allowedModels,
|
||||
})
|
||||
|
||||
|
||||
@router.put("/{instanceId}/user-settings")
|
||||
@limiter.limit("120/minute")
|
||||
async def putWorkspaceUserSettings(
|
||||
request: Request,
|
||||
instanceId: str = Path(...),
|
||||
body: dict = Body(...),
|
||||
context: RequestContext = Depends(getRequestContext),
|
||||
):
|
||||
"""Save the current user's workspace AI settings."""
|
||||
_mandateId, _ = _validateInstanceAccess(instanceId, context)
|
||||
wsInterface = _getWorkspaceInterface(context, instanceId)
|
||||
userId = str(context.user.id)
|
||||
|
||||
data = {
|
||||
"userId": userId,
|
||||
"mandateId": str(context.mandateId) if context.mandateId else "",
|
||||
"featureInstanceId": instanceId,
|
||||
}
|
||||
if "requireNeutralization" in body:
|
||||
data["requireNeutralization"] = bool(body["requireNeutralization"])
|
||||
if "allowedProviders" in body:
|
||||
data["allowedProviders"] = body["allowedProviders"]
|
||||
if "allowedModels" in body:
|
||||
data["allowedModels"] = body["allowedModels"]
|
||||
|
||||
saved = wsInterface.saveWorkspaceUserSettings(data)
|
||||
return JSONResponse({
|
||||
"requireNeutralization": saved.requireNeutralization,
|
||||
"allowedProviders": saved.allowedProviders,
|
||||
"allowedModels": saved.allowedModels,
|
||||
})
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# RAG / Knowledge — anonymised instance statistics (presentation / KPIs)
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -111,6 +111,19 @@ class AiObjects:
|
|||
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
|
||||
)
|
||||
|
||||
allowedModels = getattr(options, 'allowedModels', None) if options else None
|
||||
if allowedModels:
|
||||
filteredModels = [m for m in availableModels if m.name in allowedModels]
|
||||
if filteredModels:
|
||||
availableModels = filteredModels
|
||||
else:
|
||||
errorMsg = f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}"
|
||||
logger.error(errorMsg)
|
||||
return AiCallResponse(
|
||||
content=errorMsg, modelName="error", priceCHF=0.0,
|
||||
processingTime=0.0, bytesSent=0, bytesReceived=0, errorCount=1,
|
||||
)
|
||||
|
||||
failoverModelList = modelSelector.getFailoverModelList(prompt, context, options, availableModels)
|
||||
|
||||
if not failoverModelList:
|
||||
|
|
@ -364,6 +377,19 @@ class AiObjects:
|
|||
)
|
||||
return
|
||||
|
||||
allowedModels = getattr(options, 'allowedModels', None) if options else None
|
||||
if allowedModels:
|
||||
filtered = [m for m in availableModels if m.name in allowedModels]
|
||||
if filtered:
|
||||
availableModels = filtered
|
||||
else:
|
||||
yield AiCallResponse(
|
||||
content=f"No models match allowedModels {allowedModels} (providers={allowedProviders}) for operation {options.operationType}",
|
||||
modelName="error", priceCHF=0.0, processingTime=0.0,
|
||||
bytesSent=0, bytesReceived=0, errorCount=1,
|
||||
)
|
||||
return
|
||||
|
||||
failoverModelList = modelSelector.getFailoverModelList(
|
||||
request.prompt, request.context or "", options, availableModels
|
||||
)
|
||||
|
|
@ -516,6 +542,14 @@ class AiObjects:
|
|||
else:
|
||||
logger.warning(f"No embedding models match allowedProviders {allowedProviders}")
|
||||
|
||||
allowedModels = getattr(options, 'allowedModels', None) if options else None
|
||||
if allowedModels:
|
||||
filtered = [m for m in availableModels if m.name in allowedModels]
|
||||
if filtered:
|
||||
availableModels = filtered
|
||||
else:
|
||||
logger.warning(f"No embedding models match allowedModels {allowedModels}")
|
||||
|
||||
failoverModelList = modelSelector.getFailoverModelList(
|
||||
combinedText, "", options, availableModels
|
||||
)
|
||||
|
|
|
|||
|
|
@ -25,142 +25,11 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
# ---- Document rendering tool ----
|
||||
|
||||
def _markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
||||
"""Convert markdown content to the standard document JSON format expected by renderers."""
|
||||
import re as _re
|
||||
|
||||
sections = []
|
||||
order = 0
|
||||
lines = markdown.split("\n")
|
||||
i = 0
|
||||
|
||||
def _nextId():
|
||||
nonlocal order
|
||||
order += 1
|
||||
return f"s_{order}"
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# --- Headings ---
|
||||
headingMatch = _re.match(r'^(#{1,6})\s+(.+)', line)
|
||||
if headingMatch:
|
||||
level = len(headingMatch.group(1))
|
||||
text = headingMatch.group(2).strip()
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "heading", "order": order,
|
||||
"elements": [{"content": {"text": text, "level": level}}],
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Fenced code blocks ---
|
||||
codeMatch = _re.match(r'^```(\w*)', line)
|
||||
if codeMatch:
|
||||
lang = codeMatch.group(1) or "text"
|
||||
codeLines = []
|
||||
i += 1
|
||||
while i < len(lines) and not lines[i].startswith("```"):
|
||||
codeLines.append(lines[i])
|
||||
i += 1
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "code_block", "order": order,
|
||||
"elements": [{"content": {"code": "\n".join(codeLines), "language": lang}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Tables ---
|
||||
tableMatch = _re.match(r'^\|(.+)\|$', line)
|
||||
if tableMatch and (i + 1) < len(lines) and _re.match(r'^\|[\s\-:|]+\|$', lines[i + 1]):
|
||||
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
|
||||
i += 2
|
||||
rows = []
|
||||
while i < len(lines) and _re.match(r'^\|(.+)\|$', lines[i]):
|
||||
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
|
||||
rows.append(rowCells)
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "table", "order": order,
|
||||
"elements": [{"content": {"headers": headerCells, "rows": rows}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Bullet / numbered lists ---
|
||||
listMatch = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', line)
|
||||
if listMatch:
|
||||
isNumbered = bool(_re.match(r'\d+[.)]', listMatch.group(2)))
|
||||
items = []
|
||||
while i < len(lines) and _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i]):
|
||||
m = _re.match(r'^(\s*)([-*+]|\d+[.)]) (.+)', lines[i])
|
||||
items.append({"text": m.group(3).strip()})
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "bullet_list", "order": order,
|
||||
"elements": [{"content": {"items": items, "list_type": "numbered" if isNumbered else "bullet"}}],
|
||||
})
|
||||
continue
|
||||
|
||||
# --- Empty lines (skip) ---
|
||||
if not line.strip():
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Images:  or  ---
|
||||
imgMatch = _re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', line)
|
||||
if imgMatch:
|
||||
altText = imgMatch.group(1).strip() or "Image"
|
||||
src = imgMatch.group(2).strip()
|
||||
fileId = ""
|
||||
if src.startswith("file:"):
|
||||
fileId = src[5:]
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "image", "order": order,
|
||||
"elements": [{
|
||||
"content": {
|
||||
"altText": altText,
|
||||
"base64Data": "",
|
||||
"_fileRef": fileId,
|
||||
"_srcUrl": src if not fileId else "",
|
||||
}
|
||||
}],
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# --- Paragraph (collect consecutive non-empty lines) ---
|
||||
paraLines = []
|
||||
while i < len(lines) and lines[i].strip() and not _re.match(r'^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )', lines[i]):
|
||||
paraLines.append(lines[i])
|
||||
i += 1
|
||||
if paraLines:
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": " ".join(paraLines)}}],
|
||||
})
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
if not sections:
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
|
||||
})
|
||||
|
||||
return {
|
||||
"metadata": {
|
||||
"split_strategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "agent_rendering",
|
||||
"title": title,
|
||||
"language": language,
|
||||
},
|
||||
"documents": [{
|
||||
"id": "doc_1",
|
||||
"title": title,
|
||||
"sections": sections,
|
||||
}],
|
||||
}
|
||||
"""Delegate to the consolidated parser in subDocumentUtility."""
|
||||
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
|
||||
result = markdownToDocumentJson(markdown, title, language)
|
||||
result["metadata"]["extraction_method"] = "agent_rendering"
|
||||
return result
|
||||
|
||||
async def _renderDocument(args: Dict[str, Any], context: Dict[str, Any]):
|
||||
"""Render agent-produced markdown content into any document format via the RendererRegistry."""
|
||||
|
|
@ -245,35 +114,75 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
except Exception as e:
|
||||
logger.warning(f"renderDocument: knowledge service unavailable: {e}")
|
||||
resolvedImages = 0
|
||||
|
||||
def _resolveImageRef(targetObj, fileRefKey="_fileRef", fileIdKey="fileId"):
|
||||
"""Resolve a single image reference dict to base64Data in-place."""
|
||||
nonlocal resolvedImages
|
||||
fileRef = targetObj.get(fileRefKey, "") or targetObj.get(fileIdKey, "")
|
||||
if not fileRef or targetObj.get("base64Data"):
|
||||
return
|
||||
if knowledgeService:
|
||||
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
|
||||
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
||||
if imageChunks:
|
||||
targetObj["base64Data"] = imageChunks[0].get("data", "")
|
||||
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
|
||||
targetObj["mimeType"] = chunkMime
|
||||
resolvedImages += 1
|
||||
if not targetObj.get("base64Data"):
|
||||
try:
|
||||
rawBytes = services.chat.getFileData(fileRef)
|
||||
if rawBytes:
|
||||
import base64 as _b64
|
||||
targetObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
|
||||
targetObj["mimeType"] = "image/png"
|
||||
resolvedImages += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
|
||||
targetObj.pop("_fileRef", None)
|
||||
targetObj.pop("_srcUrl", None)
|
||||
|
||||
def _resolveInlineRuns(runsList):
|
||||
"""Scan a list of inline runs and resolve any image runs with fileId."""
|
||||
for run in runsList:
|
||||
if run.get("type") == "image" and run.get("fileId") and not run.get("base64Data"):
|
||||
_resolveImageRef(run, fileRefKey="fileId", fileIdKey="fileId")
|
||||
|
||||
for doc in structuredContent.get("documents", []):
|
||||
for section in doc.get("sections", []):
|
||||
if section.get("content_type") != "image":
|
||||
cType = section.get("content_type")
|
||||
# Block-level image sections
|
||||
if cType == "image":
|
||||
for element in section.get("elements", []):
|
||||
contentObj = element.get("content", {})
|
||||
_resolveImageRef(contentObj)
|
||||
continue
|
||||
for element in section.get("elements", []):
|
||||
contentObj = element.get("content", {})
|
||||
fileRef = contentObj.get("_fileRef", "")
|
||||
if not fileRef or contentObj.get("base64Data"):
|
||||
continue
|
||||
if knowledgeService:
|
||||
chunks = knowledgeService._knowledgeDb.getContentChunks(fileRef)
|
||||
imageChunks = [c for c in (chunks or []) if c.get("contentType") == "image"]
|
||||
if imageChunks:
|
||||
contentObj["base64Data"] = imageChunks[0].get("data", "")
|
||||
chunkMime = imageChunks[0].get("contextRef", {}).get("mimeType", "image/png")
|
||||
contentObj["mimeType"] = chunkMime
|
||||
resolvedImages += 1
|
||||
if not contentObj.get("base64Data"):
|
||||
try:
|
||||
rawBytes = services.chat.getFileData(fileRef)
|
||||
if rawBytes:
|
||||
import base64 as _b64
|
||||
contentObj["base64Data"] = _b64.b64encode(rawBytes).decode("ascii")
|
||||
contentObj["mimeType"] = "image/png"
|
||||
resolvedImages += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"renderDocument: image resolve failed for fileRef={fileRef}: {e}")
|
||||
contentObj.pop("_fileRef", None)
|
||||
contentObj.pop("_srcUrl", None)
|
||||
# Paragraphs with inlineRuns
|
||||
if cType == "paragraph":
|
||||
for element in section.get("elements", []):
|
||||
runs = element.get("content", {}).get("inlineRuns")
|
||||
if runs:
|
||||
_resolveInlineRuns(runs)
|
||||
continue
|
||||
# Bullet lists - items are List[List[InlineRun]]
|
||||
if cType == "bullet_list":
|
||||
for element in section.get("elements", []):
|
||||
items = element.get("content", {}).get("items", [])
|
||||
for item in items:
|
||||
if isinstance(item, list):
|
||||
_resolveInlineRuns(item)
|
||||
continue
|
||||
# Tables - headers and row cells are List[InlineRun]
|
||||
if cType == "table":
|
||||
for element in section.get("elements", []):
|
||||
contentObj = element.get("content", {})
|
||||
for cell in contentObj.get("headers", []):
|
||||
if isinstance(cell, list):
|
||||
_resolveInlineRuns(cell)
|
||||
for row in contentObj.get("rows", []):
|
||||
for cell in row:
|
||||
if isinstance(cell, list):
|
||||
_resolveInlineRuns(cell)
|
||||
|
||||
sectionCount = len(structuredContent.get("documents", [{}])[0].get("sections", []))
|
||||
logger.info(f"renderDocument: parsed {sectionCount} sections from markdown ({len(content)} chars), resolved {resolvedImages} image(s), format={outputFormat}")
|
||||
|
|
@ -285,6 +194,7 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
language=language,
|
||||
title=title,
|
||||
userPrompt=content,
|
||||
style=args.get("style"),
|
||||
)
|
||||
|
||||
if not documents:
|
||||
|
|
@ -367,6 +277,20 @@ def _registerMediaTools(registry: ToolRegistry, services):
|
|||
"outputFormat": {"type": "string", "description": "Target format: pdf, docx, xlsx, pptx, csv, html, md, json, txt", "default": "pdf"},
|
||||
"title": {"type": "string", "description": "Document title", "default": "Document"},
|
||||
"language": {"type": "string", "description": "Document language (ISO 639-1)", "default": "de"},
|
||||
"style": {
|
||||
"type": "object",
|
||||
"description": (
|
||||
"Optional style overrides for the rendered document. Supports nested keys: "
|
||||
"fonts (primary, monospace), colors (primary, secondary, accent, background), "
|
||||
"headings (h1-h4 with sizePt, weight, color, spaceBeforePt, spaceAfterPt), "
|
||||
"paragraph (sizePt, lineSpacing, color), table (headerBg, headerFg, headerSizePt, "
|
||||
"bodySizePt, rowBandingEven, rowBandingOdd, borderColor, borderWidthPt), "
|
||||
"list (bulletChar, indentPt, sizePt), image (defaultWidthPt, maxWidthPt, alignment), "
|
||||
"codeBlock (fontSizePt, background, borderColor), "
|
||||
"page (format, marginsPt, showPageNumbers, headerHeight, footerHeight, headerLogo, headerText, footerText). "
|
||||
"Only provided keys override defaults; omitted keys keep their default values."
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
readOnly=False,
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class _ServicesAdapter:
|
|||
return getattr(w, "featureCode", None) if w else None
|
||||
|
||||
def __getattr__(self, name: str):
    """Proxy selected per-workflow AI settings from the wrapped workflow.

    Returns None when no workflow is attached or the workflow lacks the
    attribute; raises AttributeError for any other name so the normal
    attribute protocol is preserved.
    """
    # Single flat membership test. The source contained BOTH the old and the
    # new condition lines (diff residue), which rendered as nested ifs whose
    # intersection silently dropped "allowedModels"; collapsed to the
    # superset tuple so model filtering is proxied like the other settings.
    if name in ("allowedProviders", "allowedModels", "preferredProviders", "currentUserLanguage"):
        return getattr(self.workflow, name, None) if self.workflow else None
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
|
||||
|
||||
|
|
@ -177,6 +177,11 @@ class AiService:
|
|||
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
|
||||
logger.debug(f"Effective allowedProviders for AI request: {effectiveProviders}")
|
||||
|
||||
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
|
||||
effectiveModels = self._calculateEffectiveModels(request)
|
||||
if effectiveModels and request.options:
|
||||
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
|
||||
|
||||
# Neutralize prompt if enabled (before AI call)
|
||||
_wasNeutralized = False
|
||||
_excludedDocs: List[str] = []
|
||||
|
|
@ -225,6 +230,11 @@ class AiService:
|
|||
if effectiveProviders and request.options:
|
||||
request.options = request.options.model_copy(update={'allowedProviders': effectiveProviders})
|
||||
|
||||
# Calculate effective allowedModels: Workflow ∩ Request (node-level)
|
||||
effectiveModels = self._calculateEffectiveModels(request)
|
||||
if effectiveModels and request.options:
|
||||
request.options = request.options.model_copy(update={'allowedModels': effectiveModels})
|
||||
|
||||
# Neutralize prompt if enabled (before streaming)
|
||||
_wasNeutralized = False
|
||||
_excludedDocs: List[str] = []
|
||||
|
|
@ -1240,6 +1250,43 @@ detectedIntent-Werte:
|
|||
logger.warning(f"Error calculating effective providers: {e}")
|
||||
return None
|
||||
|
||||
def _calculateEffectiveModels(self, request: AiCallRequest = None) -> Optional[List[str]]:
|
||||
"""
|
||||
Calculate effective allowed models: Workflow.allowedModels ∩ request.options.allowedModels.
|
||||
|
||||
AND-logic intersection:
|
||||
- If workflow specifies allowedModels, start with those.
|
||||
- If request (node-level) also specifies allowedModels, intersect.
|
||||
- Returns None if no model filtering is needed.
|
||||
"""
|
||||
try:
|
||||
effectiveModels = None
|
||||
|
||||
# Workflow-level allowedModels (from automation config)
|
||||
workflowModels = getattr(self.services, 'allowedModels', None)
|
||||
if workflowModels:
|
||||
effectiveModels = list(workflowModels)
|
||||
|
||||
# Request-level (node-level) allowedModels
|
||||
requestModels = None
|
||||
if request and request.options and request.options.allowedModels:
|
||||
requestModels = request.options.allowedModels
|
||||
|
||||
if requestModels:
|
||||
if effectiveModels:
|
||||
effectiveModels = [m for m in effectiveModels if m in requestModels]
|
||||
else:
|
||||
effectiveModels = list(requestModels)
|
||||
|
||||
if effectiveModels:
|
||||
logger.debug(f"Model filter: Workflow={workflowModels}, Request={requestModels}, Effective={effectiveModels}")
|
||||
|
||||
return effectiveModels if effectiveModels else None
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error calculating effective models: {e}")
|
||||
return None
|
||||
|
||||
async def ensureAiObjectsInitialized(self):
|
||||
"""Ensure aiObjects is initialized and submodules are ready."""
|
||||
if self.aiObjects is None:
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from .subDocumentUtility import (
|
|||
detectMimeTypeFromData,
|
||||
convertDocumentDataToString
|
||||
)
|
||||
from .styleDefaults import resolveStyle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -382,7 +383,7 @@ class GenerationService:
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None, style: Optional[Dict[str, Any]] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render extracted JSON content to the specified output format.
|
||||
Processes EACH document separately and calls renderer for each.
|
||||
|
|
@ -399,12 +400,14 @@ class GenerationService:
|
|||
userPrompt: User's original prompt for report generation
|
||||
aiService: AI service instance for generation prompt creation
|
||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||
style: Optional style overrides (deep-merged with DEFAULT_STYLE)
|
||||
|
||||
Returns:
|
||||
List of RenderedDocument objects.
|
||||
Each RenderedDocument represents one rendered file (main document or supporting file)
|
||||
"""
|
||||
try:
|
||||
resolvedStyle = resolveStyle(style)
|
||||
# Validate JSON input
|
||||
if not isinstance(extractedContent, dict):
|
||||
raise ValueError("extractedContent must be a JSON dictionary")
|
||||
|
|
@ -469,7 +472,7 @@ class GenerationService:
|
|||
docTitle = doc.get("title", title)
|
||||
|
||||
# Render this document (can return multiple files, e.g., HTML + images)
|
||||
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
|
||||
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService, style=resolvedStyle)
|
||||
allRenderedDocuments.extend(renderedDocs)
|
||||
|
||||
logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ class BaseRenderer(ABC):
|
|||
return list(supportedSectionTypes)
|
||||
|
||||
@abstractmethod
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render extracted JSON content to multiple documents.
|
||||
Each renderer must implement this method.
|
||||
|
|
@ -95,6 +95,9 @@ class BaseRenderer(ABC):
|
|||
title: Report title
|
||||
userPrompt: Original user prompt for context
|
||||
aiService: AI service instance for additional processing
|
||||
style: Fully-resolved unified style dict from styleDefaults.resolveStyle().
|
||||
When provided, renderers use these values instead of their
|
||||
own defaults / AI-generated styles.
|
||||
|
||||
Returns:
|
||||
List of RenderedDocument objects.
|
||||
|
|
@ -102,6 +105,112 @@ class BaseRenderer(ABC):
|
|||
Even if only one document is returned, it must be wrapped in a list.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _convertUnifiedStyleToInternal(self, style: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert the unified resolvedStyle dict (from styleDefaults) into
|
||||
the renderer-internal style-set format that all rendering methods already
|
||||
consume. Override in subclasses for format-specific tweaks."""
|
||||
h1 = style["headings"]["h1"]
|
||||
h2 = style["headings"]["h2"]
|
||||
h3 = style["headings"].get("h3", h2)
|
||||
h4 = style["headings"].get("h4", h3)
|
||||
tbl = style["table"]
|
||||
para = style["paragraph"]
|
||||
lst = style["list"]
|
||||
cb = style["codeBlock"]
|
||||
return {
|
||||
"title": {
|
||||
"font_size": h1["sizePt"], "color": h1["color"],
|
||||
"bold": h1.get("weight") == "bold", "align": "left",
|
||||
},
|
||||
"heading1": {
|
||||
"font_size": h1["sizePt"], "color": h1["color"],
|
||||
"bold": h1.get("weight") == "bold", "align": "left",
|
||||
},
|
||||
"heading2": {
|
||||
"font_size": h2["sizePt"], "color": h2["color"],
|
||||
"bold": h2.get("weight") == "bold", "align": "left",
|
||||
},
|
||||
"heading3": {
|
||||
"font_size": h3["sizePt"], "color": h3["color"],
|
||||
"bold": h3.get("weight") == "bold", "align": "left",
|
||||
},
|
||||
"heading4": {
|
||||
"font_size": h4["sizePt"], "color": h4["color"],
|
||||
"bold": h4.get("weight") == "bold", "align": "left",
|
||||
},
|
||||
"paragraph": {
|
||||
"font_size": para["sizePt"], "color": para["color"],
|
||||
"bold": False, "align": "left",
|
||||
},
|
||||
"table_header": {
|
||||
"background": tbl["headerBg"], "text_color": tbl["headerFg"],
|
||||
"bold": True, "align": "center",
|
||||
},
|
||||
"table_cell": {
|
||||
"background": tbl["rowBandingOdd"], "text_color": para["color"],
|
||||
"bold": False, "align": "left",
|
||||
},
|
||||
"table_border": {
|
||||
"style": "grid", "color": tbl["borderColor"],
|
||||
},
|
||||
"bullet_list": {
|
||||
"font_size": lst["sizePt"], "color": para["color"],
|
||||
"indent": lst["indentPt"],
|
||||
},
|
||||
"code_block": {
|
||||
"font": style["fonts"]["monospace"],
|
||||
"font_size": cb["fontSizePt"], "color": para["color"],
|
||||
"background": cb["background"],
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _inlineRunsFromContent(content: Dict[str, Any], *, itemsKey: str = None) -> Any:
|
||||
"""Extract inline runs from new-format content, falling back to old format.
|
||||
|
||||
For paragraphs (itemsKey=None):
|
||||
new: content["inlineRuns"] -> List[InlineRun]
|
||||
old: content["text"] -> wrapped in [{"type":"text","value":text}]
|
||||
|
||||
For list items (itemsKey="items"):
|
||||
new: content["items"] is List[List[InlineRun]]
|
||||
old: content["items"] is List[str] or List[{"text":…}]
|
||||
Returns the items list (caller decides per-item conversion).
|
||||
|
||||
For table headers/cells:
|
||||
new: each header/cell is List[InlineRun]
|
||||
old: each header/cell is a plain str
|
||||
Caller handles per-cell.
|
||||
"""
|
||||
if itemsKey:
|
||||
return content.get(itemsKey, [])
|
||||
inlineRuns = content.get("inlineRuns")
|
||||
if inlineRuns:
|
||||
return inlineRuns
|
||||
text = content.get("text", "")
|
||||
if text:
|
||||
return [{"type": "text", "value": text}]
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _inlineRunsForCell(cell) -> list:
|
||||
"""Normalize a single table header or cell value to List[InlineRun].
|
||||
Accepts either a plain string or an already-correct list of run dicts."""
|
||||
if isinstance(cell, list):
|
||||
return cell
|
||||
return [{"type": "text", "value": str(cell) if cell is not None else ""}]
|
||||
|
||||
@staticmethod
|
||||
def _inlineRunsForListItem(item) -> list:
|
||||
"""Normalize a single list item to List[InlineRun].
|
||||
Accepts a plain string, a dict with 'text', or an already-correct list of run dicts."""
|
||||
if isinstance(item, list):
|
||||
return item
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text", "")
|
||||
return [{"type": "text", "value": text}]
|
||||
return [{"type": "text", "value": str(item)}]
|
||||
|
||||
def _determineFilename(self, title: str, mimeType: str) -> str:
|
||||
"""Determine filename from title and mimeType."""
|
||||
|
|
|
|||
|
|
@ -53,18 +53,17 @@ class RendererDocx(BaseRenderer):
|
|||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
|
||||
try:
|
||||
if not DOCX_AVAILABLE:
|
||||
# Fallback to HTML if python-docx not available
|
||||
from .rendererHtml import RendererHtml
|
||||
htmlRenderer = RendererHtml()
|
||||
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
|
||||
|
||||
# Generate DOCX using AI-analyzed styling
|
||||
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
|
||||
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
|
||||
|
||||
# Extract metadata for document type and other info
|
||||
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
|
||||
|
|
@ -114,23 +113,27 @@ class RendererDocx(BaseRenderer):
|
|||
)
|
||||
]
|
||||
|
||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, unifiedStyle: Dict[str, Any] = None) -> str:
|
||||
"""Generate DOCX content from structured JSON document."""
|
||||
import time
|
||||
start_time = time.time()
|
||||
try:
|
||||
self.logger.debug("_generateDocxFromJson: Starting document generation")
|
||||
# Create new document
|
||||
doc = Document()
|
||||
self.logger.debug(f"_generateDocxFromJson: Document created in {time.time() - start_time:.2f}s")
|
||||
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
template_from_metadata = None
|
||||
if json_content and isinstance(json_content.get("metadata"), dict):
|
||||
template_from_metadata = json_content["metadata"].get("templateName")
|
||||
# Phase 3: prefer unified style when provided
|
||||
style_start = time.time()
|
||||
self.logger.debug("_generateDocxFromJson: About to get style set")
|
||||
styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
|
||||
if unifiedStyle:
|
||||
styleSet = self._convertUnifiedStyleToInternal(unifiedStyle)
|
||||
self._unifiedStyle = unifiedStyle
|
||||
else:
|
||||
template_from_metadata = None
|
||||
if json_content and isinstance(json_content.get("metadata"), dict):
|
||||
template_from_metadata = json_content["metadata"].get("templateName")
|
||||
styleSet = await self._getStyleSet(json_content, userPrompt, aiService, templateName=template_from_metadata)
|
||||
self._unifiedStyle = None
|
||||
self.logger.debug(f"_generateDocxFromJson: Style set retrieved in {time.time() - style_start:.2f}s")
|
||||
|
||||
# Setup basic document styles and create all styles from style set
|
||||
|
|
@ -298,11 +301,11 @@ class RendererDocx(BaseRenderer):
|
|||
def _setupBasicDocumentStyles(self, doc: Document) -> None:
|
||||
"""Set up basic document styles."""
|
||||
try:
|
||||
# Set default font
|
||||
style = doc.styles['Normal']
|
||||
font = style.font
|
||||
font.name = 'Calibri'
|
||||
font.size = Pt(11)
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
font.name = us["fonts"]["primary"] if us else 'Calibri'
|
||||
font.size = Pt(us["paragraph"]["sizePt"] if us else 11)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not set up basic document styles: {str(e)}")
|
||||
|
||||
|
|
@ -421,6 +424,8 @@ class RendererDocx(BaseRenderer):
|
|||
def _addMarkdownInlineRuns(self, paragraph, text: str) -> None:
|
||||
"""Parse markdown inline formatting and add corresponding Runs to a python-docx paragraph."""
|
||||
pos = 0
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
monoFont = us["fonts"]["monospace"] if us else "Courier New"
|
||||
for m in self._MD_INLINE_RE.finditer(text):
|
||||
if m.start() > pos:
|
||||
paragraph.add_run(text[pos:m.start()])
|
||||
|
|
@ -434,12 +439,45 @@ class RendererDocx(BaseRenderer):
|
|||
paragraph.add_run(m.group(6)).italic = True
|
||||
elif m.group(7):
|
||||
run = paragraph.add_run(m.group(7))
|
||||
run.font.name = "Courier New"
|
||||
run.font.name = monoFont
|
||||
run.font.size = Pt(9)
|
||||
pos = m.end()
|
||||
if pos < len(text):
|
||||
paragraph.add_run(text[pos:])
|
||||
|
||||
def _renderInlineRuns(self, runs: list, paragraph, styleSet: Dict[str, Any]) -> None:
|
||||
"""Process a list of InlineRun dicts into python-docx Runs on a paragraph."""
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
monoFont = us["fonts"]["monospace"] if us else "Courier New"
|
||||
for run in runs:
|
||||
runType = run.get("type", "text")
|
||||
value = run.get("value", "")
|
||||
if runType == "text":
|
||||
paragraph.add_run(value)
|
||||
elif runType == "bold":
|
||||
paragraph.add_run(value).bold = True
|
||||
elif runType == "italic":
|
||||
paragraph.add_run(value).italic = True
|
||||
elif runType == "code":
|
||||
r = paragraph.add_run(value)
|
||||
r.font.name = monoFont
|
||||
r.font.size = Pt(9)
|
||||
elif runType == "link":
|
||||
r = paragraph.add_run(value)
|
||||
r.font.underline = True
|
||||
r.font.color.rgb = RGBColor(0x29, 0x80, 0xB9)
|
||||
elif runType == "image":
|
||||
b64 = run.get("base64Data", "")
|
||||
if b64:
|
||||
try:
|
||||
imgBytes = base64.b64decode(b64)
|
||||
imgStream = io.BytesIO(imgBytes)
|
||||
paragraph.add_run().add_picture(imgStream, width=Inches(2))
|
||||
except Exception:
|
||||
paragraph.add_run(f"[Image: {run.get('altText', '')}]")
|
||||
else:
|
||||
paragraph.add_run(value)
|
||||
|
||||
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Render a JSON table to DOCX using AI-generated styles.
|
||||
|
|
@ -485,7 +523,7 @@ class RendererDocx(BaseRenderer):
|
|||
except Exception as e:
|
||||
self.logger.error(f"Error rendering table: {str(e)}", exc_info=True)
|
||||
|
||||
def _renderTableFastXml(self, doc: Document, headers: List[str], rows: List[List[Any]], styles: Dict[str, Any]) -> None:
|
||||
def _renderTableFastXml(self, doc: Document, headers: list, rows: list, styles: Dict[str, Any]) -> None:
|
||||
"""
|
||||
High-performance table rendering using direct XML manipulation.
|
||||
|
||||
|
|
@ -546,24 +584,34 @@ class RendererDocx(BaseRenderer):
|
|||
# Build all rows using fast XML
|
||||
rows_start = time.time()
|
||||
|
||||
# Header row
|
||||
headerRow = self._createTableRowXml(headers, isHeader=True)
|
||||
# Resolve header style colors
|
||||
tableStyle = styles.get("table_header", {})
|
||||
headerBg = tableStyle.get("background", "")
|
||||
headerFg = tableStyle.get("text_color", "")
|
||||
|
||||
# Flatten inline-run headers to plain strings for fast XML path
|
||||
flatHeaders = []
|
||||
for h in headers:
|
||||
runs = self._inlineRunsForCell(h)
|
||||
flatHeaders.append("".join(r.get("value", "") for r in runs))
|
||||
|
||||
headerRow = self._createTableRowXml(flatHeaders, isHeader=True, headerBgHex=headerBg or None, headerFgHex=headerFg or None)
|
||||
tbl.append(headerRow)
|
||||
|
||||
|
||||
header_time = time.time() - rows_start
|
||||
self.logger.debug(f"_renderTableFastXml: Header row created in {header_time:.3f}s")
|
||||
|
||||
# Data rows - batch process for performance
|
||||
|
||||
data_start = time.time()
|
||||
rowCount = len(rows)
|
||||
|
||||
|
||||
for idx, rowData in enumerate(rows):
|
||||
# Convert all cells to strings
|
||||
cellTexts = [str(cell) if cell is not None else '' for cell in rowData]
|
||||
# Pad if needed
|
||||
while len(cellTexts) < len(headers):
|
||||
cellTexts = []
|
||||
for cell in rowData:
|
||||
runs = self._inlineRunsForCell(cell)
|
||||
cellTexts.append("".join(r.get("value", "") for r in runs))
|
||||
while len(cellTexts) < len(flatHeaders):
|
||||
cellTexts.append('')
|
||||
|
||||
|
||||
row = self._createTableRowXml(cellTexts, isHeader=False)
|
||||
tbl.append(row)
|
||||
|
||||
|
|
@ -641,74 +689,64 @@ class RendererDocx(BaseRenderer):
|
|||
|
||||
return tblBorders
|
||||
|
||||
def _createTableRowXml(self, cells: List[str], isHeader: bool = False) -> Any:
|
||||
"""
|
||||
Create a table row XML element with cells.
|
||||
|
||||
This is the core fast-path: builds the row XML directly without
|
||||
going through python-docx's slow cell.text assignment.
|
||||
"""
|
||||
def _createTableRowXml(self, cells: list, isHeader: bool = False, headerBgHex: str = None, headerFgHex: str = None) -> Any:
|
||||
"""Create a table row XML element with cells.
|
||||
Fast-path: builds row XML directly via lxml."""
|
||||
from docx.oxml.shared import OxmlElement, qn
|
||||
|
||||
|
||||
if headerBgHex is None:
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
headerBgHex = us["table"]["headerBg"].lstrip('#') if us else '1F3864'
|
||||
else:
|
||||
headerBgHex = headerBgHex.lstrip('#')
|
||||
if headerFgHex is None:
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
headerFgHex = us["table"]["headerFg"].lstrip('#') if us else 'FFFFFF'
|
||||
else:
|
||||
headerFgHex = headerFgHex.lstrip('#')
|
||||
|
||||
tr = OxmlElement('w:tr')
|
||||
|
||||
# Row properties for header
|
||||
if isHeader:
|
||||
trPr = OxmlElement('w:trPr')
|
||||
tblHeader = OxmlElement('w:tblHeader')
|
||||
trPr.append(tblHeader)
|
||||
trPr.append(OxmlElement('w:tblHeader'))
|
||||
tr.append(trPr)
|
||||
|
||||
|
||||
for cellText in cells:
|
||||
# Create cell
|
||||
tc = OxmlElement('w:tc')
|
||||
|
||||
# Cell properties
|
||||
tcPr = OxmlElement('w:tcPr')
|
||||
tcW = OxmlElement('w:tcW')
|
||||
tcW.set(qn('w:type'), 'auto')
|
||||
tcW.set(qn('w:w'), '0')
|
||||
tcPr.append(tcW)
|
||||
|
||||
# Header cell styling - light blue background
|
||||
|
||||
if isHeader:
|
||||
shd = OxmlElement('w:shd')
|
||||
shd.set(qn('w:val'), 'clear')
|
||||
shd.set(qn('w:color'), 'auto')
|
||||
shd.set(qn('w:fill'), '4472C4') # Professional blue
|
||||
shd.set(qn('w:fill'), headerBgHex)
|
||||
tcPr.append(shd)
|
||||
|
||||
|
||||
tc.append(tcPr)
|
||||
|
||||
# Paragraph with text
|
||||
p = OxmlElement('w:p')
|
||||
|
||||
# Add run with text
|
||||
r = OxmlElement('w:r')
|
||||
|
||||
# Header text styling - bold and white
|
||||
|
||||
if isHeader:
|
||||
rPr = OxmlElement('w:rPr')
|
||||
b = OxmlElement('w:b')
|
||||
rPr.append(b)
|
||||
# White text color
|
||||
rPr.append(OxmlElement('w:b'))
|
||||
color = OxmlElement('w:color')
|
||||
color.set(qn('w:val'), 'FFFFFF')
|
||||
color.set(qn('w:val'), headerFgHex)
|
||||
rPr.append(color)
|
||||
r.append(rPr)
|
||||
|
||||
# Text element
|
||||
|
||||
t = OxmlElement('w:t')
|
||||
# Preserve spaces if text starts/ends with whitespace
|
||||
if cellText and (cellText[0] == ' ' or cellText[-1] == ' '):
|
||||
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
|
||||
t.text = cellText
|
||||
r.append(t)
|
||||
|
||||
p.append(r)
|
||||
tc.append(p)
|
||||
tr.append(tc)
|
||||
|
||||
|
||||
return tr
|
||||
|
||||
def _applyHorizontalBordersOnly(self, table) -> None:
|
||||
|
|
@ -836,47 +874,37 @@ class RendererDocx(BaseRenderer):
|
|||
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a JSON bullet list to DOCX using AI-generated styles - OPTIMIZED for performance."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = list_data.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return
|
||||
items = content.get("items", [])
|
||||
bullet_style = styles.get("bullet_list", {})
|
||||
|
||||
# Pre-calculate and cache style objects to avoid repeated parsing
|
||||
font_size_pt = None
|
||||
|
||||
font_size_pt = Pt(bullet_style["font_size"]) if bullet_style.get("font_size") else None
|
||||
text_color_rgb = None
|
||||
if bullet_style:
|
||||
if "font_size" in bullet_style:
|
||||
font_size_pt = Pt(bullet_style["font_size"])
|
||||
if "color" in bullet_style:
|
||||
color_hex = bullet_style["color"].lstrip('#')
|
||||
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
|
||||
|
||||
if bullet_style.get("color"):
|
||||
color_hex = bullet_style["color"].lstrip('#')
|
||||
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
|
||||
|
||||
for item in items:
|
||||
itemText = item if isinstance(item, str) else (item.get("text", "") if isinstance(item, dict) else "")
|
||||
if not itemText:
|
||||
itemRuns = self._inlineRunsForListItem(item)
|
||||
if not itemRuns or not any(r.get("value") for r in itemRuns):
|
||||
continue
|
||||
para = doc.add_paragraph(style='List Bullet')
|
||||
self._addMarkdownInlineRuns(para, itemText)
|
||||
|
||||
# Apply bullet list styling from style set - use cached objects
|
||||
if bullet_style and para.runs:
|
||||
# Use direct access instead of iterating
|
||||
if len(para.runs) > 0:
|
||||
run = para.runs[0]
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
else:
|
||||
# Create run if none exists
|
||||
run = para.add_run()
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
|
||||
isNewRunFormat = isinstance(item, list)
|
||||
if isNewRunFormat:
|
||||
self._renderInlineRuns(itemRuns, para, styles)
|
||||
else:
|
||||
itemText = "".join(r.get("value", "") for r in itemRuns)
|
||||
self._addMarkdownInlineRuns(para, itemText)
|
||||
|
||||
if bullet_style and para.runs and len(para.runs) > 0:
|
||||
run = para.runs[0]
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||
|
||||
|
|
@ -905,90 +933,79 @@ class RendererDocx(BaseRenderer):
|
|||
def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a JSON paragraph to DOCX using AI-generated styles."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = paragraph_data.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
text = content.get("text", "")
|
||||
inlineRuns = self._inlineRunsFromContent(content)
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
inlineRuns = [{"type": "text", "value": content}]
|
||||
else:
|
||||
text = ""
|
||||
|
||||
# CRITICAL: Prevent rendering base64 image data as text
|
||||
# Base64 image data typically starts with /9j/ (JPEG) or iVBORw0KGgo (PNG)
|
||||
if text and (text.startswith("/9j/") or text.startswith("iVBORw0KGgo") or
|
||||
(len(text) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in text[:100]))):
|
||||
# This looks like base64 data - don't render as text
|
||||
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(text)})")
|
||||
inlineRuns = []
|
||||
|
||||
if not inlineRuns:
|
||||
return
|
||||
|
||||
plainText = "".join(r.get("value", "") for r in inlineRuns)
|
||||
if plainText and (plainText.startswith("/9j/") or plainText.startswith("iVBORw0KGgo") or
|
||||
(len(plainText) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in plainText[:100]))):
|
||||
self.logger.warning(f"Skipping rendering of what appears to be base64 data in paragraph (length: {len(plainText)})")
|
||||
para = doc.add_paragraph("[Error: Image data found in text content - image embedding may have failed]")
|
||||
if para.runs:
|
||||
para.runs[0].font.color.rgb = RGBColor(255, 0, 0) # Red color for error
|
||||
para.runs[0].font.color.rgb = RGBColor(255, 0, 0)
|
||||
return
|
||||
|
||||
if text:
|
||||
para = doc.add_paragraph()
|
||||
self._addMarkdownInlineRuns(para, text)
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
if paragraph_style:
|
||||
# Pre-calculate and cache style objects
|
||||
font_size_pt = None
|
||||
text_color_rgb = None
|
||||
if "font_size" in paragraph_style:
|
||||
font_size_pt = Pt(paragraph_style["font_size"])
|
||||
if "color" in paragraph_style:
|
||||
color_hex = paragraph_style["color"].lstrip('#')
|
||||
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
|
||||
bold = paragraph_style.get("bold", False)
|
||||
|
||||
# Use direct access instead of iterating
|
||||
if len(para.runs) > 0:
|
||||
run = para.runs[0]
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
run.font.bold = bold
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
|
||||
para = doc.add_paragraph()
|
||||
hasNewRuns = content.get("inlineRuns") if isinstance(content, dict) else None
|
||||
if hasNewRuns:
|
||||
self._renderInlineRuns(inlineRuns, para, styles)
|
||||
else:
|
||||
self._addMarkdownInlineRuns(para, plainText)
|
||||
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
if paragraph_style:
|
||||
font_size_pt = Pt(paragraph_style["font_size"]) if "font_size" in paragraph_style else None
|
||||
text_color_rgb = None
|
||||
if "color" in paragraph_style:
|
||||
color_hex = paragraph_style["color"].lstrip('#')
|
||||
text_color_rgb = RGBColor(int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16))
|
||||
bold = paragraph_style.get("bold", False)
|
||||
if len(para.runs) > 0:
|
||||
run = para.runs[0]
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
run.font.bold = bold
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
if "align" in paragraph_style:
|
||||
align = paragraph_style["align"]
|
||||
if align == "center":
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
elif align == "right":
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||
else:
|
||||
# Create run if none exists
|
||||
run = para.add_run()
|
||||
if font_size_pt:
|
||||
run.font.size = font_size_pt
|
||||
run.font.bold = bold
|
||||
if text_color_rgb:
|
||||
run.font.color.rgb = text_color_rgb
|
||||
|
||||
if "align" in paragraph_style:
|
||||
align = paragraph_style["align"]
|
||||
if align == "center":
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
elif align == "right":
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||
else:
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||
|
||||
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||
"""Render a JSON code block to DOCX using AI-generated styles."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = code_data.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return
|
||||
code = content.get("code", "")
|
||||
language = content.get("language", "")
|
||||
code_style = styles.get("code_block", {})
|
||||
|
||||
us = getattr(self, '_unifiedStyle', None)
|
||||
|
||||
if code:
|
||||
if language:
|
||||
lang_para = doc.add_paragraph(f"Code ({language}):")
|
||||
if len(lang_para.runs) > 0:
|
||||
lang_para.runs[0].bold = True
|
||||
|
||||
# Pre-calculate and cache style objects
|
||||
code_font_name = code_style.get("font", "Courier New")
|
||||
code_font_size_pt = Pt(code_style.get("font_size", 9))
|
||||
|
||||
code_font_name = code_style.get("font", us["fonts"]["monospace"] if us else "Courier New")
|
||||
code_font_size_pt = Pt(code_style.get("font_size", us["codeBlock"]["fontSizePt"] if us else 9))
|
||||
code_text_color_rgb = None
|
||||
if "color" in code_style:
|
||||
color_hex = code_style["color"].lstrip('#')
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class RendererHtml(BaseRenderer):
|
|||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render HTML document with images as separate files.
|
||||
Returns list of documents: [HTML document, image1, image2, ...]
|
||||
|
|
@ -54,7 +54,7 @@ class RendererHtml(BaseRenderer):
|
|||
self._renderedImages = images
|
||||
|
||||
# Generate HTML using AI-analyzed styling
|
||||
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
||||
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService, style=style)
|
||||
|
||||
# Replace base64 data URIs with relative file paths if images exist
|
||||
if images:
|
||||
|
|
@ -107,11 +107,16 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
return resultDocuments
|
||||
|
||||
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
|
||||
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
||||
try:
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
|
||||
# Use unified style when provided, otherwise fall back to existing flow
|
||||
if style:
|
||||
styles = self._convertUnifiedStyleToInternal(style)
|
||||
self._unifiedStyle = style
|
||||
else:
|
||||
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
|
||||
self._unifiedStyle = None
|
||||
|
||||
# Validate JSON structure
|
||||
if not self._validateJsonStructure(jsonContent):
|
||||
|
|
@ -272,6 +277,10 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
|
||||
"""Generate CSS from style definitions."""
|
||||
# When unified style is available, generate CSS directly from it
|
||||
if getattr(self, "_unifiedStyle", None):
|
||||
return self._generateCssFromUnifiedStyle(self._unifiedStyle)
|
||||
|
||||
css_parts = []
|
||||
|
||||
# Body styles
|
||||
|
|
@ -368,6 +377,164 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
return '\n'.join(css_parts)
|
||||
|
||||
def _generateCssFromUnifiedStyle(self, style: Dict[str, Any]) -> str:
|
||||
"""Generate CSS directly from unified style dict."""
|
||||
fonts = style.get("fonts", {})
|
||||
colors = style.get("colors", {})
|
||||
headings = style.get("headings", {})
|
||||
para = style.get("paragraph", {})
|
||||
tbl = style.get("table", {})
|
||||
lst = style.get("list", {})
|
||||
cb = style.get("codeBlock", {})
|
||||
page = style.get("page", {})
|
||||
|
||||
primaryFont = fonts.get("primary", "Arial, sans-serif")
|
||||
monoFont = fonts.get("monospace", "Courier New, monospace")
|
||||
bgColor = colors.get("background", "#FFFFFF")
|
||||
primaryColor = colors.get("primary", "#1F3864")
|
||||
paraColor = para.get("color", "#333333")
|
||||
paraSizePt = para.get("sizePt", 11)
|
||||
lineSpacing = para.get("lineSpacing", 1.15)
|
||||
|
||||
css_parts = []
|
||||
|
||||
# Body
|
||||
css_parts.append("body {")
|
||||
css_parts.append(f" font-family: {primaryFont};")
|
||||
css_parts.append(f" background: {bgColor};")
|
||||
css_parts.append(f" color: {paraColor};")
|
||||
css_parts.append(f" font-size: {paraSizePt}pt;")
|
||||
css_parts.append(f" line-height: {lineSpacing};")
|
||||
margins = page.get("marginsPt", {})
|
||||
if margins:
|
||||
css_parts.append(f" margin: {margins.get('top', 60)}pt {margins.get('right', 60)}pt {margins.get('bottom', 60)}pt {margins.get('left', 60)}pt;")
|
||||
else:
|
||||
css_parts.append(" margin: 0; padding: 20px;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Document title (uses h1 style)
|
||||
h1 = headings.get("h1", {})
|
||||
css_parts.append(".document-title {")
|
||||
css_parts.append(f" font-size: {h1.get('sizePt', 24)}pt;")
|
||||
css_parts.append(f" color: {h1.get('color', primaryColor)};")
|
||||
css_parts.append(f" font-weight: {h1.get('weight', 'bold')};")
|
||||
css_parts.append(" margin: 0 0 1em 0;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Headings h1-h4
|
||||
for level in range(1, 5):
|
||||
key = f"h{level}"
|
||||
h = headings.get(key, h1 if level == 1 else headings.get(f"h{level-1}", {}))
|
||||
css_parts.append(f"h{level} {{")
|
||||
css_parts.append(f" font-size: {h.get('sizePt', max(24 - (level-1)*4, 12))}pt;")
|
||||
css_parts.append(f" color: {h.get('color', primaryColor)};")
|
||||
css_parts.append(f" font-weight: {h.get('weight', 'bold')};")
|
||||
css_parts.append(f" margin: 1.2em 0 0.4em 0;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Paragraphs
|
||||
css_parts.append("p {")
|
||||
css_parts.append(f" font-size: {paraSizePt}pt;")
|
||||
css_parts.append(f" color: {paraColor};")
|
||||
css_parts.append(f" line-height: {lineSpacing};")
|
||||
css_parts.append(" margin: 0 0 1em 0;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Tables
|
||||
borderColor = tbl.get("borderColor", "#DEE2E6")
|
||||
css_parts.append("table {")
|
||||
css_parts.append(f" border-collapse: collapse;")
|
||||
css_parts.append(f" width: 100%;")
|
||||
css_parts.append(f" margin: 1em 0;")
|
||||
css_parts.append(f" border: 1px solid {borderColor};")
|
||||
css_parts.append("}")
|
||||
|
||||
# Table headers
|
||||
css_parts.append("th {")
|
||||
css_parts.append(f" background: {tbl.get('headerBg', '#1F3864')};")
|
||||
css_parts.append(f" color: {tbl.get('headerFg', '#FFFFFF')};")
|
||||
css_parts.append(" font-weight: bold;")
|
||||
css_parts.append(" text-align: center;")
|
||||
css_parts.append(f" padding: 10px;")
|
||||
css_parts.append(f" border: 1px solid {borderColor};")
|
||||
css_parts.append("}")
|
||||
|
||||
# Table cells
|
||||
css_parts.append("td {")
|
||||
css_parts.append(f" color: {paraColor};")
|
||||
css_parts.append(" padding: 8px;")
|
||||
css_parts.append(f" border: 1px solid {borderColor};")
|
||||
css_parts.append("}")
|
||||
|
||||
# Lists
|
||||
css_parts.append("ul {")
|
||||
css_parts.append(f" font-size: {lst.get('sizePt', paraSizePt)}pt;")
|
||||
css_parts.append(f" color: {paraColor};")
|
||||
css_parts.append(f" padding-left: {lst.get('indentPt', 18)}pt;")
|
||||
css_parts.append(" margin: 0 0 1em 0;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Code blocks
|
||||
css_parts.append("pre {")
|
||||
css_parts.append(f" font-family: {monoFont};")
|
||||
css_parts.append(f" font-size: {cb.get('fontSizePt', 9)}pt;")
|
||||
css_parts.append(f" color: {paraColor};")
|
||||
css_parts.append(f" background: {cb.get('background', '#F8F9FA')};")
|
||||
css_parts.append(f" border: 1px solid {cb.get('borderColor', '#E2E8F0')};")
|
||||
css_parts.append(" border-radius: 4px;")
|
||||
css_parts.append(" padding: 1em;")
|
||||
css_parts.append(" margin: 1em 0;")
|
||||
css_parts.append(" overflow-x: auto;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Images
|
||||
css_parts.append("img {")
|
||||
css_parts.append(" max-width: 100%;")
|
||||
css_parts.append(" height: auto;")
|
||||
css_parts.append(" margin: 1em 0;")
|
||||
css_parts.append(" border-radius: 4px;")
|
||||
css_parts.append("}")
|
||||
|
||||
# Generated info
|
||||
css_parts.append(".generated-info {")
|
||||
css_parts.append(" font-size: 0.9em;")
|
||||
css_parts.append(" color: #666;")
|
||||
css_parts.append(" text-align: center;")
|
||||
css_parts.append(" margin-top: 2em;")
|
||||
css_parts.append(" padding-top: 1em;")
|
||||
css_parts.append(" border-top: 1px solid #ddd;")
|
||||
css_parts.append("}")
|
||||
|
||||
return '\n'.join(css_parts)
|
||||
|
||||
def _renderInlineRuns(self, runs: list) -> str:
|
||||
"""Convert inline runs to HTML markup."""
|
||||
import html as htmlLib
|
||||
parts = []
|
||||
for run in runs:
|
||||
runType = run.get("type", "text")
|
||||
value = htmlLib.escape(run.get("value", ""))
|
||||
if runType == "text":
|
||||
parts.append(value)
|
||||
elif runType == "bold":
|
||||
parts.append(f"<strong>{value}</strong>")
|
||||
elif runType == "italic":
|
||||
parts.append(f"<em>{value}</em>")
|
||||
elif runType == "code":
|
||||
parts.append(f"<code>{value}</code>")
|
||||
elif runType == "link":
|
||||
href = htmlLib.escape(run.get("href", ""))
|
||||
parts.append(f'<a href="{href}">{value}</a>')
|
||||
elif runType == "image":
|
||||
b64 = run.get("base64Data", "")
|
||||
mime = run.get("mimeType", "image/png")
|
||||
alt = value
|
||||
if b64:
|
||||
parts.append(f'<img src="data:{mime};base64,{b64}" alt="{alt}" style="max-width:100%;height:auto;">')
|
||||
else:
|
||||
parts.append(value)
|
||||
return "".join(parts)
|
||||
|
||||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to HTML using AI-generated styles.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
|
|
@ -419,6 +586,11 @@ class RendererHtml(BaseRenderer):
|
|||
# Regular paragraph element - extract from nested content structure (standard JSON format)
|
||||
content = element.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
# New format: inlineRuns
|
||||
inlineRuns = content.get("inlineRuns")
|
||||
if inlineRuns and isinstance(inlineRuns, list):
|
||||
htmlParts.append(f'<p>{self._renderInlineRuns(inlineRuns)}</p>')
|
||||
continue
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
|
|
@ -495,7 +667,8 @@ class RendererHtml(BaseRenderer):
|
|||
# Table header
|
||||
htmlParts.append('<thead><tr>')
|
||||
for header in headers:
|
||||
htmlParts.append(f'<th>{header}</th>')
|
||||
runs = self._inlineRunsForCell(header)
|
||||
htmlParts.append(f'<th>{self._renderInlineRuns(runs)}</th>')
|
||||
htmlParts.append('</tr></thead>')
|
||||
|
||||
# Table body
|
||||
|
|
@ -503,7 +676,8 @@ class RendererHtml(BaseRenderer):
|
|||
for row in rows:
|
||||
htmlParts.append('<tr>')
|
||||
for cellData in row:
|
||||
htmlParts.append(f'<td>{cellData}</td>')
|
||||
runs = self._inlineRunsForCell(cellData)
|
||||
htmlParts.append(f'<td>{self._renderInlineRuns(runs)}</td>')
|
||||
htmlParts.append('</tr>')
|
||||
htmlParts.append('</tbody>')
|
||||
|
||||
|
|
@ -528,10 +702,8 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
htmlParts = ['<ul>']
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
htmlParts.append(f'<li>{item}</li>')
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
htmlParts.append(f'<li>{item["text"]}</li>')
|
||||
runs = self._inlineRunsForListItem(item)
|
||||
htmlParts.append(f'<li>{self._renderInlineRuns(runs)}</li>')
|
||||
htmlParts.append('</ul>')
|
||||
|
||||
return '\n'.join(htmlParts)
|
||||
|
|
@ -571,6 +743,11 @@ class RendererHtml(BaseRenderer):
|
|||
if isinstance(el, dict):
|
||||
content = el.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
# New format: inlineRuns
|
||||
inlineRuns = content.get("inlineRuns")
|
||||
if inlineRuns and isinstance(inlineRuns, list):
|
||||
texts.append(self._renderInlineRuns(inlineRuns))
|
||||
continue
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
|
|
@ -581,16 +758,18 @@ class RendererHtml(BaseRenderer):
|
|||
elif isinstance(el, str):
|
||||
texts.append(el)
|
||||
if texts:
|
||||
# Join multiple paragraphs with <p> tags
|
||||
return '\n'.join(f'<p>{text}</p>' for text in texts)
|
||||
return ""
|
||||
elif isinstance(paragraphData, str):
|
||||
return f'<p>{paragraphData}</p>'
|
||||
elif isinstance(paragraphData, dict):
|
||||
# Handle nested content structure: element.content vs element.text
|
||||
# Extract from nested content structure
|
||||
content = paragraphData.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
# New format: inlineRuns
|
||||
inlineRuns = content.get("inlineRuns")
|
||||
if inlineRuns and isinstance(inlineRuns, list):
|
||||
return f'<p>{self._renderInlineRuns(inlineRuns)}</p>'
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
|
|
|
|||
|
|
@ -106,17 +106,17 @@ class RendererPdf(BaseRenderer):
|
|||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
||||
try:
|
||||
if not REPORTLAB_AVAILABLE:
|
||||
# Fallback to HTML if reportlab not available
|
||||
from .rendererHtml import RendererHtml
|
||||
html_renderer = RendererHtml()
|
||||
return await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return await html_renderer.render(extractedContent, title, userPrompt, aiService, style=style)
|
||||
|
||||
# Generate PDF using AI-analyzed styling
|
||||
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
||||
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService, unifiedStyle=style)
|
||||
|
||||
# Extract metadata for document type and other info
|
||||
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
|
||||
|
|
@ -163,11 +163,28 @@ class RendererPdf(BaseRenderer):
|
|||
)
|
||||
]
|
||||
|
||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, unifiedStyle: Dict[str, Any] = None) -> str:
|
||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||
try:
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
styles = await self._getStyleSet(json_content, userPrompt, aiService)
|
||||
# Get style set from unified style or legacy approach
|
||||
if unifiedStyle:
|
||||
styles = self._convertUnifiedStyleToInternal(unifiedStyle)
|
||||
self._unifiedStyle = unifiedStyle
|
||||
for level in range(1, 7):
|
||||
hKey = f"heading{level}"
|
||||
if hKey not in styles:
|
||||
styles[hKey] = self._defaultHeadingStyleDef(level)
|
||||
else:
|
||||
styles[hKey].setdefault("space_after", 12)
|
||||
styles[hKey].setdefault("space_before", 12)
|
||||
styles["paragraph"].setdefault("space_after", 6)
|
||||
styles["paragraph"].setdefault("line_height", unifiedStyle["paragraph"].get("lineSpacing", 1.2))
|
||||
styles["bullet_list"].setdefault("space_after", 3)
|
||||
styles["code_block"].setdefault("space_after", 6)
|
||||
styles["code_block"].setdefault("align", "left")
|
||||
else:
|
||||
styles = await self._getStyleSet(json_content, userPrompt, aiService)
|
||||
self._unifiedStyle = None
|
||||
|
||||
# Validate JSON structure
|
||||
if not self._validateJsonStructure(json_content):
|
||||
|
|
@ -179,15 +196,13 @@ class RendererPdf(BaseRenderer):
|
|||
# Create a buffer to hold the PDF
|
||||
buffer = io.BytesIO()
|
||||
|
||||
# Create PDF document
|
||||
doc = SimpleDocTemplate(
|
||||
buffer,
|
||||
pagesize=A4,
|
||||
rightMargin=72,
|
||||
leftMargin=72,
|
||||
topMargin=72,
|
||||
bottomMargin=18
|
||||
)
|
||||
# Create PDF document with unified page margins or defaults
|
||||
pageCfg = unifiedStyle["page"] if unifiedStyle else None
|
||||
if pageCfg:
|
||||
m = pageCfg["marginsPt"]
|
||||
doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=m["right"], leftMargin=m["left"], topMargin=m["top"], bottomMargin=m["bottom"])
|
||||
else:
|
||||
doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
|
||||
|
||||
# Build PDF content (no cover page — body starts on page 1; filename still uses `title`)
|
||||
story = []
|
||||
|
|
@ -609,6 +624,31 @@ class RendererPdf(BaseRenderer):
|
|||
.replace(">", ">")
|
||||
)
|
||||
|
||||
def _renderInlineRunsToPdfXml(self, runs: list) -> str:
    """Convert inline runs to ReportLab Paragraph XML.

    Text payloads are XML-escaped via self._escapeReportlabXml. Inline
    images cannot be embedded in a ReportLab Paragraph, so they degrade
    to an "[Image: alt]" placeholder; unknown run types fall back to
    plain escaped text.
    """
    parts = []
    us = getattr(self, '_unifiedStyle', None)
    # Robust lookup: a unified style missing fonts.monospace falls back
    # to the ReportLab built-in Courier instead of raising KeyError.
    monoFont = (us or {}).get("fonts", {}).get("monospace") or "Courier"
    for run in runs:
        runType = run.get("type", "text")
        value = self._escapeReportlabXml(run.get("value", ""))
        if runType == "text":
            parts.append(value)
        elif runType == "bold":
            parts.append(f"<b>{value}</b>")
        elif runType == "italic":
            parts.append(f"<i>{value}</i>")
        elif runType == "code":
            parts.append(f'<font name="{monoFont}">{value}</font>')
        elif runType == "link":
            href = self._escapeReportlabXml(run.get("href", ""))
            parts.append(f'<a href="{href}">{value}</a>')
        elif runType == "image":
            parts.append(f"[Image: {value}]")
        else:
            parts.append(value)
    return "".join(parts)
|
||||
|
||||
def _applyInlineMarkdownToEscapedPlain(self, text: str) -> str:
|
||||
"""Escape XML then apply bold/italic to a segment with no `code` spans (code is handled separately)."""
|
||||
if not text:
|
||||
|
|
@ -744,10 +784,10 @@ class RendererPdf(BaseRenderer):
|
|||
return []
|
||||
headers = content.get("headers", [])
|
||||
rows = content.get("rows", [])
|
||||
|
||||
|
||||
if not headers or not rows:
|
||||
return []
|
||||
|
||||
|
||||
numCols = len(headers)
|
||||
colWidth = _PDF_CONTENT_WIDTH_PT / max(numCols, 1)
|
||||
colWidths = [colWidth] * numCols
|
||||
|
|
@ -755,8 +795,12 @@ class RendererPdf(BaseRenderer):
|
|||
hdrPs = self._createTableCellParagraphStyle(styles, header=True, tableStyleKey="table_header")
|
||||
cellPs = self._createTableCellParagraphStyle(styles, header=False, tableStyleKey="table_cell")
|
||||
|
||||
def _cellPara(val, ps):
|
||||
return self._paragraphFromInlineMarkdown(str(val) if val is not None else "", ps)
|
||||
def _cellPara(cell, ps):
|
||||
runs = self._inlineRunsForCell(cell)
|
||||
if isinstance(cell, list):
|
||||
xml = self._renderInlineRunsToPdfXml(runs)
|
||||
return Paragraph(_wrapEmojiSpansInXml(xml), ps)
|
||||
return self._paragraphFromInlineMarkdown(str(cell) if cell is not None else "", ps)
|
||||
|
||||
headerRow = [_cellPara(h, hdrPs) for h in headers]
|
||||
bodyRows = []
|
||||
|
|
@ -786,7 +830,7 @@ class RendererPdf(BaseRenderer):
|
|||
]
|
||||
table.setStyle(TableStyle(table_style))
|
||||
return [table, Spacer(1, 12)]
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||
return []
|
||||
|
|
@ -794,32 +838,29 @@ class RendererPdf(BaseRenderer):
|
|||
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = list_data.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return []
|
||||
items = content.get("items", [])
|
||||
bullet_style_def = styles.get("bullet_list", {})
|
||||
|
||||
bulletStyleDef = styles.get("bullet_list", {})
|
||||
normalStyle = self._createNormalStyle(styles)
|
||||
|
||||
elements = []
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
elements.append(
|
||||
Paragraph(f"• {self._markdownInlineToReportlabXml(item)}", self._createNormalStyle(styles))
|
||||
)
|
||||
runs = self._inlineRunsForListItem(item)
|
||||
if isinstance(item, list):
|
||||
xml = self._renderInlineRunsToPdfXml(runs)
|
||||
elements.append(Paragraph(f"\u2022 {_wrapEmojiSpansInXml(xml)}", normalStyle))
|
||||
elif isinstance(item, str):
|
||||
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item)}", normalStyle))
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
elements.append(
|
||||
Paragraph(
|
||||
f"• {self._markdownInlineToReportlabXml(item['text'])}",
|
||||
self._createNormalStyle(styles),
|
||||
)
|
||||
)
|
||||
|
||||
elements.append(Paragraph(f"\u2022 {self._markdownInlineToReportlabXml(item['text'])}", normalStyle))
|
||||
|
||||
if elements:
|
||||
elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))
|
||||
|
||||
elements.append(Spacer(1, bulletStyleDef.get("space_after", 3)))
|
||||
|
||||
return elements
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||
return []
|
||||
|
|
@ -848,20 +889,27 @@ class RendererPdf(BaseRenderer):
|
|||
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = paragraph_data.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
else:
|
||||
text = ""
|
||||
|
||||
if isinstance(content, str):
|
||||
content = {"text": content}
|
||||
if not isinstance(content, dict):
|
||||
return []
|
||||
|
||||
normalStyle = self._createNormalStyle(styles)
|
||||
|
||||
if "inlineRuns" in content:
|
||||
runs = self._inlineRunsFromContent(content)
|
||||
xml = self._renderInlineRunsToPdfXml(runs)
|
||||
if xml:
|
||||
return [Paragraph(_wrapEmojiSpansInXml(xml), normalStyle)]
|
||||
return []
|
||||
|
||||
text = content.get("text", "")
|
||||
if text:
|
||||
return [self._paragraphFromInlineMarkdown(text, self._createNormalStyle(styles))]
|
||||
|
||||
return [self._paragraphFromInlineMarkdown(text, normalStyle)]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ class RendererPptx(BaseRenderer):
|
|||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render content as PowerPoint presentation from JSON data.
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ class RendererPptx(BaseRenderer):
|
|||
title: Title for the presentation
|
||||
userPrompt: User prompt for AI styling
|
||||
aiService: AI service for styling
|
||||
**kwargs: Additional rendering options
|
||||
style: Unified style dict from pipeline (preferred over AI-generated styles)
|
||||
|
||||
Returns:
|
||||
Base64-encoded PowerPoint presentation as string
|
||||
|
|
@ -81,8 +81,19 @@ class RendererPptx(BaseRenderer):
|
|||
from pptx.dml.color import RGBColor
|
||||
import re
|
||||
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
|
||||
# Get style set: prefer unified style, then metadata, then AI-enhanced
|
||||
if style:
|
||||
internalStyle = self._convertUnifiedStyleToInternal(style)
|
||||
defaultPptx = self._getDefaultStyleSet()
|
||||
for key in ("slide_size", "content_per_slide", "design_theme", "color_scheme", "background_style", "accent_colors", "professional_grade", "executive_ready"):
|
||||
internalStyle[key] = defaultPptx.get(key)
|
||||
internalStyle["heading"] = internalStyle["heading1"]
|
||||
internalStyle["subheading"] = internalStyle["heading2"]
|
||||
styles = internalStyle
|
||||
self._unifiedStyle = style
|
||||
else:
|
||||
styles = await self._getStyleSet(extractedContent, userPrompt, aiService)
|
||||
self._unifiedStyle = None
|
||||
|
||||
# Create new presentation
|
||||
prs = Presentation()
|
||||
|
|
@ -910,15 +921,17 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
# Extract from nested content structure
|
||||
content = paragraph_data.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
text = content.get("text", "")
|
||||
if content.get("inlineRuns"):
|
||||
text = "".join(r.get("value", "") for r in content["inlineRuns"])
|
||||
else:
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
else:
|
||||
text = ""
|
||||
|
||||
if text:
|
||||
# Limit paragraph length based on content density
|
||||
max_length = 200 # Default limit
|
||||
max_length = 200
|
||||
if len(text) > max_length:
|
||||
text = text[:max_length] + "..."
|
||||
|
||||
|
|
@ -1303,6 +1316,32 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
r.text = text[pos:]
|
||||
_applyBase(r)
|
||||
|
||||
def _renderInlineRunsPptx(self, runs, paragraph, fontSize=None, fontColor=None):
    """Process InlineRun dicts into pptx text runs.

    Clears the paragraph, then appends one pptx run per InlineRun,
    applying bold/italic/monospace/link formatting on top of the base
    fontSize/fontColor supplied by the caller. Returns None (mutates
    `paragraph` in place).
    """
    from pptx.util import Pt
    paragraph.text = ""
    us = getattr(self, '_unifiedStyle', None)
    # Robust lookup: a unified style missing fonts.monospace falls back
    # to "Courier New" instead of raising KeyError.
    monoFont = (us or {}).get("fonts", {}).get("monospace") or "Courier New"
    for run in runs:
        runType = run.get("type", "text")
        value = run.get("value", "")
        r = paragraph.add_run()
        r.text = value
        if fontSize:
            r.font.size = fontSize
        if fontColor:
            r.font.color.rgb = fontColor
        if runType == "bold":
            r.font.bold = True
        elif runType == "italic":
            r.font.italic = True
        elif runType == "code":
            r.font.name = monoFont
            # Shrink code spans slightly so monospace text fits the line.
            if fontSize and hasattr(fontSize, 'pt'):
                r.font.size = Pt(max(8, int(fontSize.pt * 0.85)))
        elif runType == "link":
            r.font.underline = True
            # Attach the actual hyperlink target when provided; previously
            # link runs were only underlined with no clickable address.
            href = run.get("href")
            if href:
                try:
                    r.hyperlink.address = href
                except Exception:
                    # Best-effort: keep the underline even if pptx rejects the URL.
                    pass
        # runType "text" / "image" / unknown: keep base formatting only.
|
||||
|
||||
def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float = None, max_width: float = None) -> None:
|
||||
"""Add a PowerPoint table to slide."""
|
||||
try:
|
||||
|
|
@ -1374,7 +1413,8 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
cell = table.cell(0, col_idx)
|
||||
# Clear existing text and set new text
|
||||
cell.text_frame.clear()
|
||||
header_text = str(header) if header else ""
|
||||
cellRuns = self._inlineRunsForCell(header)
|
||||
header_text = "".join(r.get("value", "") for r in cellRuns)
|
||||
cell.text = header_text
|
||||
|
||||
# Ensure paragraph exists
|
||||
|
|
@ -1420,7 +1460,8 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
cell = table.cell(row_idx, col_idx)
|
||||
# Clear existing text and set new text
|
||||
cell.text_frame.clear()
|
||||
cell_text = str(cell_data) if cell_data is not None else ""
|
||||
cellRuns = self._inlineRunsForCell(cell_data)
|
||||
cell_text = "".join(r.get("value", "") for r in cellRuns)
|
||||
cell.text = cell_text
|
||||
|
||||
# Ensure paragraph exists
|
||||
|
|
@ -1462,9 +1503,8 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
fontColor = RGBColor(*self._getSafeColor(listStyle.get("color", (47, 47, 47))))
|
||||
|
||||
for item in items:
|
||||
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
|
||||
if not itemText or not itemText.strip():
|
||||
continue
|
||||
runs = self._inlineRunsForListItem(item)
|
||||
isNewFormat = isinstance(item, list)
|
||||
|
||||
p = text_frame.add_paragraph()
|
||||
p.level = 0
|
||||
|
|
@ -1472,21 +1512,33 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
p.space_before = Pt(2)
|
||||
p.space_after = Pt(2)
|
||||
|
||||
# Consistent bullet prefix
|
||||
self._addMarkdownInlineRuns(p, f" • {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
|
||||
if isNewFormat:
|
||||
bulletRuns = [{"type": "text", "value": " \u2022 "}] + runs
|
||||
self._renderInlineRunsPptx(bulletRuns, p, fontSize=fontSize, fontColor=fontColor)
|
||||
else:
|
||||
itemText = item.get("text", "") if isinstance(item, dict) else str(item)
|
||||
if not itemText or not itemText.strip():
|
||||
continue
|
||||
self._addMarkdownInlineRuns(p, f" \u2022 {itemText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
|
||||
|
||||
# Subitems
|
||||
# Subitems (only for dict-style items)
|
||||
if isinstance(item, dict):
|
||||
for sub in item.get("subitems", []):
|
||||
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
|
||||
if not subText:
|
||||
continue
|
||||
subRuns = self._inlineRunsForListItem(sub)
|
||||
isSubNew = isinstance(sub, list)
|
||||
sp = text_frame.add_paragraph()
|
||||
sp.level = 0
|
||||
sp.alignment = PP_ALIGN.LEFT
|
||||
sp.space_before = Pt(1)
|
||||
sp.space_after = Pt(1)
|
||||
self._addMarkdownInlineRuns(sp, f" – {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
|
||||
if isSubNew:
|
||||
subBulletRuns = [{"type": "text", "value": " \u2013 "}] + subRuns
|
||||
self._renderInlineRunsPptx(subBulletRuns, sp, fontSize=fontSize, fontColor=fontColor)
|
||||
else:
|
||||
subText = sub.get("text", "") if isinstance(sub, dict) else str(sub)
|
||||
if not subText:
|
||||
continue
|
||||
self._addMarkdownInlineRuns(sp, f" \u2013 {subText}", fontSize=fontSize, fontColor=fontColor, fontBold=False)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error adding bullet list to slide: {str(e)}")
|
||||
|
|
@ -1540,42 +1592,53 @@ JSON ONLY. NO OTHER TEXT."""
|
|||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
inlineRuns = self._inlineRunsFromContent(content)
|
||||
hasInlineRuns = content.get("inlineRuns") is not None
|
||||
text = content.get("text", "")
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
inlineRuns = [{"type": "text", "value": text}] if text else []
|
||||
hasInlineRuns = False
|
||||
else:
|
||||
text = ""
|
||||
inlineRuns = []
|
||||
hasInlineRuns = False
|
||||
|
||||
if text:
|
||||
p = text_frame.add_paragraph()
|
||||
p.level = 0
|
||||
|
||||
try:
|
||||
if hasattr(p, 'paragraph_format'):
|
||||
p.paragraph_format.bullet.type = None
|
||||
except (AttributeError, TypeError):
|
||||
pass
|
||||
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
base_font_size = paragraph_style.get("font_size", 14)
|
||||
calculated_size = max(10, int(base_font_size * font_size_multiplier))
|
||||
fSize = Pt(calculated_size)
|
||||
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
|
||||
fBold = paragraph_style.get("bold", False)
|
||||
if not inlineRuns and not text:
|
||||
return
|
||||
|
||||
p = text_frame.add_paragraph()
|
||||
p.level = 0
|
||||
|
||||
try:
|
||||
if hasattr(p, 'paragraph_format'):
|
||||
p.paragraph_format.bullet.type = None
|
||||
except (AttributeError, TypeError):
|
||||
pass
|
||||
|
||||
paragraph_style = styles.get("paragraph", {})
|
||||
base_font_size = paragraph_style.get("font_size", 14)
|
||||
calculated_size = max(10, int(base_font_size * font_size_multiplier))
|
||||
fSize = Pt(calculated_size)
|
||||
fColor = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47))))
|
||||
fBold = paragraph_style.get("bold", False)
|
||||
|
||||
if hasInlineRuns:
|
||||
self._renderInlineRunsPptx(inlineRuns, p, fontSize=fSize, fontColor=fColor)
|
||||
else:
|
||||
self._addMarkdownInlineRuns(p, text, fontSize=fSize, fontColor=fColor, fontBold=fBold)
|
||||
|
||||
# Add proper spacing
|
||||
p.space_before = Pt(6) # Space before paragraph
|
||||
p.space_after = Pt(6) # Space after paragraph
|
||||
p.line_spacing = 1.2 # Line spacing for readability
|
||||
|
||||
align = paragraph_style.get("align", "left")
|
||||
if align == "center":
|
||||
p.alignment = PP_ALIGN.CENTER
|
||||
elif align == "right":
|
||||
p.alignment = PP_ALIGN.RIGHT
|
||||
else:
|
||||
p.alignment = PP_ALIGN.LEFT
|
||||
|
||||
p.space_before = Pt(6)
|
||||
p.space_after = Pt(6)
|
||||
p.line_spacing = 1.2
|
||||
|
||||
align = paragraph_style.get("align", "left")
|
||||
if align == "center":
|
||||
p.alignment = PP_ALIGN.CENTER
|
||||
elif align == "right":
|
||||
p.alignment = PP_ALIGN.RIGHT
|
||||
else:
|
||||
p.alignment = PP_ALIGN.LEFT
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error adding paragraph to slide: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -68,17 +68,17 @@ class RendererXlsx(BaseRenderer):
|
|||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
return list(supportedSectionTypes)
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||
try:
|
||||
if not OPENPYXL_AVAILABLE:
|
||||
# Fallback to CSV if openpyxl not available
|
||||
from .rendererCsv import RendererCsv
|
||||
csvRenderer = RendererCsv()
|
||||
return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return await csvRenderer.render(extractedContent, title, userPrompt, aiService, style=style)
|
||||
|
||||
# Generate Excel using AI-analyzed styling
|
||||
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
|
||||
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService, style=style)
|
||||
|
||||
# Extract metadata for document type and other info
|
||||
metadata = extractedContent.get("metadata", {}) if extractedContent else {}
|
||||
|
|
@ -298,15 +298,22 @@ class RendererXlsx(BaseRenderer):
|
|||
except Exception as e:
|
||||
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
||||
|
||||
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None, *, style: Dict[str, Any] = None) -> str:
|
||||
"""Generate Excel content from structured JSON document using AI-generated styling."""
|
||||
try:
|
||||
# Debug output
|
||||
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
|
||||
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
|
||||
|
||||
# Get style set: use styles from metadata if available, otherwise enhance with AI
|
||||
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
|
||||
# Store unified style for use by inline-run helpers
|
||||
self._unifiedStyle = style
|
||||
|
||||
# Get style set: prefer unified style, fall back to legacy approach
|
||||
if style:
|
||||
styles = self._convertUnifiedStyleToInternal(style)
|
||||
styles = self._convertColorsFormat(styles)
|
||||
else:
|
||||
styles = await self._getStyleSet(jsonContent, userPrompt, aiService)
|
||||
|
||||
# Validate JSON structure (standardized schema: {metadata: {...}, documents: [{sections: [...]}]})
|
||||
if not self._validateJsonStructure(jsonContent):
|
||||
|
|
@ -511,6 +518,10 @@ class RendererXlsx(BaseRenderer):
|
|||
"code_block": {"font": "Courier New", "font_size": 10, "color": "FF2F2F2F", "background": "FFF5F5F5"}
|
||||
}
|
||||
|
||||
def _renderInlineRuns(self, runs: list) -> str:
|
||||
"""Flatten inline runs to plain text for Excel cells."""
|
||||
return "".join(r.get("value", "") for r in runs)
|
||||
|
||||
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get AI styles with proper Excel color conversion."""
|
||||
if not aiService:
|
||||
|
|
@ -1206,7 +1217,9 @@ class RendererXlsx(BaseRenderer):
|
|||
|
||||
# Add headers with formatting - OPTIMIZED: use cached style objects
|
||||
for col, header in enumerate(headers, 1):
|
||||
sanitized_header = self._sanitizeCellValue(header)
|
||||
runs = self._inlineRunsForCell(header)
|
||||
headerText = self._renderInlineRuns(runs)
|
||||
sanitized_header = self._sanitizeCellValue(headerText)
|
||||
cell = sheet.cell(row=headerRow, column=col, value=sanitized_header)
|
||||
|
||||
# Apply styling with fallbacks - use pre-calculated objects
|
||||
|
|
@ -1272,7 +1285,9 @@ class RendererXlsx(BaseRenderer):
|
|||
cell_values = cell_values[:header_count]
|
||||
|
||||
for col, cell_value in enumerate(cell_values, 1):
|
||||
sanitized_value = self._sanitizeCellValue(cell_value)
|
||||
runs = self._inlineRunsForCell(cell_value)
|
||||
cellText = self._renderInlineRuns(runs)
|
||||
sanitized_value = self._sanitizeCellValue(cellText)
|
||||
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)
|
||||
|
||||
# Apply styling with fallbacks - use pre-calculated objects
|
||||
|
|
@ -1311,20 +1326,20 @@ class RendererXlsx(BaseRenderer):
|
|||
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||
"""Add a list element to Excel sheet. Expects nested content structure."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if not isinstance(content, dict):
|
||||
return startRow
|
||||
list_items = content.get("items") or []
|
||||
# Ensure list_items is a list
|
||||
if not isinstance(list_items, list):
|
||||
list_items = []
|
||||
listItems = content.get("items") or []
|
||||
if not isinstance(listItems, list):
|
||||
listItems = []
|
||||
|
||||
list_style = styles.get("bullet_list", {})
|
||||
for item in list_items:
|
||||
sheet.cell(row=startRow, column=1, value=f"• {item}")
|
||||
if list_style.get("color"):
|
||||
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
|
||||
listStyle = styles.get("bullet_list", {})
|
||||
for item in listItems:
|
||||
runs = self._inlineRunsForListItem(item)
|
||||
text = self._renderInlineRuns(runs)
|
||||
sheet.cell(row=startRow, column=1, value=f"\u2022 {text}")
|
||||
if listStyle.get("color"):
|
||||
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(listStyle["color"]))
|
||||
startRow += 1
|
||||
|
||||
return startRow
|
||||
|
|
@ -1336,10 +1351,10 @@ class RendererXlsx(BaseRenderer):
|
|||
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||
"""Add a paragraph element to Excel sheet. Expects nested content structure."""
|
||||
try:
|
||||
# Extract from nested content structure
|
||||
content = element.get("content", {})
|
||||
if isinstance(content, dict):
|
||||
text = content.get("text", "")
|
||||
runs = self._inlineRunsFromContent(content)
|
||||
text = self._renderInlineRuns(runs)
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
"""Default style definitions and style resolution for document rendering."""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
DEFAULT_STYLE: Dict[str, Any] = {
|
||||
"fonts": {
|
||||
"primary": "Calibri",
|
||||
"monospace": "Consolas",
|
||||
},
|
||||
"colors": {
|
||||
"primary": "#1F3864",
|
||||
"secondary": "#2C3E50",
|
||||
"accent": "#2980B9",
|
||||
"background": "#FFFFFF",
|
||||
},
|
||||
"headings": {
|
||||
"h1": {"sizePt": 24, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 12, "spaceAfterPt": 6},
|
||||
"h2": {"sizePt": 18, "weight": "bold", "color": "#1F3864", "spaceBeforePt": 10, "spaceAfterPt": 4},
|
||||
"h3": {"sizePt": 14, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 8, "spaceAfterPt": 3},
|
||||
"h4": {"sizePt": 12, "weight": "bold", "color": "#2C3E50", "spaceBeforePt": 6, "spaceAfterPt": 2},
|
||||
},
|
||||
"paragraph": {"sizePt": 11, "lineSpacing": 1.15, "color": "#333333"},
|
||||
"table": {
|
||||
"headerBg": "#1F3864",
|
||||
"headerFg": "#FFFFFF",
|
||||
"headerSizePt": 10,
|
||||
"bodySizePt": 10,
|
||||
"rowBandingEven": "#F2F6FC",
|
||||
"rowBandingOdd": "#FFFFFF",
|
||||
"borderColor": "#CBD5E1",
|
||||
"borderWidthPt": 0.5,
|
||||
},
|
||||
"list": {"bulletChar": "\u2022", "indentPt": 18, "sizePt": 11},
|
||||
"image": {"defaultWidthPt": 480, "maxWidthPt": 800, "alignment": "center"},
|
||||
"codeBlock": {"fontSizePt": 9, "background": "#F8F9FA", "borderColor": "#E2E8F0"},
|
||||
"page": {
|
||||
"format": "A4",
|
||||
"marginsPt": {"top": 60, "bottom": 60, "left": 60, "right": 60},
|
||||
"showPageNumbers": True,
|
||||
"headerHeight": 30,
|
||||
"footerHeight": 30,
|
||||
"headerLogo": None,
|
||||
"headerText": "",
|
||||
"footerText": "",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _deepMerge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Recursively merge override into base. Both dicts left unchanged; returns new dict."""
|
||||
result = {}
|
||||
for key in base:
|
||||
if key in override:
|
||||
baseVal = base[key]
|
||||
overVal = override[key]
|
||||
if isinstance(baseVal, dict) and isinstance(overVal, dict):
|
||||
result[key] = _deepMerge(baseVal, overVal)
|
||||
else:
|
||||
result[key] = overVal
|
||||
else:
|
||||
result[key] = base[key]
|
||||
for key in override:
|
||||
if key not in base:
|
||||
result[key] = override[key]
|
||||
return result
|
||||
|
||||
|
||||
def resolveStyle(agentStyle: dict | None) -> Dict[str, Any]:
|
||||
"""Deep-merge DEFAULT_STYLE <- agentStyle. Returns fully resolved style dict."""
|
||||
if not agentStyle:
|
||||
return dict(DEFAULT_STYLE)
|
||||
return _deepMerge(DEFAULT_STYLE, agentStyle)
|
||||
|
|
@ -9,11 +9,70 @@ from typing import Any, Dict
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _parseInlineRuns(text: str) -> list:
|
||||
"""
|
||||
Parse inline markdown formatting into a list of InlineRun dicts.
|
||||
Handles: images, links, bold, italic, inline code, plain text.
|
||||
Uses a regex-based tokenizer that processes tokens left-to-right.
|
||||
"""
|
||||
if not text:
|
||||
return [{"type": "text", "value": ""}]
|
||||
|
||||
# Pattern order matters: images before links, bold before italic
|
||||
_TOKEN_RE = re.compile(
|
||||
r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)' # image
|
||||
r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)' # link
|
||||
r'|`(?P<code>[^`]+)`' # inline code
|
||||
r'|\*\*(?P<bold>.+?)\*\*' # bold
|
||||
r'|(?<!\w)\*(?P<italic1>.+?)\*(?!\w)' # italic *x*
|
||||
r'|(?<!\w)_(?P<italic2>.+?)_(?!\w)' # italic _x_
|
||||
)
|
||||
|
||||
runs = []
|
||||
lastEnd = 0
|
||||
|
||||
for m in _TOKEN_RE.finditer(text):
|
||||
# Plain text before this match
|
||||
if m.start() > lastEnd:
|
||||
runs.append({"type": "text", "value": text[lastEnd:m.start()]})
|
||||
|
||||
if m.group("imgAlt") is not None or m.group("imgSrc") is not None:
|
||||
alt = (m.group("imgAlt") or "").strip() or "Image"
|
||||
src = (m.group("imgSrc") or "").strip()
|
||||
widthStr = m.group("imgWidth")
|
||||
run = {"type": "image", "value": alt}
|
||||
if src.startswith("file:"):
|
||||
run["fileId"] = src[5:]
|
||||
else:
|
||||
run["href"] = src
|
||||
if widthStr:
|
||||
run["widthPt"] = int(widthStr)
|
||||
runs.append(run)
|
||||
elif m.group("linkText") is not None:
|
||||
runs.append({"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")})
|
||||
elif m.group("code") is not None:
|
||||
runs.append({"type": "code", "value": m.group("code")})
|
||||
elif m.group("bold") is not None:
|
||||
runs.append({"type": "bold", "value": m.group("bold")})
|
||||
elif m.group("italic1") is not None:
|
||||
runs.append({"type": "italic", "value": m.group("italic1")})
|
||||
elif m.group("italic2") is not None:
|
||||
runs.append({"type": "italic", "value": m.group("italic2")})
|
||||
|
||||
lastEnd = m.end()
|
||||
|
||||
# Trailing plain text
|
||||
if lastEnd < len(text):
|
||||
runs.append({"type": "text", "value": text[lastEnd:]})
|
||||
|
||||
return runs if runs else [{"type": "text", "value": text}]
|
||||
|
||||
|
||||
def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> Dict[str, Any]:
|
||||
"""
|
||||
Convert markdown content to the standard document JSON format expected by renderReport.
|
||||
Supports headings, code blocks, tables, lists, images (file: refs), paragraphs.
|
||||
For plain text: wraps entire content in a single paragraph section.
|
||||
Convert markdown content to the standard document JSON format with Inline-Run model.
|
||||
Sections use inlineRuns (list of run dicts) instead of plain text strings.
|
||||
Supports headings, code blocks, tables, lists, images, paragraphs.
|
||||
"""
|
||||
if not isinstance(markdown, str):
|
||||
markdown = str(markdown) if markdown else ""
|
||||
|
|
@ -31,7 +90,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
|||
while i < len(lines):
|
||||
line = lines[i]
|
||||
|
||||
# Headings
|
||||
# Headings (plain text, no inline formatting)
|
||||
headingMatch = re.match(r"^(#{1,6})\s+(.+)", line)
|
||||
if headingMatch:
|
||||
level = len(headingMatch.group(1))
|
||||
|
|
@ -43,7 +102,7 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
|||
i += 1
|
||||
continue
|
||||
|
||||
# Fenced code blocks
|
||||
# Fenced code blocks (no inline formatting)
|
||||
codeMatch = re.match(r"^```(\w*)", line)
|
||||
if codeMatch:
|
||||
lang = codeMatch.group(1) or "text"
|
||||
|
|
@ -59,14 +118,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
|||
})
|
||||
continue
|
||||
|
||||
# Tables
|
||||
# Tables - cells are List[InlineRun]
|
||||
tableMatch = re.match(r"^\|(.+)\|$", line)
|
||||
if tableMatch and (i + 1) < len(lines) and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1]):
|
||||
headerCells = [c.strip() for c in tableMatch.group(1).split("|")]
|
||||
headerCells = [_parseInlineRuns(c.strip()) for c in tableMatch.group(1).split("|")]
|
||||
i += 2
|
||||
rows = []
|
||||
while i < len(lines) and re.match(r"^\|(.+)\|$", lines[i]):
|
||||
rowCells = [c.strip() for c in lines[i][1:-1].split("|")]
|
||||
rowCells = [_parseInlineRuns(c.strip()) for c in lines[i][1:-1].split("|")]
|
||||
rows.append(rowCells)
|
||||
i += 1
|
||||
sections.append({
|
||||
|
|
@ -75,14 +134,14 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
|||
})
|
||||
continue
|
||||
|
||||
# Bullet / numbered lists
|
||||
# Bullet / numbered lists - items are List[List[InlineRun]]
|
||||
listMatch = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", line)
|
||||
if listMatch:
|
||||
isNumbered = bool(re.match(r"\d+[.)]", listMatch.group(2)))
|
||||
items = []
|
||||
while i < len(lines) and re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i]):
|
||||
m = re.match(r"^(\s*)([-*+]|\d+[.)]) (.+)", lines[i])
|
||||
items.append({"text": m.group(3).strip()})
|
||||
items.append(_parseInlineRuns(m.group(3).strip()))
|
||||
i += 1
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "bullet_list", "order": order,
|
||||
|
|
@ -95,46 +154,50 @@ def markdownToDocumentJson(markdown: str, title: str, language: str = "de") -> D
|
|||
i += 1
|
||||
continue
|
||||
|
||||
# Images (simplified: store as paragraph with ref for now - full resolution needs Knowledge Store)
|
||||
imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)]+)\)", line)
|
||||
# Standalone image on its own line -> block-level image section
|
||||
imgMatch = re.match(r"^!\[([^\]]*)\]\(([^)\"]+)(?:\s+\"(\d+)pt\")?\)\s*$", line)
|
||||
if imgMatch:
|
||||
altText = imgMatch.group(1).strip() or "Image"
|
||||
src = imgMatch.group(2).strip()
|
||||
widthStr = imgMatch.group(3)
|
||||
fileId = src[5:] if src.startswith("file:") else ""
|
||||
content = {
|
||||
"altText": altText,
|
||||
"base64Data": "",
|
||||
"_fileRef": fileId,
|
||||
"_srcUrl": src if not fileId else "",
|
||||
}
|
||||
if widthStr:
|
||||
content["widthPt"] = int(widthStr)
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "image", "order": order,
|
||||
"elements": [{
|
||||
"content": {
|
||||
"altText": altText,
|
||||
"base64Data": "",
|
||||
"_fileRef": fileId,
|
||||
"_srcUrl": src if not fileId else "",
|
||||
}
|
||||
}],
|
||||
"elements": [{"content": content}],
|
||||
})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Paragraph
|
||||
# Paragraph - produces inlineRuns
|
||||
paraLines = []
|
||||
while i < len(lines) and lines[i].strip() and not re.match(
|
||||
r"^(#{1,6}\s|```|\|.+\||!\[|(\s*)([-*+]|\d+[.)]) )", lines[i]
|
||||
r"^(#{1,6}\s|```|\|.+\||!\[[^\]]*\]\([^)]+\)\s*$|(\s*)([-*+]|\d+[.)]) )", lines[i]
|
||||
):
|
||||
paraLines.append(lines[i])
|
||||
i += 1
|
||||
if paraLines:
|
||||
combinedText = " ".join(paraLines)
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": " ".join(paraLines)}}],
|
||||
"elements": [{"content": {"inlineRuns": _parseInlineRuns(combinedText)}}],
|
||||
})
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
if not sections:
|
||||
fallbackText = markdown.strip() or "(empty)"
|
||||
sections.append({
|
||||
"id": _nextId(), "content_type": "paragraph", "order": order,
|
||||
"elements": [{"content": {"text": markdown.strip() or "(empty)"}}],
|
||||
"elements": [{"content": {"inlineRuns": _parseInlineRuns(fallbackText)}}],
|
||||
})
|
||||
|
||||
return {
|
||||
|
|
|
|||
18
modules/workflows/methods/methodAi/_common.py
Normal file
18
modules/workflows/methods/methodAi/_common.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
|
||||
"""Shared helpers for AI workflow actions."""
|
||||
|
||||
|
||||
def applyCommonAiParams(parameters: dict, request) -> None:
|
||||
"""Apply common AI parameters (requireNeutralization, allowedModels) from node to request."""
|
||||
requireNeutralization = parameters.get("requireNeutralization")
|
||||
if requireNeutralization is not None:
|
||||
request.requireNeutralization = bool(requireNeutralization)
|
||||
|
||||
allowedModels = parameters.get("allowedModels")
|
||||
if allowedModels and isinstance(allowedModels, list):
|
||||
if not request.options:
|
||||
from modules.datamodels.datamodelAi import AiCallOptions
|
||||
request.options = AiCallOptions()
|
||||
request.options.allowedModels = allowedModels
|
||||
|
|
@ -67,6 +67,8 @@ async def consolidate(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
prompt=prompt,
|
||||
options=AiCallOptions(operationType=OperationTypeEnum.DATA_ANALYSE),
|
||||
)
|
||||
from modules.workflows.methods.methodAi._common import applyCommonAiParams
|
||||
applyCommonAiParams(parameters, req)
|
||||
resp = await ai_service.callAi(req)
|
||||
except (SubscriptionInactiveException, BillingContextError):
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -36,6 +36,10 @@ async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
}
|
||||
if parentOperationId:
|
||||
processParams["parentOperationId"] = parentOperationId
|
||||
if parameters.get("allowedModels"):
|
||||
processParams["allowedModels"] = parameters["allowedModels"]
|
||||
if parameters.get("requireNeutralization") is not None:
|
||||
processParams["requireNeutralization"] = parameters["requireNeutralization"]
|
||||
|
||||
return await self.process(processParams)
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,16 @@ async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
processingMode=ProcessingModeEnum.DETAILED
|
||||
)
|
||||
|
||||
# Apply node-level AI params
|
||||
allowedModels = parameters.get("allowedModels")
|
||||
if allowedModels and isinstance(allowedModels, list):
|
||||
options.allowedModels = allowedModels
|
||||
requireNeutralization = parameters.get("requireNeutralization")
|
||||
if requireNeutralization is not None:
|
||||
_ctx = getattr(self.services, '_context', None)
|
||||
if _ctx:
|
||||
_ctx.requireNeutralization = bool(requireNeutralization)
|
||||
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
|
|
|
|||
|
|
@ -59,6 +59,16 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
compressContext=False
|
||||
)
|
||||
|
||||
# Apply node-level AI params
|
||||
allowedModels = parameters.get("allowedModels")
|
||||
if allowedModels and isinstance(allowedModels, list):
|
||||
options.allowedModels = allowedModels
|
||||
requireNeutralization = parameters.get("requireNeutralization")
|
||||
if requireNeutralization is not None:
|
||||
_ctx = getattr(self.services, '_context', None)
|
||||
if _ctx:
|
||||
_ctx.requireNeutralization = bool(requireNeutralization)
|
||||
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse: AiResponse = await self.services.ai.callAiContent(
|
||||
prompt=prompt,
|
||||
|
|
|
|||
|
|
@ -212,6 +212,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
)
|
||||
)
|
||||
|
||||
from modules.workflows.methods.methodAi._common import applyCommonAiParams
|
||||
applyCommonAiParams(parameters, request)
|
||||
|
||||
aiResponse_obj = await self.services.ai.callAi(request)
|
||||
|
||||
# Convert AiCallResponse to AiResponse format
|
||||
|
|
@ -243,6 +246,16 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
|
||||
)
|
||||
|
||||
# Apply node-level AI params (allowedModels, requireNeutralization)
|
||||
allowedModels = parameters.get("allowedModels")
|
||||
if allowedModels and isinstance(allowedModels, list):
|
||||
options.allowedModels = allowedModels
|
||||
requireNeutralization = parameters.get("requireNeutralization")
|
||||
if requireNeutralization is not None:
|
||||
_ctx = getattr(self.services, '_context', None)
|
||||
if _ctx:
|
||||
_ctx.requireNeutralization = bool(requireNeutralization)
|
||||
|
||||
# Get generationIntent from parameters (required for DATA_GENERATE)
|
||||
# Default to "document" if not provided (most common use case)
|
||||
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
|
||||
|
|
|
|||
|
|
@ -39,6 +39,10 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
}
|
||||
if parentOperationId:
|
||||
processParams["parentOperationId"] = parentOperationId
|
||||
if parameters.get("allowedModels"):
|
||||
processParams["allowedModels"] = parameters["allowedModels"]
|
||||
if parameters.get("requireNeutralization") is not None:
|
||||
processParams["requireNeutralization"] = parameters["requireNeutralization"]
|
||||
|
||||
return await self.process(processParams)
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,10 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
processParams["resultType"] = resultType
|
||||
if parentOperationId:
|
||||
processParams["parentOperationId"] = parentOperationId
|
||||
if parameters.get("allowedModels"):
|
||||
processParams["allowedModels"] = parameters["allowedModels"]
|
||||
if parameters.get("requireNeutralization") is not None:
|
||||
processParams["requireNeutralization"] = parameters["requireNeutralization"]
|
||||
|
||||
return await self.process(processParams)
|
||||
|
||||
|
|
|
|||
0
tests/serviceAi/__init__.py
Normal file
0
tests/serviceAi/__init__.py
Normal file
14
tests/serviceAi/test_allowed_models_whitelist.py
Normal file
14
tests/serviceAi/test_allowed_models_whitelist.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import pytest
|
||||
from modules.datamodels.datamodelAi import AiCallOptions
|
||||
|
||||
|
||||
def test_allowed_models_field_exists():
|
||||
opts = AiCallOptions(allowedModels=["gpt-5-mini", "claude-4-7-opus"])
|
||||
assert opts.allowedModels == ["gpt-5-mini", "claude-4-7-opus"]
|
||||
|
||||
|
||||
def test_allowed_models_default_none():
|
||||
opts = AiCallOptions()
|
||||
assert opts.allowedModels is None
|
||||
0
tests/serviceGeneration/__init__.py
Normal file
0
tests/serviceGeneration/__init__.py
Normal file
23
tests/serviceGeneration/test_inline_image_paragraph.py
Normal file
23
tests/serviceGeneration/test_inline_image_paragraph.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import pytest
|
||||
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
|
||||
|
||||
|
||||
def test_inline_image_in_paragraph():
|
||||
md = "Results show  clearly."
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
|
||||
types = [r["type"] for r in runs]
|
||||
assert "text" in types
|
||||
assert "image" in types
|
||||
imgRun = next(r for r in runs if r["type"] == "image")
|
||||
assert imgRun.get("fileId") == "abc"
|
||||
|
||||
|
||||
def test_multiple_inline_images():
|
||||
md = "A  B  C"
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
|
||||
images = [r for r in runs if r["type"] == "image"]
|
||||
assert len(images) == 2
|
||||
71
tests/serviceGeneration/test_md_to_json_consolidation.py
Normal file
71
tests/serviceGeneration/test_md_to_json_consolidation.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import pytest
|
||||
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
|
||||
|
||||
|
||||
def test_basic_paragraph():
|
||||
result = markdownToDocumentJson("Hello world", "Test")
|
||||
doc = result["documents"][0]
|
||||
section = doc["sections"][0]
|
||||
assert section["content_type"] == "paragraph"
|
||||
assert section["elements"][0]["content"]["inlineRuns"][0] == {"type": "text", "value": "Hello world"}
|
||||
|
||||
|
||||
def test_inline_bold():
|
||||
result = markdownToDocumentJson("This is **bold** text", "Test")
|
||||
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
|
||||
assert any(r["type"] == "bold" and r["value"] == "bold" for r in runs)
|
||||
|
||||
|
||||
def test_inline_image():
|
||||
result = markdownToDocumentJson("Text  more", "Test")
|
||||
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
|
||||
assert any(r["type"] == "image" and r.get("fileId") == "abc123" for r in runs)
|
||||
|
||||
|
||||
def test_inline_link():
|
||||
result = markdownToDocumentJson("Click [here](https://example.com)", "Test")
|
||||
runs = result["documents"][0]["sections"][0]["elements"][0]["content"]["inlineRuns"]
|
||||
assert any(r["type"] == "link" and r.get("href") == "https://example.com" for r in runs)
|
||||
|
||||
|
||||
def test_table_cells_are_inline_runs():
|
||||
md = "| A | B |\n| --- | --- |\n| **x** | y |"
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
section = result["documents"][0]["sections"][0]
|
||||
assert section["content_type"] == "table"
|
||||
rows = section["elements"][0]["content"]["rows"]
|
||||
assert isinstance(rows[0][0], list)
|
||||
|
||||
|
||||
def test_bullet_list_inline_runs():
|
||||
md = "- Item **one**\n- Item two"
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
section = result["documents"][0]["sections"][0]
|
||||
assert section["content_type"] == "bullet_list"
|
||||
items = section["elements"][0]["content"]["items"]
|
||||
assert isinstance(items[0], list)
|
||||
|
||||
|
||||
def test_standalone_image_block():
|
||||
md = ""
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
section = result["documents"][0]["sections"][0]
|
||||
assert section["content_type"] == "image"
|
||||
|
||||
|
||||
def test_heading_unchanged():
|
||||
result = markdownToDocumentJson("# Title", "Test")
|
||||
section = result["documents"][0]["sections"][0]
|
||||
assert section["content_type"] == "heading"
|
||||
assert section["elements"][0]["content"]["text"] == "Title"
|
||||
assert section["elements"][0]["content"]["level"] == 1
|
||||
|
||||
|
||||
def test_code_block_unchanged():
|
||||
md = "```python\nprint('hi')\n```"
|
||||
result = markdownToDocumentJson(md, "Test")
|
||||
section = result["documents"][0]["sections"][0]
|
||||
assert section["content_type"] == "code_block"
|
||||
assert section["elements"][0]["content"]["code"] == "print('hi')"
|
||||
39
tests/serviceGeneration/test_style_resolver.py
Normal file
39
tests/serviceGeneration/test_style_resolver.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright (c) 2025 Patrick Motsch
|
||||
# All rights reserved.
|
||||
import pytest
|
||||
from modules.serviceCenter.services.serviceGeneration.styleDefaults import resolveStyle, DEFAULT_STYLE
|
||||
|
||||
|
||||
def test_resolve_none_returns_defaults():
|
||||
result = resolveStyle(None)
|
||||
assert result == DEFAULT_STYLE
|
||||
|
||||
|
||||
def test_resolve_empty_returns_defaults():
|
||||
result = resolveStyle({})
|
||||
assert result == DEFAULT_STYLE
|
||||
|
||||
|
||||
def test_override_single_color():
|
||||
result = resolveStyle({"colors": {"primary": "#FF0000"}})
|
||||
assert result["colors"]["primary"] == "#FF0000"
|
||||
assert result["colors"]["secondary"] == DEFAULT_STYLE["colors"]["secondary"]
|
||||
|
||||
|
||||
def test_override_nested_heading():
|
||||
result = resolveStyle({"headings": {"h1": {"sizePt": 30}}})
|
||||
assert result["headings"]["h1"]["sizePt"] == 30
|
||||
assert result["headings"]["h1"]["weight"] == "bold"
|
||||
|
||||
|
||||
def test_override_font():
|
||||
result = resolveStyle({"fonts": {"primary": "Arial"}})
|
||||
assert result["fonts"]["primary"] == "Arial"
|
||||
assert result["fonts"]["monospace"] == "Consolas"
|
||||
|
||||
|
||||
def test_full_style_passthrough():
|
||||
custom = {"fonts": {"primary": "Helvetica", "monospace": "Monaco"}}
|
||||
result = resolveStyle(custom)
|
||||
assert result["fonts"]["primary"] == "Helvetica"
|
||||
assert result["fonts"]["monospace"] == "Monaco"
|
||||
Loading…
Reference in a new issue