platform-core/modules/shared/documentUtils.py
ValueOn AG cf0233f193
Some checks failed
Deploy Plattform-Core (Int) / test (push) Failing after 13s
Deploy Plattform-Core (Int) / deploy (push) Has been skipped
refactor: architecture cleanup + fix scheduler Automation2Workflow error
Fix: add missing Automation2Workflow/Automation2WorkflowRun imports to interfaceFeatureGraphicalEditor.py (caused scheduler crash on boot)
Refactor: gdprDeletion via onUserDelete lifecycle hooks
Refactor: i18nBootSync accounting labels via app.py parameter injection
Refactor: serviceHub moved to serviceCenter/serviceHub.py
Split: teamsbot/service.py, realEstate/main, routeTrustee, routeBilling
Cleanup: remove obsolete methodTrustee, serviceExceptions shim
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-07 07:59:31 +02:00

64 lines
2.3 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document utility functions (Layer L0 - shared).
Pure text-processing helpers with zero internal dependencies.
"""
import re
# Tokenizer for inline markdown, compiled once at import time instead of on
# every call.  The alternatives are ordered by precedence: at a given position
# the first alternative that matches wins.
#
# Emphasis markers must hug their content: the character right after the
# opening marker and the one right before the closing marker must not be
# whitespace.  Without this, plain text such as "2 * 3 * 4" or "x ** 2 ** y"
# was parsed as italic/bold and the markers were dropped from the output.
# For "*" italics the adjacent characters must also not be "*", so a "**" pair
# rejected as bold is not reinterpreted as an italic delimiter.
_INLINE_TOKEN_RE = re.compile(
    # Image: ![alt](src) with an optional quoted width, e.g. ![alt](src "120pt")
    r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)'
    # Link: [text](href)
    r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)'
    # Inline code: `code`
    r'|`(?P<code>[^`]+)`'
    # Bold: **text**
    r'|\*\*(?=\S)(?P<bold>.+?)(?<=\S)\*\*'
    # Italic: *text* (markers must not touch a word character on the outside)
    r'|(?<!\w)\*(?=[^\s*])(?P<italic1>.+?)(?<=[^\s*])\*(?!\w)'
    # Italic: _text_ (the \w guards keep snake_case identifiers intact)
    r'|(?<!\w)_(?=\S)(?P<italic2>.+?)(?<=\S)_(?!\w)'
)


def _inlineRunFromMatch(m) -> dict:
    """Build the InlineRun dict for a single match of _INLINE_TOKEN_RE."""
    if m.group("imgSrc") is not None:
        # Empty or whitespace-only alt text falls back to the label "Image".
        alt = (m.group("imgAlt") or "").strip() or "Image"
        src = m.group("imgSrc").strip()
        run = {"type": "image", "value": alt}
        if src.startswith("file:"):
            # "file:<id>" sources are exposed as a fileId instead of an href.
            run["fileId"] = src[len("file:"):]
        else:
            run["href"] = src
        widthStr = m.group("imgWidth")
        if widthStr:
            run["widthPt"] = int(widthStr)
        return run
    if m.group("linkText") is not None:
        return {"type": "link", "value": m.group("linkText"), "href": m.group("linkHref")}
    if m.group("code") is not None:
        return {"type": "code", "value": m.group("code")}
    if m.group("bold") is not None:
        return {"type": "bold", "value": m.group("bold")}
    if m.group("italic1") is not None:
        return {"type": "italic", "value": m.group("italic1")}
    # Every alternative of the pattern captures exactly one payload group, so
    # the only remaining possibility is the underscore italic form.
    return {"type": "italic", "value": m.group("italic2")}


def parseInlineRuns(text: str) -> list:
    """
    Parse inline markdown formatting into a list of InlineRun dicts.

    Handles images, links, inline code, bold, italic and plain text. The input
    is scanned left to right; text between recognized tokens is emitted
    unchanged as "text" runs.

    Args:
        text: The markdown source to tokenize. A falsy value (empty string or
            None) yields a single empty text run.

    Returns:
        A non-empty list of dicts, each with a "type" and a "value" key:
          - {"type": "text" | "code" | "bold" | "italic", "value": str}
          - {"type": "link", "value": <link text>, "href": <target>}
          - {"type": "image", "value": <alt text or "Image">} plus either
            "fileId" (source written as "file:<id>") or "href" (any other
            source), and "widthPt" (int) when a width such as "120pt" was
            given in quotes after the source.
    """
    if not text:
        return [{"type": "text", "value": ""}]

    runs = []
    lastEnd = 0
    for m in _INLINE_TOKEN_RE.finditer(text):
        if m.start() > lastEnd:
            # Literal text between the previous token and this one.
            runs.append({"type": "text", "value": text[lastEnd:m.start()]})
        runs.append(_inlineRunFromMatch(m))
        lastEnd = m.end()
    if lastEnd < len(text):
        # Trailing literal text (the whole input when nothing matched).
        runs.append({"type": "text", "value": text[lastEnd:]})
    # text is non-empty here, so at least one run was appended above.
    return runs