# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document utility functions (Layer L0 - shared).
Pure text-processing helpers with zero internal dependencies.
"""

import re

# Tokenizer for inline markdown. Alternatives are tried in order at each
# position, so images (leading "!") win over links and "**bold**" wins over
# "*italic*". The (?<!\w) / (?!\w) guards keep intra-word "*" and "_"
# (e.g. snake_case identifiers) from being treated as emphasis markers.
_INLINE_TOKEN_RE = re.compile(
    r'!\[(?P<imgAlt>[^\]]*)\]\((?P<imgSrc>[^)"]+)(?:\s+"(?P<imgWidth>\d+)pt")?\)'
    r'|\[(?P<linkText>[^\]]+)\]\((?P<linkHref>[^)]+)\)'
    r'|`(?P<code>[^`]+)`'
    r'|\*\*(?P<bold>.+?)\*\*'
    r'|(?<!\w)\*(?P<italic1>.+?)\*(?!\w)'
    r'|(?<!\w)_(?P<italic2>.+?)_(?!\w)'
)


def parseInlineRuns(text: str) -> list:
    """
    Parse inline markdown formatting into a list of InlineRun dicts.

    Handles: images, links, bold, italic, inline code, plain text.
    Uses a regex-based tokenizer that processes tokens left-to-right; any
    text between two tokens is emitted as a plain "text" run.

    Args:
        text: The inline markdown source. May be empty or None.

    Returns:
        A non-empty list of dicts, each with a "type" and a "value" key:
          - {"type": "text", "value": ...}
          - {"type": "bold" | "italic" | "code", "value": ...}
          - {"type": "link", "value": <link text>, "href": <target>}
          - {"type": "image", "value": <alt text or "Image">, ...} carrying
            "fileId" when the source starts with "file:" (prefix removed),
            otherwise "href"; plus "widthPt" (int) when the image title has
            the form "<digits>pt".
        Falsy input yields a single empty text run.
    """
    if not text:
        return [{"type": "text", "value": ""}]

    runs = []
    lastEnd = 0
    for m in _INLINE_TOKEN_RE.finditer(text):
        # Emit the untouched text between the previous token and this one.
        if m.start() > lastEnd:
            runs.append({"type": "text", "value": text[lastEnd:m.start()]})

        if m.group("imgAlt") is not None or m.group("imgSrc") is not None:
            alt = (m.group("imgAlt") or "").strip() or "Image"
            src = (m.group("imgSrc") or "").strip()
            widthStr = m.group("imgWidth")
            run = {"type": "image", "value": alt}
            if src.startswith("file:"):
                # "file:<id>" sources are stored as an id, not as an href.
                run["fileId"] = src[5:]
            else:
                run["href"] = src
            if widthStr:
                run["widthPt"] = int(widthStr)
            runs.append(run)
        elif m.group("linkText") is not None:
            runs.append({
                "type": "link",
                "value": m.group("linkText"),
                "href": m.group("linkHref"),
            })
        elif m.group("code") is not None:
            runs.append({"type": "code", "value": m.group("code")})
        elif m.group("bold") is not None:
            runs.append({"type": "bold", "value": m.group("bold")})
        elif m.group("italic1") is not None:
            runs.append({"type": "italic", "value": m.group("italic1")})
        elif m.group("italic2") is not None:
            runs.append({"type": "italic", "value": m.group("italic2")})

        lastEnd = m.end()

    # Trailing text after the last token (or the whole string if no token).
    if lastEnd < len(text):
        runs.append({"type": "text", "value": text[lastEnd:]})

    return runs if runs else [{"type": "text", "value": text}]