gateway/modules/shared/mandateNameUtils.py

121 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Slug and validation helpers for Mandate.name (Kurzzeichen).
Format: lowercase [a-z0-9], segments separated by a single hyphen, length 2-32.
German umlauts are transliterated (ä→ae, ö→oe, ü→ue, ß→ss) before slugging.
"""
from __future__ import annotations
import re
from typing import Iterable, Set
# Inclusive bounds on the length of a mandate slug.
MANDATE_NAME_MIN_LEN = 2
MANDATE_NAME_MAX_LEN = 32
# One or more [a-z0-9] segments joined by single hyphens (no leading, trailing
# or doubled hyphen). The pattern ends with \Z instead of $ because $ also
# matches just before a trailing newline, which would accept "abc\n".
_MANDATE_NAME_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*\Z")
def _transliterateGerman(text: str) -> str:
    """Replace German umlauts and sharp s in *text* with ASCII digraphs.

    Matching is done on the lowercased character, and the replacement is
    always lowercase (so "Ä" becomes "ae"). Every other character is copied
    through unchanged. Falsy input yields an empty string.
    """
    if not text:
        return ""
    digraphs = {"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"}
    # Look up the lowercased form, but fall back to the original character so
    # that the case of untouched letters is preserved.
    return "".join(digraphs.get(char.lower(), char) for char in text)
def _collapseHyphensAndTrim(raw: str) -> str:
    """Lowercase *raw* and join its [a-z0-9] runs with single hyphens.

    Every run of other characters acts as one separator, and separators at
    either end are dropped. Returns "" when *raw* has no [a-z0-9] characters.
    """
    segments = re.findall(r"[a-z0-9]+", raw.lower())
    return "-".join(segments)
def _ensureMinSlugLength(slug: str) -> str:
    """Pad *slug* until it reaches MANDATE_NAME_MIN_LEN characters.

    A slug that is already long enough is returned unchanged. A one-character
    slug is doubled; anything else that is too short is padded with "x".
    """
    missing = MANDATE_NAME_MIN_LEN - len(slug)
    if missing <= 0:
        return slug
    if len(slug) == 1:
        # Repeat the single character rather than padding with "x".
        return slug * 2
    return slug + "x" * missing
def _truncateSlugToMaxLen(slug: str) -> str:
    """Shorten *slug* to at most MANDATE_NAME_MAX_LEN characters.

    A slug within the limit is returned unchanged. Otherwise the slug is cut
    at the limit and, if the cut text still contains a hyphen, everything from
    its last hyphen onward is dropped. A result shorter than
    MANDATE_NAME_MIN_LEN is right-padded with "x".
    """
    if len(slug) <= MANDATE_NAME_MAX_LEN:
        return slug
    head = slug[:MANDATE_NAME_MAX_LEN].rstrip("-")
    # rpartition returns an empty separator when there is no hyphen, in which
    # case the hard cut is kept as is.
    kept, hyphen, _dropped = head.rpartition("-")
    if hyphen:
        head = kept
    head = head.strip("-")
    return head.ljust(MANDATE_NAME_MIN_LEN, "x")
def transliterateGerman(text: str) -> str:
    """Return *text* with German umlauts and sharp s spelled out in ASCII.

    Public wrapper around the module-private transliteration helper.
    """
    transliterated = _transliterateGerman(text)
    return transliterated
def slugifyMandateName(label: str) -> str:
    """Build a mandate slug base from a human-readable label.

    The label is trimmed, German umlauts are transliterated, everything
    outside [a-z0-9] is collapsed into single hyphens, and the result is
    padded or truncated to the allowed length.

    A truthy non-string label is converted with ``str()`` first, matching the
    emptiness check, instead of failing on a missing ``strip`` method.

    Returns:
        A slug that satisfies isValidMandateName. The fallback "mn" is
        returned when the label is empty, blank, contains no usable
        characters, or the pipeline produces an invalid name.
    """
    fallback = "mn"
    if not label:
        return fallback
    # Coerce once and reuse the result, so the emptiness check and the
    # slugging pipeline operate on the same string.
    text = str(label).strip()
    if not text:
        return fallback
    slug = _collapseHyphensAndTrim(_transliterateGerman(text))
    if not slug:
        return fallback
    slug = _truncateSlugToMaxLen(_ensureMinSlugLength(slug))
    # Last line of defence: never hand out a name the validator rejects.
    return slug if isValidMandateName(slug) else fallback
def isValidMandateName(name: str) -> bool:
    """Return True if *name* is a well-formed mandate slug.

    A valid name is a ``str`` whose length lies between MANDATE_NAME_MIN_LEN
    and MANDATE_NAME_MAX_LEN inclusive (2-32) and which consists of [a-z0-9]
    segments joined by single hyphens. Non-string input returns False.
    """
    if not isinstance(name, str):
        return False
    if not MANDATE_NAME_MIN_LEN <= len(name) <= MANDATE_NAME_MAX_LEN:
        return False
    # fullmatch() requires the whole string to match. A "$"-anchored match()
    # would also accept a name that ends in a newline.
    return _MANDATE_NAME_RE.fullmatch(name) is not None
def allocateUniqueMandateSlug(base: str, taken: Iterable[str]) -> str:
    """Return a slug derived from *base* that does not occur in *taken*.

    *base* itself is returned when it is free. Otherwise the candidates
    base-2, base-3, ... up to suffix 100000 are tried in order, with *base*
    shortened as needed so that the suffixed slug stays within
    MANDATE_NAME_MAX_LEN. Falsy entries in *taken* are ignored.

    *base* is expected to satisfy isValidMandateName (typically it comes from
    slugifyMandateName).

    Raises:
        ValueError: if no free, valid candidate is found.
    """
    occupied: Set[str] = set()
    for entry in taken:
        if entry:
            occupied.add(entry)
    if base not in occupied:
        return base

    for counter in range(2, 100001):
        tail = f"-{counter}"
        # Leave room for the suffix, but never shrink the stem below the
        # minimum slug length.
        budget = max(MANDATE_NAME_MAX_LEN - len(tail), MANDATE_NAME_MIN_LEN)
        stem = base[:budget].rstrip("-")
        if len(stem) < MANDATE_NAME_MIN_LEN:
            stem = "mn"
        candidate = (stem + tail)[:MANDATE_NAME_MAX_LEN].rstrip("-")
        if candidate not in occupied and isValidMandateName(candidate):
            return candidate
    raise ValueError("allocateUniqueMandateSlug: could not allocate a unique slug")