This commit is contained in:
patrick-motsch 2026-02-16 10:03:43 +01:00
commit 1b70c07026
3 changed files with 162 additions and 54 deletions

View file

@ -7,8 +7,10 @@ import asyncio
import hashlib
import json
import logging
import re
import ssl
from typing import Any, Dict, List, Optional, Set
from urllib.parse import urljoin, urlparse
import aiohttp
@ -28,12 +30,26 @@ KANTON_NAMES = {
"VD": "Waadt", "VS": "Wallis", "ZG": "Zug", "ZH": "Zürich",
}
# Quarter/place names -> political municipality (Swiss Topo geocoding returns quarter names).
# Prevents wrong matches like "Enge" -> Martherenges instead of Zürich.
# Keys are compared against the output of _normalize_gemeinde_for_match(), so they
# must be written in that normalized form; values are the Gemeinde name that is
# passed on to the Swiss Topo lookup instead of the quarter name.
QUARTIER_TO_GEMEINDE: Dict[str, str] = {
"enge": "Zürich", # District (Kreis) 2 Enge (Zürich)
"aussersihl": "Zürich",
"wiedikon": "Zürich",
}
# Known direct BZO PDF URLs for municipalities (by normalized name, lowercase)
# These are tried first to avoid SSL/HTML issues with Tavily search results
KNOWN_BZO_PDF_URLS: Dict[str, str] = {
"schlieren": "https://www.schlieren.ch/_docn/6239470/SKR_10.10_Bauordnung.pdf",
"zürich": "https://www.stadt-zuerich.ch/content/dam/stzh/portal/Deutsch/AmtlicheSammlung/Erlasse/700/100/700.100%20Bau-%20und%20Zonenordnung%20V2.pdf",
"zurich": "https://www.stadt-zuerich.ch/content/dam/stzh/portal/Deutsch/AmtlicheSammlung/Erlasse/700/100/700.100%20Bau-%20und%20Zonenordnung%20V2.pdf",
# Known direct BZO PDF URLs per Gemeinde, in the order they should be tried.
# Used as a fallback when Tavily returns no matching PDFs, and also placed ahead of
# Tavily results when a key matches; avoids SSL/HTML issues with Tavily results.
# Keys are looked up by the _normalize_gemeinde_for_match() form of the Gemeinde label
# and, as a fallback, by the stripped/lowercased label -- presumably why Zürich is
# listed under several spellings (TODO confirm what the normalizer produces).
# Uster: _docn shows an HTML "Erlass ausser Kraft" (decree no longer in force) page;
# _rtr/dokument_xxx serves the actual PDF.
KNOWN_BZO_PDF_URLS: Dict[str, List[str]] = {
"schlieren": ["https://www.schlieren.ch/_docn/6239470/SKR_10.10_Bauordnung.pdf"],
"uster": [
"https://www.uster.ch/_rtr/dokument_3619802", # Direct document (PDF)
"https://www.uster.ch/_docn/3619802/Bau-und-Zonenordnung-teilrevidiert-2021.pdf", # May return HTML first
],
"zürich": ["https://www.stadt-zuerich.ch/content/dam/stzh/portal/Deutsch/AmtlicheSammlung/Erlasse/700/100/700.100%20Bau-%20und%20Zonenordnung%20V2.pdf"],
"zurich": ["https://www.stadt-zuerich.ch/content/dam/stzh/portal/Deutsch/AmtlicheSammlung/Erlasse/700/100/700.100%20Bau-%20und%20Zonenordnung%20V2.pdf"],
"zuerich": ["https://www.stadt-zuerich.ch/content/dam/stzh/portal/Deutsch/AmtlicheSammlung/Erlasse/700/100/700.100%20Bau-%20und%20Zonenordnung%20V2.pdf"],
}
@ -119,9 +135,15 @@ async def ensure_single_gemeinde(
"""
if not gemeinde_name or not gemeinde_name.strip():
return None
# Resolve Quartier/place names to politische Gemeinde (e.g. Enge -> Zürich)
lookup_name = gemeinde_name.strip()
quartier_key = _normalize_gemeinde_for_match(lookup_name)
if quartier_key and quartier_key in QUARTIER_TO_GEMEINDE:
lookup_name = QUARTIER_TO_GEMEINDE[quartier_key]
logger.debug(f"Mapped Quartier '{gemeinde_name}' -> Gemeinde '{lookup_name}'")
try:
connector = SwissTopoMapServerConnector()
gd = await connector.get_gemeinde_by_name(gemeinde_name)
gd = await connector.get_gemeinde_by_name(lookup_name)
except Exception as e:
logger.error(f"Error fetching Gemeinde '{gemeinde_name}' from Swiss Topo: {e}", exc_info=True)
return None
@ -207,9 +229,11 @@ async def fetch_bzo_for_gemeinde(
Deduplication: re-fetches Gemeinde, skips if BZO exists, skips URLs we already have,
creates at most 1 new document per call to avoid duplicates from multiple Tavily URLs.
"""
logger.info(f"fetch_bzo_for_gemeinde: starting for {gemeinde.label} (id={gemeinde.id})")
# Re-fetch Gemeinde to get latest dokumente (avoid race with concurrent requests)
fresh = interface.getGemeinde(gemeinde.id)
if not fresh:
logger.warning(f"fetch_bzo_for_gemeinde: Gemeinde {gemeinde.id} not found after refresh")
return False
gemeinde = fresh
@ -223,12 +247,19 @@ async def fetch_bzo_for_gemeinde(
if q:
existing_quellen.add(q)
if typ in [DokumentTyp.GEMEINDE_BZO_AKTUELL, DokumentTyp.GEMEINDE_BZO_REVISION]:
doc_id = doc.id if hasattr(doc, "id") else doc.get("id")
full = interface.getDokument(doc_id) if doc_id else None
if full and full.dokumentReferenz:
existing_bzo = True
break
if label and any(x in (label or "").upper() for x in ("BZO", "BAU UND ZONENORDNUNG", "PLAN D'AMÉNAGEMENT", "RÈGLEMENT DE CONSTRUCTION", "PIANO DI", "REGOLAMENTO EDILIZIO")):
doc_id = doc.id if hasattr(doc, "id") else doc.get("id")
full = interface.getDokument(doc_id) if doc_id else None
if full and full.dokumentReferenz:
existing_bzo = True
break
if existing_bzo:
logger.info(f"fetch_bzo_for_gemeinde: {gemeinde.label} already has BZO document(s), skipping")
return True
kanton_abk = None
@ -275,6 +306,15 @@ async def fetch_bzo_for_gemeinde(
if (r.url.lower().endswith(".pdf") or "/pdf" in r.url.lower())
and _is_valid_bzo_result(r.url, r.title or "")
]
# If Tavily returned nothing useful, try known direct PDF URLs (Uster, Schlieren, etc.)
gemeinde_key = _normalize_gemeinde_for_match(gemeinde.label or "")
gemeinde_key_alt = gemeinde.label.strip().lower() if gemeinde.label else ""
if not pdf_urls and (gemeinde_key in KNOWN_BZO_PDF_URLS or gemeinde_key_alt in KNOWN_BZO_PDF_URLS):
key = gemeinde_key if gemeinde_key in KNOWN_BZO_PDF_URLS else gemeinde_key_alt
pdf_urls = list(KNOWN_BZO_PDF_URLS[key])
logger.info(f"Using known BZO PDF URL for {gemeinde.label} (no Tavily matches)")
if not pdf_urls:
logger.warning(
f"No PDF URLs with matching Gemeinde name for {gemeinde.label} "
@ -282,12 +322,11 @@ async def fetch_bzo_for_gemeinde(
)
return False
# Prepend known direct PDF URLs for this Gemeinde (avoids SSL/HTML issues with Tavily results)
gemeinde_key = gemeinde.label.strip().lower() if gemeinde.label else ""
# Prepend known direct PDF URL when available (avoids SSL/HTML issues with Tavily results)
if gemeinde_key and gemeinde_key in KNOWN_BZO_PDF_URLS:
known_url = KNOWN_BZO_PDF_URLS[gemeinde_key]
pdf_urls = [known_url] + [u for u in pdf_urls if u != known_url]
logger.info(f"Using known BZO PDF URL for {gemeinde.label}")
known_urls = KNOWN_BZO_PDF_URLS[gemeinde_key]
pdf_urls = list(known_urls) + [u for u in pdf_urls if u not in known_urls]
logger.info(f"Preferring known BZO PDF URL for {gemeinde.label}")
# Use ssl.CERT_NONE to avoid CERTIFICATE_VERIFY_FAILED on Windows/corporate environments
# (same approach as routeRealEstate for external HTTP requests)
@ -295,10 +334,38 @@ async def fetch_bzo_for_gemeinde(
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
connector = aiohttp.TCPConnector(ssl=ssl_context)
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Accept": "application/pdf,*/*"}
# Use Accept: application/pdf first to encourage direct PDF delivery (e.g. uster.ch)
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "application/pdf,application/octet-stream,*/*",
}
timeout = aiohttp.ClientTimeout(total=30)
async def download_pdf(session: aiohttp.ClientSession, url: str) -> Optional[bytes]:
def _extract_document_url_from_html(html_bytes: bytes, base_url: str) -> Optional[str]:
"""Extract document/PDF URL from HTML (e.g. uster.ch 'Weiter' page)."""
try:
text = html_bytes.decode("utf-8", errors="ignore")
# Swiss municipal doc systems: _rtr/dokument_xxx, .pdf links, or _docn redirect targets
for pat in (
r'href=["\']([^"\']*(?:/_rtr/dokument[_\w]*|dokument_\d+)[^"\']*)["\']',
r'href=["\']([^"\']+\.pdf(?:\?[^"\']*)?)["\']',
r'action=["\']([^"\']+\.pdf[^"\']*)["\']',
):
m = re.search(pat, text, re.I)
if m:
raw = m.group(1).strip()
if raw and not raw.startswith("#") and not raw.lower().startswith("javascript:"):
next_url = urljoin(base_url, raw)
parsed = urlparse(next_url)
if parsed.netloc and parsed.scheme:
return next_url
except Exception:
pass
return None
async def download_pdf(
session: aiohttp.ClientSession, url: str, _followed_from_html: bool = False
) -> Optional[bytes]:
for attempt in range(3):
try:
async with session.get(url, allow_redirects=True) as resp:
@ -306,7 +373,11 @@ async def fetch_bzo_for_gemeinde(
data = await resp.read()
if data and len(data) >= 100 and data.startswith(b"%PDF"):
return data
if data.startswith(b"<") or data.startswith(b"<!DOCTYPE"):
if (data.startswith(b"<") or data.startswith(b"<!DOCTYPE")) and not _followed_from_html:
fallback = _extract_document_url_from_html(data, url)
if fallback and fallback != url:
logger.debug(f"HTML from {url[:60]}..., following link to document")
return await download_pdf(session, fallback, _followed_from_html=True)
raise Exception("Server returned HTML instead of PDF")
elif resp.status == 406 and attempt < 2:
await asyncio.sleep(2)

View file

@ -703,8 +703,22 @@ async def get_parcel_documents(
by_label = interface.getGemeinden(recordFilter={"label": gemeinde, "mandateId": mandateId})
gemeinde_obj = by_label[0] if by_label else None
if not gemeinde_obj:
# Fallback: match by normalized label (e.g. DB has "Stadt Uster", request has "Uster")
all_g = interface.getGemeinden(recordFilter={"mandateId": mandateId})
g_norm = gemeinde.strip().lower()
for g in all_g:
gl = (g.label or "").strip().lower()
if gl == g_norm or g_norm in gl or gl in g_norm:
gemeinde_obj = g
logger.debug(f"parcel-documents: Found Gemeinde by label match '{gemeinde}' -> '{g.label}'")
break
if gemeinde_obj:
logger.debug(f"parcel-documents: Gemeinde '{gemeinde}' resolved: {gemeinde_obj.id}")
if not gemeinde_obj:
logger.info(f"parcel-documents: No Gemeinde for label '{gemeinde}', ensuring via Swiss Topo...")
gemeinde_obj = await ensure_single_gemeinde(interface, mandateId, instanceId, gemeinde_name=gemeinde)
if not gemeinde_obj:
logger.warning(f"parcel-documents: Gemeinde '{gemeinde}' nicht gefunden (mandateId={mandateId[:8]}...)")
return {"documents": [], "error": f"Gemeinde '{gemeinde}' nicht gefunden"}
bzo_docs = []
if gemeinde_obj.dokumente:
@ -717,6 +731,7 @@ async def get_parcel_documents(
if full and full.dokumentReferenz:
bzo_docs.append(full)
if not bzo_docs:
logger.info(f"parcel-documents: No BZO for {gemeinde}, fetching...")
fetched = await fetch_bzo_for_gemeinde(interface, componentInterface, gemeinde_obj, mandateId, instanceId)
if fetched:
gemeinde_obj = interface.getGemeinde(gemeinde_obj.id)

View file

@ -37,13 +37,14 @@ router = APIRouter(
class InvitationCreate(BaseModel):
"""Request model for creating an invitation.
Invitations are feature-instance-level: the user selects a feature instance and
instance-level roles. The mandateId is derived from the feature instance automatically.
Supports two modes:
- Mandate-level: featureInstanceId omitted, roleIds are mandate-level roles (user, viewer, admin)
- Feature-instance-level: featureInstanceId required, roleIds are instance-level roles
"""
targetUsername: str = Field(..., description="Username of the user to invite (must match on acceptance)")
email: Optional[str] = Field(None, description="Email address to send invitation link (optional)")
featureInstanceId: str = Field(..., description="Feature instance to grant access to")
roleIds: List[str] = Field(..., description="Instance-level role IDs to assign to the invited user")
featureInstanceId: Optional[str] = Field(None, description="Feature instance to grant access to (optional for mandate-level invitations)")
roleIds: List[str] = Field(..., description="Role IDs: mandate-level (user, viewer, admin) or instance-level")
frontendUrl: str = Field(..., description="Frontend URL for building the invite link (provided by frontend)")
expiresInHours: int = Field(
72,
@ -117,34 +118,17 @@ def create_invitation(
try:
rootInterface = getRootInterface()
# Validate feature instance exists and get mandateId from it
# Determine mandateId and validate
if data.featureInstanceId:
# Feature-instance-level invitation
instance = rootInterface.getFeatureInstance(data.featureInstanceId)
if not instance:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Feature instance '{data.featureInstanceId}' not found"
)
mandateId = str(instance.mandateId)
# Check admin permission: SysAdmin can invite for any mandate,
# MandateAdmin can invite for their own mandate
if not context.hasSysAdminRole:
if str(context.mandateId) != mandateId:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Feature instance belongs to a different mandate"
)
if not _hasMandateAdminRole(context):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Mandate-Admin role required to create invitations"
)
# Note: targetUsername does NOT need to exist yet!
# The invitation can be for a user who will register later.
# Validate role IDs exist and belong to this feature instance
# Validate roles belong to this feature instance
for roleId in data.roleIds:
role = rootInterface.getRole(roleId)
if not role:
@ -152,21 +136,59 @@ def create_invitation(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Role '{roleId}' not found"
)
# Role must belong to this feature instance
if str(role.featureInstanceId or "") != data.featureInstanceId:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Role '{roleId}' does not belong to feature instance '{data.featureInstanceId}'"
)
else:
# Mandate-level invitation (user, viewer, admin roles)
if not context.mandateId:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="X-Mandate-Id header is required for mandate-level invitations"
)
mandateId = str(context.mandateId)
# Validate roles are mandate-level (no featureInstanceId)
for roleId in data.roleIds:
role = rootInterface.getRole(roleId)
if not role:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Role '{roleId}' not found"
)
if role.featureInstanceId is not None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Role '{roleId}' is an instance-level role; use mandate-level roles (user, viewer, admin) for mandate invitations"
)
if str(role.mandateId or "") != mandateId:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Role '{roleId}' does not belong to mandate"
)
# Check admin permission
if not context.hasSysAdminRole:
if str(context.mandateId) != mandateId:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied to this mandate"
)
if not _hasMandateAdminRole(context):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Mandate-Admin role required to create invitations"
)
# Calculate expiration time
currentTime = getUtcTimestamp()
expiresAt = currentTime + (data.expiresInHours * 3600)
# Create invitation (mandateId derived from feature instance)
# Create invitation
invitation = Invitation(
mandateId=mandateId,
featureInstanceId=data.featureInstanceId,
featureInstanceId=data.featureInstanceId or None,
roleIds=data.roleIds,
targetUsername=data.targetUsername,
email=data.email,