From 1e2a6bea70f552c7869ab0b9abf170a0ccafa266 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@poweron.swiss>
Date: Tue, 2 Jun 2026 09:42:04 +0200
Subject: [PATCH] logging

---
 README.md           | 21 ++++++++++--------
 config.py           | 14 +++++++++++-
 docu/setupserver.md | 53 +++++++++++++++++++--------------------------
 3 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/README.md b/README.md
index 803876f..5a34d77 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,24 @@
 # Private LLM - für Neutralisierung
 
-KI-Dokumentenanalyse mit lokalen Ollama Vision-Modellen.
+KI-Dokumentenanalyse und Text-Neutralisierung mit lokalen Ollama-Modellen.
 
-## Integrierte MVP Features
+## Features
 
-- Rechnungen, Belege, Bankauszüge analysieren
+- Text-Neutralisierung für den Gateway (kein Datenabfluss an externe APIs)
+- Rechnungen, Belege, Bankauszüge analysieren (Vision)
 - Handschrift erkennen
-- PDF-Support
-- 100% lokal - keine Cloud-APIs
+- PDF-Support (PyMuPDF)
+- 100% lokal auf Schweizer Infrastruktur
 
 ## Tech Stack
 
-- **Backend:** Python Flask
-- **AI:** Ollama Vision Models
-- **Server:** Infomaniak Swiss Cloud (GPU)
+- **Backend:** FastAPI (Python), Uvicorn
+- **AI:** Ollama (qwen2.5:7b, qwen2.5vl:7b, granite3.2-vision)
+- **Server:** Infomaniak Swiss Cloud (NVIDIA L4, 24 GB VRAM)
+- **TLS:** Let's Encrypt (`llm.poweron.swiss`)
 
 ## Deployment
 
-Automatisches Deployment via GitHub Actions bei Push zu `main`.
+Automatisches Deployment via Forgejo Actions bei Push zu `main`.
+Ziel: `/opt/ollama-webapp/app/` auf `83.228.200.109`.
 
diff --git a/config.py b/config.py
index 749f612..5141b8f 100644
--- a/config.py
+++ b/config.py
@@ -153,12 +153,24 @@ rateLimiter = RateLimiter(
 # Model Mapping
 # ============================================================================
 
+# Current models (L4 24 GB VRAM — Infomaniak)
 MODEL_MAPPING = {
     "poweron-text-general": "qwen2.5:7b",
     "poweron-vision-general": "qwen2.5vl:7b",
     "poweron-vision-deep": "granite3.2-vision",
 }
 
+# Next-gen models (RTX PRO 6000 96 GB VRAM — prepared, activate after migration)
+# Uncomment and remove the old entries above once the new hardware is live.
+# MODEL_MAPPING = {
+#     "poweron-text-general": "qwen3:14b",
+#     "poweron-text-reasoning": "deepseek-r1:70b",
+#     "poweron-vision-general": "llama4:scout",
+#     "poweron-vision-deep": "qwen2.5vl:72b",
+#     "poweron-embed": "nomic-embed-text",
+#     "poweron-transcribe": "whisper-large-v3-turbo",
+# }
+
 INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()}
 
 
@@ -260,7 +272,7 @@ def _isVisionModel(modelName: str) -> bool:
     if not modelName:
         return False
     modelLower = modelName.lower()
-    visionIndicators = ["vision", "vl", "llava", "bakllava", "granite"]
+    visionIndicators = ["vision", "vl", "llava", "bakllava", "granite", "scout", "llama4"]
     return any(indicator in modelLower for indicator in visionIndicators)
 
 
diff --git a/docu/setupserver.md b/docu/setupserver.md
index 30d1913..70fcbf3 100644
--- a/docu/setupserver.md
+++ b/docu/setupserver.md
@@ -51,25 +51,25 @@ Connect: ssh -i "C:\Users\pmots\Downloads\ollama-deploy-key.pem" ubuntu@83.228.2
 | **GPU** | NVIDIA L4 (24GB VRAM) |
 | **OS** | Ubuntu 24.04 LTS |
 | **SSH User** | `ubuntu` |
-| **App Port** | `5000` |
+| **App Port** | `8000` (HTTPS) |
 | **Ollama Port** | `11434` |
-| **GitHub Repo** | `https://github.com/valueonag/private-llm` |
+| **Git Repo** | `https://git.poweron.swiss` (Forgejo) |
 
 ### Installierte Modelle
 
-| Modell | Verwendung |
-|--------|------------|
-| `granite3.2-vision` | Rechnungen, Belege, Dokumente |
-| `qwen2.5vl:7b` | Handschrift |
-| `deepseek-ocr` | OCR / Text-Extraktion |
+| Modell | Ollama-Name | Verwendung |
+|--------|-------------|------------|
+| `poweron-text-general` | `qwen2.5:7b` | Text-Neutralisierung |
+| `poweron-vision-general` | `qwen2.5vl:7b` | Handschrift, Dokumente |
+| `poweron-vision-deep` | `granite3.2-vision` | Rechnungen, Belege |
 
 ### URLs
 
 | Service | URL |
 |---------|-----|
-| **App** | http://83.228.200.109:5000 |
-| **Health Check** | http://83.228.200.109:5000/api/health |
-| **Ollama Status** | http://83.228.200.109:5000/api/ollama/status |
+| **App** | https://llm.poweron.swiss:8000 |
+| **Health Check** | https://llm.poweron.swiss:8000/api/health |
+| **Ollama Status** | https://llm.poweron.swiss:8000/api/ollama/status |
 
 ---
 
@@ -573,14 +573,14 @@ sudo systemctl enable ollama
 ## D.5 Modelle herunterladen
 
 ```bash
-# Fuer Dokumente (Rechnungen, Belege)
-ollama pull granite3.2-vision
+# Text-Neutralisierung
+ollama pull qwen2.5:7b
 
-# Fuer Handschrift
+# Vision: Handschrift, Dokumente
 ollama pull qwen2.5vl:7b
 
-# OCR-Spezialist
-ollama pull deepseek-ocr
+# Vision: Rechnungen, Belege
+ollama pull granite3.2-vision
 ```
 
 ### Modelle pruefen
@@ -606,7 +606,7 @@ python3 -m venv /opt/ollama-webapp/venv
 
 # Basis-Pakete installieren
 /opt/ollama-webapp/venv/bin/pip install --upgrade pip
-/opt/ollama-webapp/venv/bin/pip install flask flask-cors requests pymupdf gunicorn
+/opt/ollama-webapp/venv/bin/pip install -r /opt/ollama-webapp/app/requirements.txt
 ```
 
 ---
@@ -621,26 +621,17 @@ Inhalt:
 
 ```ini
 [Unit]
-Description=Belegscanner Flask App
-After=network.target ollama.service
-Wants=ollama.service
+Description=PowerOn Private-LLM Service
+After=network.target
 
 [Service]
 Type=simple
 User=ubuntu
-Group=ubuntu
 WorkingDirectory=/opt/ollama-webapp/app
-Environment="PATH=/opt/ollama-webapp/venv/bin:/usr/bin"
-Environment="FLASK_ENV=production"
-ExecStart=/opt/ollama-webapp/venv/bin/gunicorn \
-    --bind 0.0.0.0:5000 \
-    --workers 2 \
-    --timeout 3600 \
-    --access-logfile /opt/ollama-webapp/logs/access.log \
-    --error-logfile /opt/ollama-webapp/logs/error.log \
-    app:app
+ExecStart=/opt/ollama-webapp/venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --ssl-keyfile /etc/letsencrypt/live/llm.poweron.swiss/privkey.pem --ssl-certfile /etc/letsencrypt/live/llm.poweron.swiss/fullchain.pem
 Restart=always
 RestartSec=5
+Environment=PYTHONUNBUFFERED=1
 
 [Install]
 WantedBy=multi-user.target
@@ -772,10 +763,10 @@ ollama run granite3.2-vision "Beschreibe dieses Bild"
 sudo systemctl status ollama-webapp
 
 # Logs pruefen
-tail -50 /opt/ollama-webapp/logs/error.log
+sudo journalctl -u ollama-webapp -n 50
 
 # Port pruefen
-sudo netstat -tlnp | grep 5000
+sudo netstat -tlnp | grep 8000
 ```
 
 ## Ollama nicht erreichbar