From 1e2a6bea70f552c7869ab0b9abf170a0ccafa266 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 2 Jun 2026 09:42:04 +0200
Subject: [PATCH] docs: update README and server setup (FastAPI, Forgejo, journald logging); prepare next-gen model mapping
---
README.md | 21 ++++++++++--------
config.py | 14 +++++++++++-
docu/setupserver.md | 53 +++++++++++++++++++--------------------------
3 files changed, 47 insertions(+), 41 deletions(-)
diff --git a/README.md b/README.md
index 803876f..5a34d77 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,24 @@
# Private LLM - für Neutralisierung
-KI-Dokumentenanalyse mit lokalen Ollama Vision-Modellen.
+KI-Dokumentenanalyse und Text-Neutralisierung mit lokalen Ollama-Modellen.
-## Integrierte MVP Features
+## Features
-- Rechnungen, Belege, Bankauszüge analysieren
+- Text-Neutralisierung für den Gateway (kein Datenabfluss an externe APIs)
+- Rechnungen, Belege, Bankauszüge analysieren (Vision)
- Handschrift erkennen
-- PDF-Support
-- 100% lokal - keine Cloud-APIs
+- PDF-Support (PyMuPDF)
+- 100% lokal auf Schweizer Infrastruktur
## Tech Stack
-- **Backend:** Python Flask
-- **AI:** Ollama Vision Models
-- **Server:** Infomaniak Swiss Cloud (GPU)
+- **Backend:** FastAPI (Python), Uvicorn
+- **AI:** Ollama (qwen2.5:7b, qwen2.5vl:7b, granite3.2-vision)
+- **Server:** Infomaniak Swiss Cloud (NVIDIA L4, 24 GB VRAM)
+- **TLS:** Let's Encrypt (`llm.poweron.swiss`)
## Deployment
-Automatisches Deployment via GitHub Actions bei Push zu `main`.
+Automatisches Deployment via Forgejo Actions bei Push zu `main`.
+Ziel: `/opt/ollama-webapp/app/` auf `83.228.200.109`.
diff --git a/config.py b/config.py
index 749f612..5141b8f 100644
--- a/config.py
+++ b/config.py
@@ -153,12 +153,24 @@ rateLimiter = RateLimiter(
# Model Mapping
# ============================================================================
+# Current models (L4 24 GB VRAM — Infomaniak)
MODEL_MAPPING = {
"poweron-text-general": "qwen2.5:7b",
"poweron-vision-general": "qwen2.5vl:7b",
"poweron-vision-deep": "granite3.2-vision",
}
+# Next-gen models (RTX PRO 6000, 96 GB VRAM) — prepared, activate after migration.
+# Once the new hardware is live, uncomment the mapping below and delete the current MODEL_MAPPING above.
+# MODEL_MAPPING = {
+# "poweron-text-general": "qwen3:14b",
+# "poweron-text-reasoning": "deepseek-r1:70b",
+# "poweron-vision-general": "llama4:scout",
+# "poweron-vision-deep": "qwen2.5vl:72b",
+# "poweron-embed": "nomic-embed-text",
+# "poweron-transcribe": "whisper-large-v3-turbo",
+# }
+
INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()}
@@ -260,7 +272,7 @@ def _isVisionModel(modelName: str) -> bool:
if not modelName:
return False
modelLower = modelName.lower()
- visionIndicators = ["vision", "vl", "llava", "bakllava", "granite"]
+ visionIndicators = ["vision", "vl", "llava", "bakllava", "granite", "scout", "llama4"]
return any(indicator in modelLower for indicator in visionIndicators)
diff --git a/docu/setupserver.md b/docu/setupserver.md
index 30d1913..70fcbf3 100644
--- a/docu/setupserver.md
+++ b/docu/setupserver.md
@@ -51,25 +51,25 @@ Connect: ssh -i "C:\Users\pmots\Downloads\ollama-deploy-key.pem" ubuntu@83.228.2
| **GPU** | NVIDIA L4 (24GB VRAM) |
| **OS** | Ubuntu 24.04 LTS |
| **SSH User** | `ubuntu` |
-| **App Port** | `5000` |
+| **App Port** | `8000` (HTTPS) |
| **Ollama Port** | `11434` |
-| **GitHub Repo** | `https://github.com/valueonag/private-llm` |
+| **Git Repo** | `https://git.poweron.swiss` (Forgejo) |
### Installierte Modelle
-| Modell | Verwendung |
-|--------|------------|
-| `granite3.2-vision` | Rechnungen, Belege, Dokumente |
-| `qwen2.5vl:7b` | Handschrift |
-| `deepseek-ocr` | OCR / Text-Extraktion |
+| Modell | Ollama-Name | Verwendung |
+|--------|-------------|------------|
+| `poweron-text-general` | `qwen2.5:7b` | Text-Neutralisierung |
+| `poweron-vision-general` | `qwen2.5vl:7b` | Handschrift, Dokumente |
+| `poweron-vision-deep` | `granite3.2-vision` | Rechnungen, Belege |
### URLs
| Service | URL |
|---------|-----|
-| **App** | http://83.228.200.109:5000 |
-| **Health Check** | http://83.228.200.109:5000/api/health |
-| **Ollama Status** | http://83.228.200.109:5000/api/ollama/status |
+| **App** | https://llm.poweron.swiss:8000 |
+| **Health Check** | https://llm.poweron.swiss:8000/api/health |
+| **Ollama Status** | https://llm.poweron.swiss:8000/api/ollama/status |
---
@@ -573,14 +573,14 @@ sudo systemctl enable ollama
## D.5 Modelle herunterladen
```bash
-# Fuer Dokumente (Rechnungen, Belege)
-ollama pull granite3.2-vision
+# Text-Neutralisierung
+ollama pull qwen2.5:7b
-# Fuer Handschrift
+# Vision: Handschrift, Dokumente
ollama pull qwen2.5vl:7b
-# OCR-Spezialist
-ollama pull deepseek-ocr
+# Vision: Rechnungen, Belege
+ollama pull granite3.2-vision
```
### Modelle pruefen
@@ -606,7 +606,7 @@ python3 -m venv /opt/ollama-webapp/venv
# Basis-Pakete installieren
/opt/ollama-webapp/venv/bin/pip install --upgrade pip
-/opt/ollama-webapp/venv/bin/pip install flask flask-cors requests pymupdf gunicorn
+/opt/ollama-webapp/venv/bin/pip install -r /opt/ollama-webapp/app/requirements.txt
```
---
@@ -621,26 +621,17 @@ Inhalt:
```ini
[Unit]
-Description=Belegscanner Flask App
-After=network.target ollama.service
-Wants=ollama.service
+Description=PowerOn Private-LLM Service
+After=network.target
[Service]
Type=simple
User=ubuntu
-Group=ubuntu
WorkingDirectory=/opt/ollama-webapp/app
-Environment="PATH=/opt/ollama-webapp/venv/bin:/usr/bin"
-Environment="FLASK_ENV=production"
-ExecStart=/opt/ollama-webapp/venv/bin/gunicorn \
- --bind 0.0.0.0:5000 \
- --workers 2 \
- --timeout 3600 \
- --access-logfile /opt/ollama-webapp/logs/access.log \
- --error-logfile /opt/ollama-webapp/logs/error.log \
- app:app
+ExecStart=/opt/ollama-webapp/venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --ssl-keyfile /etc/letsencrypt/live/llm.poweron.swiss/privkey.pem --ssl-certfile /etc/letsencrypt/live/llm.poweron.swiss/fullchain.pem
Restart=always
RestartSec=5
+Environment=PYTHONUNBUFFERED=1
[Install]
WantedBy=multi-user.target
@@ -772,10 +763,10 @@ ollama run granite3.2-vision "Beschreibe dieses Bild"
sudo systemctl status ollama-webapp
# Logs pruefen
-tail -50 /opt/ollama-webapp/logs/error.log
+sudo journalctl -u ollama-webapp -n 50
# Port pruefen
-sudo netstat -tlnp | grep 5000
+sudo netstat -tlnp | grep 8000
```
## Ollama nicht erreichbar