This commit is contained in:
parent
5b66aaea0f
commit
1e2a6bea70
3 changed files with 47 additions and 41 deletions
21
README.md
21
README.md
|
|
@ -1,21 +1,24 @@
|
||||||
# Private LLM - für Neutralisierung
|
# Private LLM - für Neutralisierung
|
||||||
|
|
||||||
KI-Dokumentenanalyse mit lokalen Ollama Vision-Modellen.
|
KI-Dokumentenanalyse und Text-Neutralisierung mit lokalen Ollama-Modellen.
|
||||||
|
|
||||||
## Integrierte MVP Features
|
## Features
|
||||||
|
|
||||||
- Rechnungen, Belege, Bankauszüge analysieren
|
- Text-Neutralisierung für den Gateway (kein Datenabfluss an externe APIs)
|
||||||
|
- Rechnungen, Belege, Bankauszüge analysieren (Vision)
|
||||||
- Handschrift erkennen
|
- Handschrift erkennen
|
||||||
- PDF-Support
|
- PDF-Support (PyMuPDF)
|
||||||
- 100% lokal - keine Cloud-APIs
|
- 100% lokal auf Schweizer Infrastruktur
|
||||||
|
|
||||||
## Tech Stack
|
## Tech Stack
|
||||||
|
|
||||||
- **Backend:** Python Flask
|
- **Backend:** FastAPI (Python), Uvicorn
|
||||||
- **AI:** Ollama Vision Models
|
- **AI:** Ollama (qwen2.5:7b, qwen2.5vl:7b, granite3.2-vision)
|
||||||
- **Server:** Infomaniak Swiss Cloud (GPU)
|
- **Server:** Infomaniak Swiss Cloud (NVIDIA L4, 24 GB VRAM)
|
||||||
|
- **TLS:** Let's Encrypt (`llm.poweron.swiss`)
|
||||||
|
|
||||||
## Deployment
|
## Deployment
|
||||||
|
|
||||||
Automatisches Deployment via GitHub Actions bei Push zu `main`.
|
Automatisches Deployment via Forgejo Actions bei Push zu `main`.
|
||||||
|
Ziel: `/opt/ollama-webapp/app/` auf `83.228.200.109`.
|
||||||
|
|
||||||
|
|
|
||||||
14
config.py
14
config.py
|
|
@ -153,12 +153,24 @@ rateLimiter = RateLimiter(
|
||||||
# Model Mapping
|
# Model Mapping
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
# Current models (L4 24 GB VRAM — Infomaniak)
|
||||||
MODEL_MAPPING = {
|
MODEL_MAPPING = {
|
||||||
"poweron-text-general": "qwen2.5:7b",
|
"poweron-text-general": "qwen2.5:7b",
|
||||||
"poweron-vision-general": "qwen2.5vl:7b",
|
"poweron-vision-general": "qwen2.5vl:7b",
|
||||||
"poweron-vision-deep": "granite3.2-vision",
|
"poweron-vision-deep": "granite3.2-vision",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Next-gen models (RTX PRO 6000 96 GB VRAM — prepared, activate after migration)
|
||||||
|
# Uncomment and remove the old entries above once the new hardware is live.
|
||||||
|
# MODEL_MAPPING = {
|
||||||
|
# "poweron-text-general": "qwen3:14b",
|
||||||
|
# "poweron-text-reasoning": "deepseek-r1:70b",
|
||||||
|
# "poweron-vision-general": "llama4:scout",
|
||||||
|
# "poweron-vision-deep": "qwen2.5vl:72b",
|
||||||
|
# "poweron-embed": "nomic-embed-text",
|
||||||
|
# "poweron-transcribe": "whisper-large-v3-turbo",
|
||||||
|
# }
|
||||||
|
|
||||||
INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()}
|
INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -260,7 +272,7 @@ def _isVisionModel(modelName: str) -> bool:
|
||||||
if not modelName:
|
if not modelName:
|
||||||
return False
|
return False
|
||||||
modelLower = modelName.lower()
|
modelLower = modelName.lower()
|
||||||
visionIndicators = ["vision", "vl", "llava", "bakllava", "granite"]
|
visionIndicators = ["vision", "vl", "llava", "bakllava", "granite", "scout", "llama4"]
|
||||||
return any(indicator in modelLower for indicator in visionIndicators)
|
return any(indicator in modelLower for indicator in visionIndicators)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,25 +51,25 @@ Connect: ssh -i "C:\Users\pmots\Downloads\ollama-deploy-key.pem" ubuntu@83.228.2
|
||||||
| **GPU** | NVIDIA L4 (24GB VRAM) |
|
| **GPU** | NVIDIA L4 (24GB VRAM) |
|
||||||
| **OS** | Ubuntu 24.04 LTS |
|
| **OS** | Ubuntu 24.04 LTS |
|
||||||
| **SSH User** | `ubuntu` |
|
| **SSH User** | `ubuntu` |
|
||||||
| **App Port** | `5000` |
|
| **App Port** | `8000` (HTTPS) |
|
||||||
| **Ollama Port** | `11434` |
|
| **Ollama Port** | `11434` |
|
||||||
| **GitHub Repo** | `https://github.com/valueonag/private-llm` |
|
| **Git Repo** | `https://git.poweron.swiss` (Forgejo) |
|
||||||
|
|
||||||
### Installierte Modelle
|
### Installierte Modelle
|
||||||
|
|
||||||
| Modell | Verwendung |
|
| Modell | Ollama-Name | Verwendung |
|
||||||
|--------|------------|
|
|--------|-------------|------------|
|
||||||
| `granite3.2-vision` | Rechnungen, Belege, Dokumente |
|
| `poweron-text-general` | `qwen2.5:7b` | Text-Neutralisierung |
|
||||||
| `qwen2.5vl:7b` | Handschrift |
|
| `poweron-vision-general` | `qwen2.5vl:7b` | Handschrift, Dokumente |
|
||||||
| `deepseek-ocr` | OCR / Text-Extraktion |
|
| `poweron-vision-deep` | `granite3.2-vision` | Rechnungen, Belege |
|
||||||
|
|
||||||
### URLs
|
### URLs
|
||||||
|
|
||||||
| Service | URL |
|
| Service | URL |
|
||||||
|---------|-----|
|
|---------|-----|
|
||||||
| **App** | http://83.228.200.109:5000 |
|
| **App** | https://llm.poweron.swiss:8000 |
|
||||||
| **Health Check** | http://83.228.200.109:5000/api/health |
|
| **Health Check** | https://llm.poweron.swiss:8000/api/health |
|
||||||
| **Ollama Status** | http://83.228.200.109:5000/api/ollama/status |
|
| **Ollama Status** | https://llm.poweron.swiss:8000/api/ollama/status |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -573,14 +573,14 @@ sudo systemctl enable ollama
|
||||||
## D.5 Modelle herunterladen
|
## D.5 Modelle herunterladen
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Fuer Dokumente (Rechnungen, Belege)
|
# Text-Neutralisierung
|
||||||
ollama pull granite3.2-vision
|
ollama pull qwen2.5:7b
|
||||||
|
|
||||||
# Fuer Handschrift
|
# Vision: Handschrift, Dokumente
|
||||||
ollama pull qwen2.5vl:7b
|
ollama pull qwen2.5vl:7b
|
||||||
|
|
||||||
# OCR-Spezialist
|
# Vision: Rechnungen, Belege
|
||||||
ollama pull deepseek-ocr
|
ollama pull granite3.2-vision
|
||||||
```
|
```
|
||||||
|
|
||||||
### Modelle pruefen
|
### Modelle pruefen
|
||||||
|
|
@ -606,7 +606,7 @@ python3 -m venv /opt/ollama-webapp/venv
|
||||||
|
|
||||||
# Basis-Pakete installieren
|
# Basis-Pakete installieren
|
||||||
/opt/ollama-webapp/venv/bin/pip install --upgrade pip
|
/opt/ollama-webapp/venv/bin/pip install --upgrade pip
|
||||||
/opt/ollama-webapp/venv/bin/pip install flask flask-cors requests pymupdf gunicorn
|
/opt/ollama-webapp/venv/bin/pip install -r /opt/ollama-webapp/app/requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -621,26 +621,17 @@ Inhalt:
|
||||||
|
|
||||||
```ini
|
```ini
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Belegscanner Flask App
|
Description=PowerOn Private-LLM Service
|
||||||
After=network.target ollama.service
|
After=network.target
|
||||||
Wants=ollama.service
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
User=ubuntu
|
User=ubuntu
|
||||||
Group=ubuntu
|
|
||||||
WorkingDirectory=/opt/ollama-webapp/app
|
WorkingDirectory=/opt/ollama-webapp/app
|
||||||
Environment="PATH=/opt/ollama-webapp/venv/bin:/usr/bin"
|
ExecStart=/opt/ollama-webapp/venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --ssl-keyfile /etc/letsencrypt/live/llm.poweron.swiss/privkey.pem --ssl-certfile /etc/letsencrypt/live/llm.poweron.swiss/fullchain.pem
|
||||||
Environment="FLASK_ENV=production"
|
|
||||||
ExecStart=/opt/ollama-webapp/venv/bin/gunicorn \
|
|
||||||
--bind 0.0.0.0:5000 \
|
|
||||||
--workers 2 \
|
|
||||||
--timeout 3600 \
|
|
||||||
--access-logfile /opt/ollama-webapp/logs/access.log \
|
|
||||||
--error-logfile /opt/ollama-webapp/logs/error.log \
|
|
||||||
app:app
|
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=5
|
RestartSec=5
|
||||||
|
Environment=PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
@ -772,10 +763,10 @@ ollama run granite3.2-vision "Beschreibe dieses Bild"
|
||||||
sudo systemctl status ollama-webapp
|
sudo systemctl status ollama-webapp
|
||||||
|
|
||||||
# Logs pruefen
|
# Logs pruefen
|
||||||
tail -50 /opt/ollama-webapp/logs/error.log
|
sudo journalctl -u ollama-webapp -n 50
|
||||||
|
|
||||||
# Port pruefen
|
# Port pruefen
|
||||||
sudo netstat -tlnp | grep 5000
|
sudo netstat -tlnp | grep 8000
|
||||||
```
|
```
|
||||||
|
|
||||||
## Ollama nicht erreichbar
|
## Ollama nicht erreichbar
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue