diff --git a/README.md b/README.md index 803876f..5a34d77 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,24 @@ # Private LLM - für Neutralisierung -KI-Dokumentenanalyse mit lokalen Ollama Vision-Modellen. +KI-Dokumentenanalyse und Text-Neutralisierung mit lokalen Ollama-Modellen. -## Integrierte MVP Features +## Features -- Rechnungen, Belege, Bankauszüge analysieren +- Text-Neutralisierung für den Gateway (kein Datenabfluss an externe APIs) +- Rechnungen, Belege, Bankauszüge analysieren (Vision) - Handschrift erkennen -- PDF-Support -- 100% lokal - keine Cloud-APIs +- PDF-Support (PyMuPDF) +- 100% lokal auf Schweizer Infrastruktur ## Tech Stack -- **Backend:** Python Flask -- **AI:** Ollama Vision Models -- **Server:** Infomaniak Swiss Cloud (GPU) +- **Backend:** FastAPI (Python), Uvicorn +- **AI:** Ollama (qwen2.5:7b, qwen2.5vl:7b, granite3.2-vision) +- **Server:** Infomaniak Swiss Cloud (NVIDIA L4, 24 GB VRAM) +- **TLS:** Let's Encrypt (`llm.poweron.swiss`) ## Deployment -Automatisches Deployment via GitHub Actions bei Push zu `main`. +Automatisches Deployment via Forgejo Actions bei Push zu `main`. +Ziel: `/opt/ollama-webapp/app/` auf `83.228.200.109`. diff --git a/config.py b/config.py index 749f612..5141b8f 100644 --- a/config.py +++ b/config.py @@ -153,12 +153,24 @@ rateLimiter = RateLimiter( # Model Mapping # ============================================================================ +# Current models (L4 24 GB VRAM — Infomaniak) MODEL_MAPPING = { "poweron-text-general": "qwen2.5:7b", "poweron-vision-general": "qwen2.5vl:7b", "poweron-vision-deep": "granite3.2-vision", } +# Next-gen models (RTX PRO 6000 96 GB VRAM — prepared, activate after migration) +# Uncomment and remove the old entries above once the new hardware is live. 
+# MODEL_MAPPING = { +# "poweron-text-general": "qwen3:14b", +# "poweron-text-reasoning": "deepseek-r1:70b", +# "poweron-vision-general": "llama4:scout", +# "poweron-vision-deep": "qwen2.5vl:72b", +# "poweron-embed": "nomic-embed-text", +# "poweron-transcribe": "whisper-large-v3-turbo", +# } + INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()} @@ -260,7 +272,7 @@ def _isVisionModel(modelName: str) -> bool: if not modelName: return False modelLower = modelName.lower() - visionIndicators = ["vision", "vl", "llava", "bakllava", "granite"] + visionIndicators = ["vision", "vl", "llava", "bakllava", "granite", "scout", "llama4"] return any(indicator in modelLower for indicator in visionIndicators) diff --git a/docu/setupserver.md b/docu/setupserver.md index 30d1913..70fcbf3 100644 --- a/docu/setupserver.md +++ b/docu/setupserver.md @@ -51,25 +51,25 @@ Connect: ssh -i "C:\Users\pmots\Downloads\ollama-deploy-key.pem" ubuntu@83.228.2 | **GPU** | NVIDIA L4 (24GB VRAM) | | **OS** | Ubuntu 24.04 LTS | | **SSH User** | `ubuntu` | -| **App Port** | `5000` | +| **App Port** | `8000` (HTTPS) | | **Ollama Port** | `11434` | -| **GitHub Repo** | `https://github.com/valueonag/private-llm` | +| **Git Repo** | `https://git.poweron.swiss` (Forgejo) | ### Installierte Modelle -| Modell | Verwendung | -|--------|------------| -| `granite3.2-vision` | Rechnungen, Belege, Dokumente | -| `qwen2.5vl:7b` | Handschrift | -| `deepseek-ocr` | OCR / Text-Extraktion | +| Modell | Ollama-Name | Verwendung | +|--------|-------------|------------| +| `poweron-text-general` | `qwen2.5:7b` | Text-Neutralisierung | +| `poweron-vision-general` | `qwen2.5vl:7b` | Handschrift, Dokumente | +| `poweron-vision-deep` | `granite3.2-vision` | Rechnungen, Belege | ### URLs | Service | URL | |---------|-----| -| **App** | http://83.228.200.109:5000 | -| **Health Check** | http://83.228.200.109:5000/api/health | -| **Ollama Status** | http://83.228.200.109:5000/api/ollama/status | +| **App** | 
https://llm.poweron.swiss:8000 | +| **Health Check** | https://llm.poweron.swiss:8000/api/health | +| **Ollama Status** | https://llm.poweron.swiss:8000/api/ollama/status | --- @@ -573,14 +573,14 @@ sudo systemctl enable ollama ## D.5 Modelle herunterladen ```bash -# Fuer Dokumente (Rechnungen, Belege) -ollama pull granite3.2-vision +# Text-Neutralisierung +ollama pull qwen2.5:7b -# Fuer Handschrift +# Vision: Handschrift, Dokumente ollama pull qwen2.5vl:7b -# OCR-Spezialist -ollama pull deepseek-ocr +# Vision: Rechnungen, Belege +ollama pull granite3.2-vision ``` ### Modelle pruefen @@ -606,7 +606,7 @@ python3 -m venv /opt/ollama-webapp/venv # Basis-Pakete installieren /opt/ollama-webapp/venv/bin/pip install --upgrade pip -/opt/ollama-webapp/venv/bin/pip install flask flask-cors requests pymupdf gunicorn +/opt/ollama-webapp/venv/bin/pip install -r /opt/ollama-webapp/app/requirements.txt ``` --- @@ -621,26 +621,17 @@ Inhalt: ```ini [Unit] -Description=Belegscanner Flask App -After=network.target ollama.service -Wants=ollama.service +Description=PowerOn Private-LLM Service +After=network.target [Service] Type=simple User=ubuntu -Group=ubuntu WorkingDirectory=/opt/ollama-webapp/app -Environment="PATH=/opt/ollama-webapp/venv/bin:/usr/bin" -Environment="FLASK_ENV=production" -ExecStart=/opt/ollama-webapp/venv/bin/gunicorn \ - --bind 0.0.0.0:5000 \ - --workers 2 \ - --timeout 3600 \ - --access-logfile /opt/ollama-webapp/logs/access.log \ - --error-logfile /opt/ollama-webapp/logs/error.log \ - app:app +ExecStart=/opt/ollama-webapp/venv/bin/uvicorn app:app --host 0.0.0.0 --port 8000 --ssl-keyfile /etc/letsencrypt/live/llm.poweron.swiss/privkey.pem --ssl-certfile /etc/letsencrypt/live/llm.poweron.swiss/fullchain.pem Restart=always RestartSec=5 +Environment=PYTHONUNBUFFERED=1 [Install] WantedBy=multi-user.target @@ -772,10 +763,10 @@ ollama run granite3.2-vision "Beschreibe dieses Bild" sudo systemctl status ollama-webapp # Logs pruefen -tail -50 
/opt/ollama-webapp/logs/error.log +sudo journalctl -u ollama-webapp -n 50 # Port pruefen -sudo netstat -tlnp | grep 5000 +sudo netstat -tlnp | grep 8000 ``` ## Ollama nicht erreichbar