integrated privateLLM

This commit is contained in:
ValueOn AG 2026-02-06 10:27:06 +01:00
parent 0313821f59
commit a04bee5008
9 changed files with 671 additions and 307 deletions

824
app.py
View file

@ -1,17 +1,33 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Belegscanner - KI-Dokumentenanalyse
Python Flask Web App mit CORS-Unterstützung und Poweron Design
Private-LLM Service - FastAPI Web App
Provides AI model endpoints for OCR and Vision processing via Ollama.
Models exposed:
- poweron-ocr-general (deepseek-ocr)
- poweron-vision-general (qwen2.5vl:7b)
- poweron-vision-deep (granite3.2-vision)
"""
from flask import Flask, render_template, request, jsonify, session, redirect, url_for
from flask_cors import CORS
from functools import wraps
import requests
import os
import sys
import base64
import json
import re
import io
import os
import logging
import time
from collections import defaultdict
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Depends, Header, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel, Field
import httpx
# PDF Support
try:
@ -22,79 +38,243 @@ except ImportError:
print("WARNUNG: PyMuPDF nicht installiert. PDF-Support deaktiviert.")
print("Installieren mit: pip install pymupdf")
app = Flask(__name__)
app.secret_key = os.environ.get('SECRET_KEY', 'poweron-secret-key-change-in-production')
CORS(app, supports_credentials=True) # CORS für alle Routen aktivieren
# Configure logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
# ============================================================================
# Authentication
# Configuration
# ============================================================================
# Einfache Credentials (für minimalen Schutz)
AUTH_USERNAME = os.environ.get('AUTH_USERNAME', 'poweron')
AUTH_PASSWORD = os.environ.get('AUTH_PASSWORD', 'poweron')
def _loadConfig() -> Dict[str, Any]:
"""Load configuration from config.ini file."""
configPath = os.path.join(os.path.dirname(__file__), "config.ini")
config = {
"apiKey": None,
"ollamaUrl": "http://localhost:11434",
"authUsername": "poweron",
"authPassword": "poweron",
"secretKey": "poweron-secret-key-change-in-production",
"rateLimitRequestsPerMinute": 60,
"rateLimitBurstSize": 10,
}
if os.path.exists(configPath):
try:
with open(configPath, "r") as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
continue
if "=" in line:
key, value = line.split("=", 1)
key = key.strip()
value = value.strip()
# Map config keys
if key == "PRIVATE_LLM_API_KEY":
config["apiKey"] = value
elif key == "OLLAMA_URL":
config["ollamaUrl"] = value
elif key == "AUTH_USERNAME":
config["authUsername"] = value
elif key == "AUTH_PASSWORD":
config["authPassword"] = value
elif key == "SECRET_KEY":
config["secretKey"] = value
elif key == "RATE_LIMIT_REQUESTS_PER_MINUTE":
config["rateLimitRequestsPerMinute"] = int(value)
elif key == "RATE_LIMIT_BURST_SIZE":
config["rateLimitBurstSize"] = int(value)
except Exception as e:
logger.warning(f"Error loading config.ini: {e}")
# Override with environment variables if set
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
config["secretKey"] = os.environ.get("SECRET_KEY", config["secretKey"])
config["rateLimitRequestsPerMinute"] = int(os.environ.get("RATE_LIMIT_REQUESTS_PER_MINUTE", config["rateLimitRequestsPerMinute"]))
config["rateLimitBurstSize"] = int(os.environ.get("RATE_LIMIT_BURST_SIZE", config["rateLimitBurstSize"]))
return config
CONFIG = _loadConfig()
def _loginRequired(f):
"""Decorator für geschützte Routen"""
@wraps(f)
def decorated_function(*args, **kwargs):
if not session.get('logged_in'):
# Bei API-Calls JSON zurückgeben, sonst redirect
if request.path.startswith('/api/'):
return jsonify({'error': 'Nicht autorisiert', 'login_required': True}), 401
return redirect(url_for('_login'))
return f(*args, **kwargs)
return decorated_function
# ============================================================================
# Rate Limiting (Token Bucket Algorithm)
# ============================================================================
class RateLimiter:
    """Token bucket rate limiter with one bucket per API key.

    A bucket starts full (``burstSize`` tokens). Tokens are added continuously
    at ``requestsPerMinute / 60`` per second, capped at ``burstSize``, and each
    allowed request consumes one token.

    Time is measured with ``time.monotonic()`` so that wall-clock adjustments
    cannot produce negative elapsed time and drain a bucket. No locking is
    performed.
    """

    def __init__(self, requestsPerMinute: int = 60, burstSize: int = 10):
        """Create a limiter.

        Args:
            requestsPerMinute: Sustained refill rate, in tokens per minute.
            burstSize: Bucket capacity, i.e. the number of requests that may be
                served back-to-back from a full bucket.
        """
        self.requestsPerMinute = requestsPerMinute
        self.burstSize = burstSize
        self.tokensPerSecond = requestsPerMinute / 60.0

        # Tokens and last refill time per API key.
        # Format: {apiKey: {"tokens": float, "lastUpdate": float}}
        self._buckets: Dict[str, Dict[str, float]] = defaultdict(
            lambda: {"tokens": burstSize, "lastUpdate": time.monotonic()}
        )

    def _refillTokens(self, bucket: Dict[str, float]) -> None:
        """Add the tokens earned since the bucket's last update, up to the cap."""
        now = time.monotonic()
        # Clamp: a timestamp that lies in the future must never remove tokens.
        elapsed = max(0.0, now - bucket["lastUpdate"])
        bucket["tokens"] = min(
            self.burstSize,
            bucket["tokens"] + elapsed * self.tokensPerSecond
        )
        bucket["lastUpdate"] = now

    def isAllowed(self, apiKey: str) -> tuple[bool, Dict[str, Any]]:
        """Check whether a request is allowed and consume a token if it is.

        Args:
            apiKey: Identifier of the bucket to charge; unknown keys get a new,
                full bucket.

        Returns:
            Tuple ``(allowed, info)``. ``info`` always contains ``remaining``,
            ``limit`` and ``resetSeconds``; when the request is rejected it
            also contains ``retryAfter`` (seconds until one token is available,
            rounded to one decimal).
        """
        bucket = self._buckets[apiKey]
        self._refillTokens(bucket)

        if bucket["tokens"] >= 1.0:
            bucket["tokens"] -= 1.0
            return True, {
                "remaining": int(bucket["tokens"]),
                "limit": self.requestsPerMinute,
                "resetSeconds": 60
            }

        if self.tokensPerSecond > 0:
            # Time until the missing fraction of a token has been refilled.
            retryAfter = (1.0 - bucket["tokens"]) / self.tokensPerSecond
        else:
            # A non-positive rate never refills; report the reset window
            # instead of dividing by zero.
            retryAfter = 60.0
        return False, {
            "remaining": 0,
            "limit": self.requestsPerMinute,
            "retryAfter": round(retryAfter, 1),
            "resetSeconds": 60
        }

    def cleanup(self, maxAgeSeconds: int = 3600) -> int:
        """Remove buckets not touched for more than ``maxAgeSeconds``.

        Returns:
            Number of buckets removed.
        """
        now = time.monotonic()
        staleKeys = [
            key for key, bucket in self._buckets.items()
            if now - bucket["lastUpdate"] > maxAgeSeconds
        ]
        for key in staleKeys:
            del self._buckets[key]
        return len(staleKeys)
# Global rate limiter instance
rateLimiter = RateLimiter(
requestsPerMinute=CONFIG["rateLimitRequestsPerMinute"],
burstSize=CONFIG["rateLimitBurstSize"]
)
# Model mapping: external name -> internal Ollama model name
# Production models (optimized for 32GB RAM server):
# - deepseek-ocr: 3.34B params, 8K context, ~6.7GB RAM
# - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM
# - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM
MODEL_MAPPING = {
"poweron-ocr-general": "deepseek-ocr",
"poweron-vision-general": "qwen2.5vl:7b",
"poweron-vision-deep": "granite3.2-vision",
}
# Reverse mapping for lookups
INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()}
# ============================================================================
# Request/Response Models
# ============================================================================
class AnalyzeRequest(BaseModel):
    """Request body of the document analysis endpoint (POST /api/analyze)."""
    # Optional at the schema level; the analyze handler answers 400 when the
    # resolved model is a vision model and no image was sent.
    imageBase64: Optional[str] = Field(default=None, description="Base64 encoded image")
    # Forwarded as the "prompt" of the Ollama generate request; the handler
    # rejects an empty prompt with 400.
    prompt: str = Field(description="Analysis prompt")
    # External model name. It is translated through MODEL_MAPPING; a name that
    # is not in the mapping is passed on to Ollama unchanged.
    modelName: str = Field(default="poweron-vision-general", description="Model to use")
class AnalyzeResponse(BaseModel):
    """Response body of the document analysis endpoint (POST /api/analyze)."""
    success: bool = Field(description="Whether the analysis was successful")
    # On success: the JSON object found in the model output, or
    # {"response": <stripped text>} when no valid JSON object could be parsed.
    data: Optional[Dict[str, Any]] = Field(default=None, description="Extracted data")
    # Unmodified "response" text returned by Ollama.
    rawResponse: Optional[str] = Field(default=None, description="Raw model response")
    # Set together with success=False (timeout, connection failure, unexpected error).
    error: Optional[str] = Field(default=None, description="Error message if failed")
class PdfExtractRequest(BaseModel):
    """Request body of the PDF extraction endpoint (POST /api/pdf/extract)."""
    pdfBase64: str = Field(description="Base64 encoded PDF")
    # 1-indexed; the handler subtracts 1 before rendering. When omitted, the
    # handler extracts the leading pages instead (at most 5).
    page: Optional[int] = Field(default=None, description="Specific page number (1-indexed)")
class ModelInfo(BaseModel):
    """One entry of the model list returned by GET /api/models."""
    # Key of MODEL_MAPPING (the name clients send as modelName).
    name: str = Field(description="External model name")
    # Value of MODEL_MAPPING (the model name passed to Ollama).
    internalName: str = Field(description="Internal Ollama model name")
    # Result of _isVisionModel() for the internal name.
    isVision: bool = Field(description="Whether it's a vision model")
    # The model list endpoint reports 0.10 for vision models and 0.01 otherwise.
    pricePerCall: float = Field(description="Price per call in CHF")
class HealthResponse(BaseModel):
    """Response body of the health check endpoint (GET /api/health)."""
    # The health endpoint always reports "ok", even when Ollama is unreachable.
    status: str
    # Service identifier; the health endpoint reports "private-llm".
    service: str
    # Value of PDF_SUPPORT.
    pdfSupport: bool
    # True when GET <ollamaUrl>/api/tags answered with HTTP 200.
    ollamaConnected: bool
class OllamaStatusResponse(BaseModel):
    """Response body of the Ollama status endpoint (GET /api/ollama/status)."""
    # False when Ollama could not be reached or did not answer with HTTP 200.
    connected: bool
    # Names of all models reported by Ollama; only set when connected.
    models: Optional[List[str]] = None
    # Subset of `models` for which _isVisionModel() is true.
    visionModels: Optional[List[str]] = None
    # len(models); only set when connected.
    totalModels: Optional[int] = None
    # Human-readable failure reason; only set when not connected.
    error: Optional[str] = None
# ============================================================================
# PDF Helper Functions
# ============================================================================
def _extractImagesFromPdf(pdfBytes: bytes, maxPages: int = 5) -> List[Dict[str, Any]]:
    """Render the leading pages of a PDF as base64-encoded PNG images.

    Args:
        pdfBytes: Raw content of the PDF file.
        maxPages: Upper bound on the number of pages rendered, counted from
            the first page.

    Returns:
        One dict per rendered page with the keys ``page`` (1-indexed page
        number), ``base64`` (PNG data), ``width`` and ``height`` (pixel size of
        the rendered image).

    Raises:
        Exception: If PDF support is not available (PyMuPDF not installed).
    """
    if not PDF_SUPPORT:
        raise Exception("PDF-Support nicht verfügbar. Bitte PyMuPDF installieren.")

    images = []
    doc = fitz.open(stream=pdfBytes, filetype="pdf")
    try:
        # Limit the number of pages to render.
        numPages = min(len(doc), maxPages)
        # 2x zoom for better quality; the same matrix is used for every page.
        mat = fitz.Matrix(2.0, 2.0)

        for pageNum in range(numPages):
            page = doc[pageNum]
            pix = page.get_pixmap(matrix=mat)

            imgBytes = pix.tobytes("png")
            imgBase64 = base64.b64encode(imgBytes).decode("utf-8")

            images.append({
                "page": pageNum + 1,
                "base64": imgBase64,
                "width": pix.width,
                "height": pix.height
            })
    finally:
        # Release the document even when rendering a page fails.
        doc.close()

    return images
def _renderPdfPageAsImage(pdfBytes, pageNum=0, zoom=2.0):
"""
Rendert eine einzelne PDF-Seite als Bild.
"""
def _renderPdfPageAsImage(pdfBytes: bytes, pageNum: int = 0, zoom: float = 2.0) -> Dict[str, Any]:
"""Render a single PDF page as an image."""
if not PDF_SUPPORT:
raise Exception("PDF-Support nicht verfügbar.")
@ -106,267 +286,399 @@ def _renderPdfPageAsImage(pdfBytes, pageNum=0, zoom=2.0):
page = doc[pageNum]
mat = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=mat)
imgBytes = pix.tobytes("png")
imgBase64 = base64.b64encode(imgBytes).decode('utf-8')
imgBase64 = base64.b64encode(imgBytes).decode("utf-8")
result = {
'base64': imgBase64,
'width': pix.width,
'height': pix.height,
'page': pageNum + 1,
'totalPages': len(doc)
"base64": imgBase64,
"width": pix.width,
"height": pix.height,
"page": pageNum + 1,
"totalPages": len(doc)
}
doc.close()
return result
# ============================================================================
# Model Helper Functions
# ============================================================================
def _isVisionModel(modelName):
"""
Prüft ob ein Modell ein Vision-Modell ist basierend auf Namenskonventionen.
Vision-Modelle enthalten typischerweise 'vision', 'vl', 'llava', 'bakllava' im Namen.
"""
def _isVisionModel(modelName: str) -> bool:
"""Check if a model is a vision model based on naming conventions."""
if not modelName:
return False
modelLower = modelName.lower()
visionIndicators = ['vision', 'vl', 'llava', 'bakllava']
visionIndicators = ["vision", "vl", "llava", "bakllava", "granite"]
return any(indicator in modelLower for indicator in visionIndicators)
def _getInternalModelName(externalName: str) -> str:
    """Translate an external model name into the Ollama model name.

    Names that are not part of MODEL_MAPPING are returned unchanged.
    """
    try:
        return MODEL_MAPPING[externalName]
    except KeyError:
        return externalName
def _getExternalModelName(internalName: str) -> str:
    """Translate an Ollama model name back into the external model name.

    Names that are not part of INTERNAL_TO_EXTERNAL are returned unchanged.
    """
    try:
        return INTERNAL_TO_EXTERNAL[internalName]
    except KeyError:
        return internalName
# ============================================================================
# Routes
# Authentication & Rate Limiting
# ============================================================================
@app.route('/login', methods=['GET', 'POST'])
def _login():
"""Login-Seite"""
error = None
if request.method == 'POST':
username = request.form.get('username', '')
password = request.form.get('password', '')
if username == AUTH_USERNAME and password == AUTH_PASSWORD:
session['logged_in'] = True
session['username'] = username
return redirect(url_for('_index'))
else:
error = 'Ungültige Anmeldedaten'
async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")) -> str:
    """Verify the ``X-API-Key`` header and return the key for rate limiting.

    When no API key is configured, every request is accepted (development
    mode) and the fixed identifier ``"dev-mode"`` is returned.

    Args:
        xApiKey: Value of the ``X-API-Key`` request header, if present.

    Returns:
        The verified API key, or ``"dev-mode"`` when no key is configured.

    Raises:
        HTTPException: 401 when the header is missing or does not match the
            configured key.
    """
    import hmac  # function-scope import: the module header does not import hmac

    expectedKey = CONFIG["apiKey"]
    if not expectedKey:
        # No API key configured, allow all requests (development mode)
        logger.warning("No API key configured - running in development mode")
        return "dev-mode"

    if not xApiKey:
        raise HTTPException(status_code=401, detail="API key required")

    # Constant-time comparison so that response timing does not reveal how
    # many leading characters of a guessed key are correct. Both sides are
    # encoded because compare_digest() only accepts ASCII-only str values.
    if not hmac.compare_digest(xApiKey.encode("utf-8"), expectedKey.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid API key")

    return xApiKey
@app.route('/logout')
def _logout():
"""Logout"""
session.clear()
return redirect(url_for('_login'))
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
    """Enforce the rate limit for the authenticated API key.

    Args:
        apiKey: Key returned by ``_verifyApiKey``; it selects the rate limit
            bucket.

    Returns:
        The API key, unchanged, when the request is within the limit.

    Raises:
        HTTPException: 429 with rate limit details in the body and in the
            ``Retry-After`` / ``X-RateLimit-*`` headers when the limit is
            exceeded.
    """
    import math  # function-scope import: the module header does not import math

    allowed, info = rateLimiter.isAllowed(apiKey)
    if allowed:
        return apiKey

    # Retry-After carries whole seconds. Round up and never advertise 0:
    # truncating a sub-second wait would tell clients to retry immediately,
    # before a token is available.
    retryAfterSeconds = max(1, math.ceil(info["retryAfter"]))

    raise HTTPException(
        status_code=429,
        detail={
            "error": "Rate limit exceeded",
            "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
            "retryAfter": info["retryAfter"],
            "limit": info["limit"],
            "remaining": info["remaining"]
        },
        headers={
            "Retry-After": str(retryAfterSeconds),
            "X-RateLimit-Limit": str(info["limit"]),
            "X-RateLimit-Remaining": str(info["remaining"]),
            "X-RateLimit-Reset": str(info["resetSeconds"])
        }
    )
# ============================================================================
# Application Lifecycle
# ============================================================================
@app.route('/')
@_loginRequired
def _index():
"""Hauptseite mit dem Belegscanner UI"""
return render_template('index.html')
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log the effective configuration at start-up and a notice at shutdown."""
    logger.info("Private-LLM Service starting up...")
    logger.info(f"Ollama URL: {CONFIG['ollamaUrl']}")

    apiKeyState = "Yes" if CONFIG["apiKey"] else "No (development mode)"
    logger.info(f"API Key configured: {apiKeyState}")

    pdfState = "Enabled" if PDF_SUPPORT else "Disabled"
    logger.info(f"PDF Support: {pdfState}")

    # The application serves requests while suspended here.
    yield

    logger.info("Private-LLM Service shutting down...")
# ============================================================================
# FastAPI Application
# ============================================================================
@app.route('/api/analyze', methods=['POST'])
@_loginRequired
def _analyzeDocument():
app = FastAPI(
title="PowerOn Private-LLM Service",
description="AI model endpoints for OCR and Vision processing",
version="1.0.0",
lifespan=lifespan,
)
# CORS configuration - allow gateway instances.
# Local development origins (http only).
ALLOWED_ORIGINS = [
    "http://localhost:8000",
    "http://localhost:8080",
    "http://localhost:5000",
    "http://127.0.0.1:8000",
    "http://127.0.0.1:8080",
    "http://127.0.0.1:5000",
]
# Production base domains; the explicit https origins below are derived from them.
PRODUCTION_PATTERNS = [
    "poweron.swiss",
    "poweron-center.net",
]
# Build the explicit https origins: the bare domain plus a fixed set of
# subdomains. The bare domain must be listed here because the regex passed to
# the middleware below requires a subdomain label in front of the domain.
for pattern in PRODUCTION_PATTERNS:
    ALLOWED_ORIGINS.extend([
        f"https://{pattern}",
        f"https://www.{pattern}",
        f"https://api.{pattern}",
        f"https://gateway.{pattern}",
        f"https://app.{pattern}",
        f"https://nyla.{pattern}",
        f"https://playground.{pattern}",
    ])
# Allow all subdomains of the production domains via regex in the middleware.
# NOTE(review): `.*` is not limited to hostname characters and credentials are
# allowed for every matching origin - confirm this breadth is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_origin_regex=r"https://.*\.(poweron\.swiss|poweron-center\.net)",
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["*"],
    max_age=86400,
)
# Static files and templates (for web UI)
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
# ============================================================================
# API Routes
# ============================================================================
@app.get("/api/health", response_model=HealthResponse, tags=["System"])
async def _healthCheck():
    """Health check endpoint.

    Probes Ollama with ``GET <ollamaUrl>/api/tags`` (5 second timeout). The
    endpoint itself always answers with status ``"ok"``; whether Ollama is
    reachable is reported in ``ollamaConnected``.
    """
    ollamaConnected = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
            ollamaConnected = response.status_code == 200
    except Exception as e:
        # Best effort: a failed probe must not fail the health check, but the
        # reason is kept for diagnostics instead of being dropped silently.
        logger.debug(f"Ollama health probe failed: {e}")

    return HealthResponse(
        status="ok",
        service="private-llm",
        pdfSupport=PDF_SUPPORT,
        ollamaConnected=ollamaConnected
    )
@app.get("/api/models", response_model=List[ModelInfo], tags=["Models"])
async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
    """Return every entry of MODEL_MAPPING together with its price per call."""

    def _describe(externalName: str, internalName: str) -> ModelInfo:
        """Build the ModelInfo entry for one mapping pair."""
        visionFlag = _isVisionModel(internalName)
        # CHF pricing: vision models cost more per call than text models.
        if visionFlag:
            price = 0.10
        else:
            price = 0.01
        return ModelInfo(
            name=externalName,
            internalName=internalName,
            isVision=visionFlag,
            pricePerCall=price
        )

    return [
        _describe(externalName, internalName)
        for externalName, internalName in MODEL_MAPPING.items()
    ]
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus(authenticated: bool = Depends(_verifyApiKey)):
    """Report whether Ollama is reachable and which models it offers.

    Failures are reported in the response body (``connected=False`` plus an
    ``error`` text) rather than as an HTTP error.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            tagsResponse = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")

            if tagsResponse.status_code != 200:
                statusText = f"Ollama responded with status {tagsResponse.status_code}"
                return OllamaStatusResponse(connected=False, error=statusText)

            payload = tagsResponse.json()
            installed = [entry.get("name", "") for entry in payload.get("models", [])]
            withVision = list(filter(_isVisionModel, installed))

            return OllamaStatusResponse(
                connected=True,
                models=installed,
                visionModels=withVision,
                totalModels=len(installed)
            )
    except httpx.ConnectError:
        # Ollama is not listening on the configured URL.
        return OllamaStatusResponse(
            connected=False,
            error="Keine Verbindung zu Ollama. Ist Ollama gestartet?"
        )
    except Exception as e:
        # Any other failure (timeout, malformed JSON, ...) is reported as text.
        return OllamaStatusResponse(connected=False, error=str(e))
@app.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
async def _analyzeDocument(
request: AnalyzeRequest,
apiKey: str = Depends(_checkRateLimit)
):
"""
Analysiert ein Dokument mit Ollama Vision API oder verarbeitet Text mit Non-Vision Modellen
Erwartet: { imageBase64 (optional bei Non-Vision), prompt, ollamaUrl, modelName }
Analyze a document with AI Vision API.
Supports both vision models (with images) and text models (without images).
"""
try:
data = request.get_json()
# Get internal model name
internalModelName = _getInternalModelName(request.modelName)
isVision = _isVisionModel(internalModelName)
imageBase64 = data.get('imageBase64')
prompt = data.get('prompt')
ollamaUrl = data.get('ollamaUrl', 'http://localhost:11434')
modelName = data.get('modelName', 'qwen2.5vl:72b')
# Validate request
if isVision and not request.imageBase64:
raise HTTPException(
status_code=400,
detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
)
# Prüfe ob es ein Vision-Modell ist (basierend auf Namenskonvention)
isVisionModel = _isVisionModel(modelName)
if not request.prompt:
raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")
# Bei Vision-Modellen ist ein Bild erforderlich
if isVisionModel and not imageBase64:
return jsonify({'error': 'Kein Bild übermittelt (erforderlich für Vision-Modelle)'}), 400
# Model-specific context lengths (actual model limits)
modelContextLengths = {
"deepseek-ocr": 8192, # 8K context
"qwen2.5vl:7b": 32768, # Use 32K (model supports 125K but RAM limited)
"granite3.2-vision": 16000, # 16K context
}
numCtx = modelContextLengths.get(internalModelName, 8192)
if not prompt:
return jsonify({'error': 'Kein Prompt übermittelt'}), 400
# Request-Body erstellen
# Build request body with model-specific context window
requestBody = {
'model': modelName,
'prompt': prompt,
'stream': False
"model": internalModelName,
"prompt": request.prompt,
"stream": False,
"options": {
"num_ctx": numCtx
}
}
# Bilder nur hinzufügen wenn vorhanden (für Vision-Modelle)
if imageBase64:
requestBody['images'] = [imageBase64]
if request.imageBase64:
requestBody["images"] = [request.imageBase64]
# Ollama API aufrufen (Timeout: 60 Minuten für grosse Modelle)
response = requests.post(
f'{ollamaUrl}/api/generate',
json=requestBody,
timeout=3600 # 60 Minuten
# Call Ollama API
async with httpx.AsyncClient(timeout=3600.0) as client: # 60 min timeout
response = await client.post(
f"{CONFIG['ollamaUrl']}/api/generate",
json=requestBody
)
if response.status_code == 404:
raise HTTPException(
status_code=404,
detail=f'Modell "{internalModelName}" nicht gefunden. Bitte installieren mit: ollama pull {internalModelName}'
)
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
)
responseData = response.json()
responseText = responseData.get("response", "")
# Try to extract JSON from response
extractedData = None
jsonMatch = re.search(r"\{[\s\S]*\}", responseText)
if jsonMatch:
try:
extractedData = json.loads(jsonMatch.group())
except json.JSONDecodeError:
extractedData = None
# Wrap plain text response in JSON object
if extractedData is None:
extractedData = {"response": responseText.strip()}
return AnalyzeResponse(
success=True,
data=extractedData,
rawResponse=responseText
)
except httpx.TimeoutException:
return AnalyzeResponse(
success=False,
error="Zeitüberschreitung bei der Ollama API"
)
if response.status_code == 404:
return jsonify({
'error': f'Modell "{modelName}" nicht gefunden. Bitte installieren Sie es mit: ollama pull {modelName}'
}), 404
if response.status_code != 200:
return jsonify({
'error': f'Ollama API Fehler: {response.status_code} - {response.text[:200]}'
}), response.status_code
responseData = response.json()
responseText = responseData.get('response', '')
# Versuche JSON aus der Antwort zu extrahieren
extractedData = None
jsonMatch = re.search(r'\{[\s\S]*\}', responseText)
if jsonMatch:
try:
extractedData = json.loads(jsonMatch.group())
except json.JSONDecodeError:
# JSON-ähnlicher Text gefunden, aber ungültig
extractedData = None
# Wenn kein JSON gefunden, Antwort in JSON-Objekt verpacken
if extractedData is None:
extractedData = {
'response': responseText.strip()
}
return jsonify({
'success': True,
'data': extractedData,
'rawResponse': responseText
})
except requests.exceptions.Timeout:
return jsonify({'error': 'Zeitüberschreitung bei der Ollama API'}), 504
except requests.exceptions.ConnectionError:
return jsonify({'error': 'Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?'}), 503
except json.JSONDecodeError as e:
return jsonify({'error': f'JSON Parse-Fehler: {str(e)}'}), 400
except httpx.ConnectError:
return AnalyzeResponse(
success=False,
error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
)
except HTTPException:
raise
except Exception as e:
return jsonify({'error': f'Unerwarteter Fehler: {str(e)}'}), 500
logger.error(f"Error analyzing document: {e}")
return AnalyzeResponse(
success=False,
error=f"Unerwarteter Fehler: {str(e)}"
)
@app.route('/api/health', methods=['GET'])
def _healthCheck():
"""Health Check Endpoint"""
return jsonify({'status': 'ok', 'service': 'belegscanner', 'pdfSupport': PDF_SUPPORT})
@app.route('/api/pdf/extract', methods=['POST'])
@_loginRequired
def _extractPdfImages():
"""
Extrahiert Bilder aus einem PDF.
Erwartet: { pdfBase64, page (optional, default: alle) }
"""
@app.post("/api/pdf/extract", tags=["PDF"])
async def _extractPdfImages(
request: PdfExtractRequest,
authenticated: bool = Depends(_verifyApiKey)
):
"""Extract images from a PDF."""
if not PDF_SUPPORT:
return jsonify({
'error': 'PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf'
}), 501
raise HTTPException(
status_code=501,
detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
)
try:
data = request.get_json()
pdfBase64 = data.get('pdfBase64')
pageNum = data.get('page') # Optional: spezifische Seite
pdfBytes = base64.b64decode(request.pdfBase64)
if not pdfBase64:
return jsonify({'error': 'Kein PDF übermittelt'}), 400
# Base64 dekodieren
pdfBytes = base64.b64decode(pdfBase64)
if pageNum is not None:
# Einzelne Seite extrahieren
result = _renderPdfPageAsImage(pdfBytes, pageNum - 1) # 0-basiert
return jsonify({
'success': True,
'image': result
})
if request.page is not None:
# Extract single page
result = _renderPdfPageAsImage(pdfBytes, request.page - 1)
return {"success": True, "image": result}
else:
# Alle Seiten extrahieren (max 5)
# Extract all pages (max 5)
images = _extractImagesFromPdf(pdfBytes, maxPages=5)
return jsonify({
'success': True,
'images': images,
'totalExtracted': len(images)
})
except Exception as e:
return jsonify({'error': f'PDF-Verarbeitungsfehler: {str(e)}'}), 500
@app.route('/api/ollama/status', methods=['GET'])
@_loginRequired
def _ollamaStatus():
"""Prüft ob Ollama erreichbar ist und listet verfügbare Modelle"""
ollamaUrl = request.args.get('url', 'http://localhost:11434')
return {
"success": True,
"images": images,
"totalExtracted": len(images)
}
try:
# Prüfe ob Ollama läuft
response = requests.get(f'{ollamaUrl}/api/tags', timeout=5)
if response.status_code != 200:
return jsonify({
'connected': False,
'error': f'Ollama antwortet mit Status {response.status_code}'
})
data = response.json()
models = [m.get('name', '') for m in data.get('models', [])]
# Filtere Vision-Modelle (enthalten oft 'vision', 'vl', 'llava' im Namen)
visionModels = [m for m in models if any(x in m.lower() for x in ['vision', 'vl', 'llava', 'bakllava'])]
return jsonify({
'connected': True,
'models': models,
'visionModels': visionModels,
'totalModels': len(models)
})
except requests.exceptions.ConnectionError:
return jsonify({
'connected': False,
'error': 'Keine Verbindung zu Ollama. Ist Ollama gestartet?'
})
except Exception as e:
return jsonify({
'connected': False,
'error': str(e)
})
raise HTTPException(
status_code=500,
detail=f"PDF-Verarbeitungsfehler: {str(e)}"
)
# ============================================================================
# Web UI Routes (Optional - for direct browser access)
# ============================================================================
@app.get("/", response_class=HTMLResponse, tags=["Web UI"])
async def _index(request: Request):
    """Serve the document scanner web UI by rendering ``index.html``.

    NOTE(review): this route declares no API-key or login dependency, so the
    page is rendered for any caller - confirm that this is intended.
    """
    return templates.TemplateResponse("index.html", {"request": request})
@app.get("/login", response_class=HTMLResponse, tags=["Web UI"])
async def _loginPage(request: Request):
    """Serve the login page by rendering ``login.html``.

    Only GET is registered here; this function does not process submitted
    credentials.
    """
    return templates.TemplateResponse("login.html", {"request": request})
# ============================================================================
# Main
# ============================================================================
if __name__ == '__main__':
print("\n" + "="*60)
print(" Belegscanner - KI-Dokumentenanalyse")
print(" Powered by Poweron")
print("="*60)
print("\n Server läuft auf: http://localhost:5000")
print(" CORS ist aktiviert für alle Origins")
print("\n Drücke Ctrl+C zum Beenden")
print("="*60 + "\n")
if __name__ == "__main__":
import uvicorn
app.run(host='0.0.0.0', port=5000, debug=True)
print("\n" + "=" * 60)
print(" Private-LLM Service - KI-Dokumentenanalyse")
print(" Powered by PowerOn")
print("=" * 60)
print(f"\n Server läuft auf: http://localhost:5000")
print(f" API Docs: http://localhost:5000/docs")
print(f" Ollama URL: {CONFIG['ollamaUrl']}")
print("\n Drücke Ctrl+C zum Beenden")
print("=" * 60 + "\n")
uvicorn.run(app, host="0.0.0.0", port=5000)

21
config.ini Normal file
View file

@ -0,0 +1,21 @@
# Private-LLM Configuration
# =========================
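# API key for incoming requests (the gateway authenticates with it).
# Must match Connector_AiPrivateLlm_API_SECRET in the gateway env files.
# Generate a key: python -c "import secrets; print(secrets.token_urlsafe(32))"
# WARNING: this file is version-controlled - do not keep a production key here.
# Set the PRIVATE_LLM_API_KEY environment variable instead; it overrides this value.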
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Ollama Server URL
OLLAMA_URL = http://localhost:11434
# Web UI Authentication (optional, für direkten Browser-Zugriff)
AUTH_USERNAME = poweron
AUTH_PASSWORD = poweron
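# Secret key (intended for session management).
# WARNING: do not commit a production secret; the SECRET_KEY environment
# variable overrides this value.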
SECRET_KEY = c8bc1cede035171dedf01f220623e185aa8b83670ef607e97d928d271ac94200
# Rate Limiting
RATE_LIMIT_REQUESTS_PER_MINUTE = 60
RATE_LIMIT_BURST_SIZE = 10

16
docu/requirements.txt Normal file
View file

@ -0,0 +1,16 @@
# FastAPI and dependencies
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6
httpx>=0.26.0
pydantic>=2.5.0
# Templating for web UI
jinja2>=3.1.0
aiofiles>=23.0.0
# PDF Support
pymupdf>=1.24.0
# Production server
gunicorn>=21.0.0

View file

@ -1,6 +0,0 @@
flask>=3.0.0
flask-cors>=4.0.0
requests>=2.31.0
werkzeug>=3.0.0
pymupdf>=1.24.0
gunicorn>=21.0.0

View file

@ -1,8 +1,8 @@
@echo off
chcp 65001 >nul
echo ============================================================
echo Belegscanner - KI-Dokumentenanalyse
echo Powered by Poweron
echo Private-LLM Service - KI-Dokumentenanalyse
echo Powered by PowerOn (FastAPI + Uvicorn)
echo ============================================================
echo.
@ -31,11 +31,14 @@ REM Dependencies installieren
echo [2/3] Installiere Python Dependencies...
pip install -r requirements.txt --quiet
echo [3/3] Starte Python Flask Server...
echo [3/3] Starte FastAPI Server (Uvicorn)...
echo.
echo Server URL: http://localhost:5000
echo API Docs: http://localhost:5000/docs
echo OpenAPI JSON: http://localhost:5000/openapi.json
echo.
echo Server URL: http://localhost:5000
echo Druecke Ctrl+C zum Beenden
echo.
REM Flask starten
python app.py
REM FastAPI mit Uvicorn starten
uvicorn app:app --host 0.0.0.0 --port 5000 --reload

View file

@ -1,9 +1,9 @@
# Belegscanner - Python Web App Starter
# Poweron Design
# Private-LLM Service - FastAPI Starter
# Powered by PowerOn
Write-Host "============================================================" -ForegroundColor Cyan
Write-Host " Belegscanner - KI-Dokumentenanalyse" -ForegroundColor White
Write-Host " Powered by Poweron" -ForegroundColor Magenta
Write-Host " Private-LLM Service - KI-Dokumentenanalyse" -ForegroundColor White
Write-Host " Powered by PowerOn (FastAPI + Uvicorn)" -ForegroundColor Magenta
Write-Host "============================================================" -ForegroundColor Cyan
Write-Host ""
@ -44,11 +44,14 @@ Start-Sleep -Seconds 2
Write-Host "[2/3] Installiere Python Dependencies..." -ForegroundColor Yellow
pip install -r requirements.txt --quiet
Write-Host "[3/3] Starte Flask Server..." -ForegroundColor Yellow
Write-Host "[3/3] Starte FastAPI Server (Uvicorn)..." -ForegroundColor Yellow
Write-Host ""
Write-Host "Server URL: http://localhost:5000" -ForegroundColor Green
Write-Host "API Docs: http://localhost:5000/docs" -ForegroundColor Green
Write-Host "OpenAPI JSON: http://localhost:5000/openapi.json" -ForegroundColor Gray
Write-Host ""
Write-Host "Server URL: http://localhost:5000" -ForegroundColor Green
Write-Host "Druecke Ctrl+C zum Beenden" -ForegroundColor Gray
Write-Host ""
# Flask Server starten
python app.py
# FastAPI Server mit Uvicorn starten
uvicorn app:app --host 0.0.0.0 --port 5000 --reload

BIN
t1.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 258 KiB

View file

@ -807,6 +807,31 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null.</textarea>
// Ollama Status prüfen
checkOllamaBtn.addEventListener('click', _checkOllamaStatus);
// PowerOn model definitions (must match app.py MODEL_MAPPING).
// Fields:
//   name        - external model name; used as the <option> value in the model dropdown
//   displayName - label shown in the dropdown
//   description - tooltip text of the dropdown entry
//   isVision    - returned by _isVisionModel() for this model name
//   ollamaModel - backend model name; the part before ':' is compared as a prefix
//                 against the model names reported by the status endpoint
const POWERON_MODELS = [
    {
        name: 'poweron-vision-general',
        displayName: 'PowerOn Vision General',
        description: 'Handschrift & allgemeine Bilder (qwen2.5vl:7b)',
        isVision: true,
        ollamaModel: 'qwen2.5vl:7b'
    },
    {
        name: 'poweron-vision-deep',
        displayName: 'PowerOn Vision Deep',
        description: 'Rechnungen, Belege, Dokumente (granite3.2-vision)',
        isVision: true,
        ollamaModel: 'granite3.2-vision'
    },
    {
        name: 'poweron-ocr-general',
        displayName: 'PowerOn OCR General',
        description: 'Text-Extraktion / OCR (deepseek-ocr)',
        isVision: true,
        ollamaModel: 'deepseek-ocr'
    }
];
async function _checkOllamaStatus() {
ollamaStatusDiv.style.display = 'block';
ollamaStatusDiv.className = 'ollama-status loading';
@ -819,45 +844,31 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null.</textarea>
if (result.connected) {
ollamaStatusDiv.className = 'ollama-status success';
// Modelle in Dropdown laden
// PowerOn Modelle in Dropdown laden (nur wenn Backend-Modell verfügbar)
modelName.innerHTML = '';
if (result.visionModels && result.visionModels.length > 0) {
const availableModels = result.models || [];
const availablePowerOnModels = POWERON_MODELS.filter(pm =>
availableModels.some(m => m.startsWith(pm.ollamaModel.split(':')[0]))
);
if (availablePowerOnModels.length > 0) {
const optGroup = document.createElement('optgroup');
optGroup.label = 'Vision Modelle (empfohlen)';
result.visionModels.forEach(model => {
optGroup.label = 'PowerOn Modelle';
availablePowerOnModels.forEach(model => {
const opt = document.createElement('option');
opt.value = model;
opt.textContent = model;
opt.value = model.name;
opt.textContent = `${model.displayName}`;
opt.title = model.description;
optGroup.appendChild(opt);
});
modelName.appendChild(optGroup);
// Erstes Modell auswählen
modelName.value = availablePowerOnModels[0].name;
}
if (result.models && result.models.length > 0) {
const otherModels = result.models.filter(m =>
!result.visionModels || !result.visionModels.includes(m)
);
if (otherModels.length > 0) {
const optGroup = document.createElement('optgroup');
optGroup.label = 'Andere Modelle';
otherModels.forEach(model => {
const opt = document.createElement('option');
opt.value = model;
opt.textContent = model;
optGroup.appendChild(opt);
});
modelName.appendChild(optGroup);
}
}
// Erstes Vision-Modell auswählen falls vorhanden
if (result.visionModels && result.visionModels.length > 0) {
modelName.value = result.visionModels[0];
}
ollamaStatusDiv.innerHTML = `✓ Verbunden - ${result.totalModels} Modelle gefunden` +
(result.visionModels?.length ? ` (${result.visionModels.length} Vision-Modelle)` : '');
ollamaStatusDiv.innerHTML = `✓ Verbunden - ${availablePowerOnModels.length} PowerOn Modelle verfügbar`;
// Button-Status nach Modell-Laden aktualisieren
_updateAnalyzeButtonState();
@ -875,8 +886,12 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null.</textarea>
// Helper: Prüft ob Modell ein Vision-Modell ist
function _isVisionModel(model) {
if (!model) return true; // Default: als Vision behandeln
// Check PowerOn models first
const powerOnModel = POWERON_MODELS.find(pm => pm.name === model);
if (powerOnModel) return powerOnModel.isVision;
// Fallback for direct Ollama model names
const modelLower = model.toLowerCase();
return ['vision', 'vl', 'llava', 'bakllava'].some(indicator => modelLower.includes(indicator));
return ['vision', 'vl', 'llava', 'bakllava', 'granite', 'deepseek-ocr'].some(indicator => modelLower.includes(indicator));
}
// Button-Status basierend auf Modell und Bild aktualisieren