diff --git a/app.py b/app.py index f24d6ce..857ba88 100644 --- a/app.py +++ b/app.py @@ -1,17 +1,33 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. """ -Belegscanner - KI-Dokumentenanalyse -Python Flask Web App mit CORS-Unterstützung und Poweron Design +Private-LLM Service - FastAPI Web App +Provides AI model endpoints for OCR and Vision processing via Ollama. + +Models exposed: +- poweron-ocr-general (deepseek) +- poweron-vision-general (qwen2.5) +- poweron-vision-deep (granite3.2) """ -from flask import Flask, render_template, request, jsonify, session, redirect, url_for -from flask_cors import CORS -from functools import wraps -import requests +import os +import sys import base64 import json import re -import io -import os +import logging +import time +from collections import defaultdict +from typing import Optional, List, Dict, Any +from contextlib import asynccontextmanager + +from fastapi import FastAPI, HTTPException, Depends, Header, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import HTMLResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from pydantic import BaseModel, Field +import httpx # PDF Support try: @@ -22,79 +38,243 @@ except ImportError: print("WARNUNG: PyMuPDF nicht installiert. 
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# ============================================================================
# Configuration
# ============================================================================

# Maps the setting name used in config.ini and in the environment to the
# CONFIG key it populates and the converter applied to the raw string.
_CONFIG_KEYS = {
    "PRIVATE_LLM_API_KEY": ("apiKey", str),
    "OLLAMA_URL": ("ollamaUrl", str),
    "AUTH_USERNAME": ("authUsername", str),
    "AUTH_PASSWORD": ("authPassword", str),
    "SECRET_KEY": ("secretKey", str),
    "RATE_LIMIT_REQUESTS_PER_MINUTE": ("rateLimitRequestsPerMinute", int),
    "RATE_LIMIT_BURST_SIZE": ("rateLimitBurstSize", int),
}


def _loadConfig() -> Dict[str, Any]:
    """Load the service configuration.

    Values are resolved in three layers, later layers winning:

    1. built-in defaults,
    2. ``KEY = value`` lines of ``config.ini`` located next to this module
       (blank lines and lines starting with ``#`` are skipped),
    3. environment variables with the same names as the config keys.

    A value that cannot be converted (e.g. a non-numeric rate limit) is
    ignored with a warning and the previously resolved value is kept, so a
    single typo neither aborts parsing of the remaining lines nor crashes
    the module at import time.

    Returns:
        Dict with the keys ``apiKey``, ``ollamaUrl``, ``authUsername``,
        ``authPassword``, ``secretKey``, ``rateLimitRequestsPerMinute`` and
        ``rateLimitBurstSize``.
    """
    configPath = os.path.join(os.path.dirname(__file__), "config.ini")
    config: Dict[str, Any] = {
        "apiKey": None,
        "ollamaUrl": "http://localhost:11434",
        "authUsername": "poweron",
        "authPassword": "poweron",
        "secretKey": "poweron-secret-key-change-in-production",
        "rateLimitRequestsPerMinute": 60,
        "rateLimitBurstSize": 10,
    }

    def _apply(name: str, rawValue: str, source: str) -> None:
        """Store one raw setting if it is known and convertible."""
        target = _CONFIG_KEYS.get(name)
        if target is None:
            return
        configKey, convert = target
        try:
            config[configKey] = convert(rawValue)
        except ValueError:
            # Only the int-typed settings can fail here, so no secret is logged.
            logger.warning(
                "Ignoring invalid value %r for %s from %s", rawValue, name, source
            )

    if os.path.exists(configPath):
        try:
            # utf-8-sig: the file contains umlauts and may carry a BOM when
            # edited on Windows; the platform default encoding is not reliable.
            with open(configPath, "r", encoding="utf-8-sig") as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith("#") or "=" not in line:
                        continue
                    key, value = line.split("=", 1)
                    _apply(key.strip(), value.strip(), "config.ini")
        except (OSError, UnicodeDecodeError) as e:
            logger.warning("Error loading config.ini: %s", e)

    # Override with environment variables if set
    for name in _CONFIG_KEYS:
        if name in os.environ:
            _apply(name, os.environ[name], "environment")

    return config


CONFIG = _loadConfig()

# ============================================================================
# Rate Limiting (Token Bucket Algorithm)
# ============================================================================

class RateLimiter:
    """
    Token bucket rate limiter with per-API-key tracking.

    Each API key gets its own bucket. Tokens are added at a constant rate
    (requestsPerMinute / 60 per second) up to a maximum burst size.

    Time is measured with ``time.monotonic()`` so that wall-clock adjustments
    (NTP steps, manual changes) can neither drain nor refill buckets.
    """

    def __init__(self, requestsPerMinute: int = 60, burstSize: int = 10):
        """Create a limiter.

        Args:
            requestsPerMinute: Sustained refill rate; must be positive.
            burstSize: Maximum number of tokens a bucket can hold.

        Raises:
            ValueError: If ``requestsPerMinute`` is not positive. A zero rate
                would otherwise surface later as ``ZeroDivisionError`` while
                computing the retry delay.
        """
        if requestsPerMinute <= 0:
            raise ValueError("requestsPerMinute must be positive")

        self.requestsPerMinute = requestsPerMinute
        self.burstSize = burstSize
        self.tokensPerSecond = requestsPerMinute / 60.0

        # Track tokens and last update time per API key
        # Format: {apiKey: {"tokens": float, "lastUpdate": float}}
        self._buckets: Dict[str, Dict[str, float]] = defaultdict(
            lambda: {"tokens": self.burstSize, "lastUpdate": time.monotonic()}
        )

    def _refillTokens(self, bucket: Dict[str, float]) -> None:
        """Refill tokens based on elapsed time, capped at the burst size."""
        now = time.monotonic()
        # Clamp so a bucket stamped "in the future" never loses tokens.
        elapsed = max(0.0, now - bucket["lastUpdate"])
        bucket["tokens"] = min(
            self.burstSize,
            bucket["tokens"] + elapsed * self.tokensPerSecond
        )
        bucket["lastUpdate"] = now

    def isAllowed(self, apiKey: str) -> tuple[bool, Dict[str, Any]]:
        """
        Check if a request is allowed and consume a token if so.

        Returns:
            Tuple of (allowed, info). ``info`` always contains ``remaining``,
            ``limit`` and ``resetSeconds``; when the request is rejected it
            additionally contains ``retryAfter`` (seconds until one full
            token is available, rounded to one decimal).
        """
        bucket = self._buckets[apiKey]
        self._refillTokens(bucket)

        if bucket["tokens"] >= 1.0:
            bucket["tokens"] -= 1.0
            return True, {
                "remaining": int(bucket["tokens"]),
                "limit": self.requestsPerMinute,
                "resetSeconds": 60
            }

        # Calculate when the next token will be available
        retryAfter = (1.0 - bucket["tokens"]) / self.tokensPerSecond
        return False, {
            "remaining": 0,
            "limit": self.requestsPerMinute,
            "retryAfter": round(retryAfter, 1),
            "resetSeconds": 60
        }

    def cleanup(self, maxAgeSeconds: int = 3600) -> int:
        """Remove buckets idle for more than ``maxAgeSeconds``.

        Returns:
            The number of buckets removed.
        """
        now = time.monotonic()
        staleKeys = [
            key for key, bucket in self._buckets.items()
            if now - bucket["lastUpdate"] > maxAgeSeconds
        ]
        for key in staleKeys:
            del self._buckets[key]
        return len(staleKeys)


# Global rate limiter instance
rateLimiter = RateLimiter(
    requestsPerMinute=CONFIG["rateLimitRequestsPerMinute"],
    burstSize=CONFIG["rateLimitBurstSize"]
)
model name +# Production models (optimized for 32GB RAM server): +# - deepseek-ocr: 3.34B params, 8K context, ~6.7GB RAM +# - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM +# - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM +MODEL_MAPPING = { + "poweron-ocr-general": "deepseek-ocr", + "poweron-vision-general": "qwen2.5vl:7b", + "poweron-vision-deep": "granite3.2-vision", +} + +# Reverse mapping for lookups +INTERNAL_TO_EXTERNAL = {v: k for k, v in MODEL_MAPPING.items()} + +# ============================================================================ +# Request/Response Models +# ============================================================================ + +class AnalyzeRequest(BaseModel): + """Request model for document analysis.""" + imageBase64: Optional[str] = Field(default=None, description="Base64 encoded image") + prompt: str = Field(description="Analysis prompt") + modelName: str = Field(default="poweron-vision-general", description="Model to use") + +class AnalyzeResponse(BaseModel): + """Response model for document analysis.""" + success: bool = Field(description="Whether the analysis was successful") + data: Optional[Dict[str, Any]] = Field(default=None, description="Extracted data") + rawResponse: Optional[str] = Field(default=None, description="Raw model response") + error: Optional[str] = Field(default=None, description="Error message if failed") + +class PdfExtractRequest(BaseModel): + """Request model for PDF extraction.""" + pdfBase64: str = Field(description="Base64 encoded PDF") + page: Optional[int] = Field(default=None, description="Specific page number (1-indexed)") + +class ModelInfo(BaseModel): + """Model information.""" + name: str = Field(description="External model name") + internalName: str = Field(description="Internal Ollama model name") + isVision: bool = Field(description="Whether it's a vision model") + pricePerCall: float = Field(description="Price per call in CHF") + +class HealthResponse(BaseModel): + """Health check 
def _extractImagesFromPdf(pdfBytes: bytes, maxPages: int = 5) -> List[Dict[str, Any]]:
    """Render the first pages of a PDF to base64-encoded PNG images.

    Args:
        pdfBytes: Raw bytes of the PDF document.
        maxPages: Upper bound on the number of pages rendered, counted from
            the first page.

    Returns:
        One dict per rendered page with the keys ``page`` (1-indexed),
        ``base64`` (PNG data), ``width`` and ``height`` (pixmap size).

    Raises:
        Exception: If PyMuPDF is not available (``PDF_SUPPORT`` is false).
    """
    if not PDF_SUPPORT:
        raise Exception("PDF-Support nicht verfügbar. Bitte PyMuPDF installieren.")

    images: List[Dict[str, Any]] = []
    mat = fitz.Matrix(2.0, 2.0)  # 2x Zoom for better quality
    doc = fitz.open(stream=pdfBytes, filetype="pdf")
    try:
        numPages = min(len(doc), maxPages)

        for pageNum in range(numPages):
            page = doc[pageNum]
            pix = page.get_pixmap(matrix=mat)
            imgBytes = pix.tobytes("png")
            imgBase64 = base64.b64encode(imgBytes).decode("utf-8")

            images.append({
                "page": pageNum + 1,
                "base64": imgBase64,
                "width": pix.width,
                "height": pix.height
            })
    finally:
        # Release the document even when rendering a page fails.
        doc.close()

    return images
- """ +def _renderPdfPageAsImage(pdfBytes: bytes, pageNum: int = 0, zoom: float = 2.0) -> Dict[str, Any]: + """Render a single PDF page as an image.""" if not PDF_SUPPORT: raise Exception("PDF-Support nicht verfügbar.") @@ -106,267 +286,399 @@ def _renderPdfPageAsImage(pdfBytes, pageNum=0, zoom=2.0): page = doc[pageNum] mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat) - imgBytes = pix.tobytes("png") - imgBase64 = base64.b64encode(imgBytes).decode('utf-8') + imgBase64 = base64.b64encode(imgBytes).decode("utf-8") result = { - 'base64': imgBase64, - 'width': pix.width, - 'height': pix.height, - 'page': pageNum + 1, - 'totalPages': len(doc) + "base64": imgBase64, + "width": pix.width, + "height": pix.height, + "page": pageNum + 1, + "totalPages": len(doc) } doc.close() - return result # ============================================================================ # Model Helper Functions # ============================================================================ -def _isVisionModel(modelName): - """ - Prüft ob ein Modell ein Vision-Modell ist basierend auf Namenskonventionen. - Vision-Modelle enthalten typischerweise 'vision', 'vl', 'llava', 'bakllava' im Namen. 
async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")) -> str:
    """Verify the API key from the ``X-API-Key`` header.

    Returns:
        The presented key (used as the rate-limit bucket id), or the literal
        ``"dev-mode"`` when no API key is configured.

    Raises:
        HTTPException: 401 when the header is missing or does not match the
            configured key.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expectedKey = CONFIG["apiKey"]
    if not expectedKey:
        # No API key configured, allow all requests (development mode)
        logger.warning("No API key configured - running in development mode")
        return "dev-mode"

    if not xApiKey:
        raise HTTPException(status_code=401, detail="API key required")

    # Constant-time comparison: a plain != returns as soon as the first
    # differing character is found, which leaks key prefixes through timing.
    # Encoding to bytes is required because compare_digest rejects non-ASCII str.
    if not hmac.compare_digest(xApiKey.encode("utf-8"), expectedKey.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid API key")

    return xApiKey
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
    """Check the rate limit for the authenticated API key.

    Consumes one token from the caller's bucket.

    Returns:
        The API key, unchanged, when the request is allowed.

    Raises:
        HTTPException: 429 with ``Retry-After`` and ``X-RateLimit-*`` headers
            when the bucket is empty.
    """
    # Function-scope import keeps this block self-contained.
    import math

    allowed, info = rateLimiter.isAllowed(apiKey)
    if allowed:
        return apiKey

    # Retry-After carries whole seconds. Round up (never down to 0) so a
    # client honouring the header does not retry before a token exists.
    retryAfterSeconds = max(1, math.ceil(info["retryAfter"]))

    raise HTTPException(
        status_code=429,
        detail={
            "error": "Rate limit exceeded",
            "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
            "retryAfter": info["retryAfter"],
            "limit": info["limit"],
            "remaining": info["remaining"]
        },
        headers={
            "Retry-After": str(retryAfterSeconds),
            "X-RateLimit-Limit": str(info["limit"]),
            "X-RateLimit-Remaining": str(info["remaining"]),
            "X-RateLimit-Reset": str(info["resetSeconds"])
        }
    )
instances +ALLOWED_ORIGINS = [ + "http://localhost:8000", + "http://localhost:8080", + "http://localhost:5000", + "http://127.0.0.1:8000", + "http://127.0.0.1:8080", + "http://127.0.0.1:5000", +] + +# Add production origins +PRODUCTION_PATTERNS = [ + "poweron.swiss", + "poweron-center.net", +] + +# Build full origins list with https variants +for pattern in PRODUCTION_PATTERNS: + ALLOWED_ORIGINS.extend([ + f"https://{pattern}", + f"https://www.{pattern}", + f"https://api.{pattern}", + f"https://gateway.{pattern}", + f"https://app.{pattern}", + f"https://nyla.{pattern}", + f"https://playground.{pattern}", + ]) + +# Allow all subdomains via regex in middleware +app.add_middleware( + CORSMiddleware, + allow_origins=ALLOWED_ORIGINS, + allow_origin_regex=r"https://.*\.(poweron\.swiss|poweron-center\.net)", + allow_credentials=True, + allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], + allow_headers=["*"], + expose_headers=["*"], + max_age=86400, +) + +# Static files and templates (for web UI) +app.mount("/static", StaticFiles(directory="static"), name="static") +templates = Jinja2Templates(directory="templates") + +# ============================================================================ +# API Routes +# ============================================================================ + +@app.get("/api/health", response_model=HealthResponse, tags=["System"]) +async def _healthCheck(): + """Health check endpoint.""" + ollamaConnected = False + try: + async with httpx.AsyncClient(timeout=5.0) as client: + response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags") + ollamaConnected = response.status_code == 200 + except Exception: + pass + + return HealthResponse( + status="ok", + service="private-llm", + pdfSupport=PDF_SUPPORT, + ollamaConnected=ollamaConnected + ) + +@app.get("/api/models", response_model=List[ModelInfo], tags=["Models"]) +async def _listModels(authenticated: bool = Depends(_verifyApiKey)): + """List available models with pricing.""" + 
def _extractJsonObject(responseText: str) -> Optional[Dict[str, Any]]:
    """Return a JSON object embedded in *responseText*, or None if there is none."""
    # First attempt: everything from the first "{" to the last "}".
    jsonMatch = re.search(r"\{[\s\S]*\}", responseText)
    if not jsonMatch:
        return None
    try:
        parsed = json.loads(jsonMatch.group())
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Fallback: the greedy span fails when a valid object is followed by more
    # text containing "}". Decode from each "{" and accept the first object.
    decoder = json.JSONDecoder()
    start = responseText.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(responseText, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = responseText.find("{", start + 1)
    return None


@app.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
async def _analyzeDocument(
    request: AnalyzeRequest,
    apiKey: str = Depends(_checkRateLimit)
):
    """
    Analyze a document with AI Vision API.

    Supports both vision models (with images) and text models (without images).

    The external model name is mapped to the internal Ollama model, the prompt
    (and image, if given) is sent to ``/api/generate``, and a JSON object is
    extracted from the model's answer. If no JSON object can be parsed, the
    plain text is wrapped as ``{"response": <text>}``.

    Raises:
        HTTPException: 400 for a missing image (vision models) or prompt,
            404 when Ollama does not know the model, or Ollama's own status
            code for any other non-200 answer.

    NOTE(review): timeouts, connection failures and unexpected errors are
    returned as HTTP 200 with ``success=False``, whereas the Flask handler
    this replaces answered 504 / 503 / 500 — confirm this is intended.
    """
    try:
        # Get internal model name
        internalModelName = _getInternalModelName(request.modelName)
        isVision = _isVisionModel(internalModelName)

        # Validate request
        if isVision and not request.imageBase64:
            raise HTTPException(
                status_code=400,
                detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
            )

        if not request.prompt:
            raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")

        # Model-specific context lengths (actual model limits)
        modelContextLengths = {
            "deepseek-ocr": 8192,        # 8K context
            "qwen2.5vl:7b": 32768,       # Use 32K (model supports 125K but RAM limited)
            "granite3.2-vision": 16000,  # 16K context
        }
        numCtx = modelContextLengths.get(internalModelName, 8192)

        # Build request body with model-specific context window
        requestBody = {
            "model": internalModelName,
            "prompt": request.prompt,
            "stream": False,
            "options": {
                "num_ctx": numCtx
            }
        }

        if request.imageBase64:
            requestBody["images"] = [request.imageBase64]

        # Call Ollama API
        async with httpx.AsyncClient(timeout=3600.0) as client:  # 60 min timeout
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )

        if response.status_code == 404:
            raise HTTPException(
                status_code=404,
                detail=f'Modell "{internalModelName}" nicht gefunden. Bitte installieren mit: ollama pull {internalModelName}'
            )

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
            )

        responseData = response.json()
        # "or" also covers an explicit null in the payload.
        responseText = responseData.get("response") or ""

        # Try to extract JSON from response
        extractedData = _extractJsonObject(responseText)

        # Wrap plain text response in JSON object
        if extractedData is None:
            extractedData = {"response": responseText.strip()}

        return AnalyzeResponse(
            success=True,
            data=extractedData,
            rawResponse=responseText
        )

    except httpx.TimeoutException:
        return AnalyzeResponse(
            success=False,
            error="Zeitüberschreitung bei der Ollama API"
        )
    except httpx.ConnectError:
        return AnalyzeResponse(
            success=False,
            error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
        )
    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error analyzing document: %s", e)
        return AnalyzeResponse(
            success=False,
            error=f"Unerwarteter Fehler: {str(e)}"
        )
@app.post("/api/pdf/extract", tags=["PDF"])
async def _extractPdfImages(
    request: PdfExtractRequest,
    authenticated: str = Depends(_verifyApiKey)
):
    """Extract images from a PDF.

    With ``request.page`` set, that single page (1-indexed) is rendered and
    returned as ``{"success": True, "image": {...}}``. Otherwise up to five
    pages are rendered and returned as
    ``{"success": True, "images": [...], "totalExtracted": n}``.

    Raises:
        HTTPException: 501 when PyMuPDF is not installed, 400 for a page
            number below 1 or undecodable base64 input, 500 when rendering
            fails.
    """
    if not PDF_SUPPORT:
        raise HTTPException(
            status_code=501,
            detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
        )

    # Pages are 1-indexed in the API. Reject 0 and negatives here instead of
    # forwarding a negative index to the renderer.
    if request.page is not None and request.page < 1:
        raise HTTPException(
            status_code=400,
            detail="Ungültige Seitenzahl (muss >= 1 sein)"
        )

    try:
        pdfBytes = base64.b64decode(request.pdfBase64)
    except ValueError:
        # binascii.Error is a ValueError subclass: malformed client input.
        raise HTTPException(status_code=400, detail="Ungültige Base64-Daten")

    try:
        if request.page is not None:
            # Extract single page
            result = _renderPdfPageAsImage(pdfBytes, request.page - 1)
            return {"success": True, "image": result}

        # Extract all pages (max 5)
        images = _extractImagesFromPdf(pdfBytes, maxPages=5)
        return {
            "success": True,
            "images": images,
            "totalExtracted": len(images)
        }

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"PDF-Verarbeitungsfehler: {str(e)}"
        )
debug=True) + print("\n" + "=" * 60) + print(" Private-LLM Service - KI-Dokumentenanalyse") + print(" Powered by PowerOn") + print("=" * 60) + print(f"\n Server läuft auf: http://localhost:5000") + print(f" API Docs: http://localhost:5000/docs") + print(f" Ollama URL: {CONFIG['ollamaUrl']}") + print("\n Drücke Ctrl+C zum Beenden") + print("=" * 60 + "\n") + + uvicorn.run(app, host="0.0.0.0", port=5000) diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..cfe9374 --- /dev/null +++ b/config.ini @@ -0,0 +1,21 @@ +# Private-LLM Configuration +# ========================= + +# API Key für eingehende Requests (Gateway authentifiziert sich damit) +# Muss mit Connector_AiPrivateLlm_API_SECRET in Gateway env-Files übereinstimmen +# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))" +PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0 + +# Ollama Server URL +OLLAMA_URL = http://localhost:11434 + +# Web UI Authentication (optional, für direkten Browser-Zugriff) +AUTH_USERNAME = poweron +AUTH_PASSWORD = poweron + +# FastAPI Secret Key (für Session-Management) +SECRET_KEY = c8bc1cede035171dedf01f220623e185aa8b83670ef607e97d928d271ac94200 + +# Rate Limiting +RATE_LIMIT_REQUESTS_PER_MINUTE = 60 +RATE_LIMIT_BURST_SIZE = 10 diff --git a/docu/requirements.txt b/docu/requirements.txt new file mode 100644 index 0000000..be74a22 --- /dev/null +++ b/docu/requirements.txt @@ -0,0 +1,16 @@ +# FastAPI and dependencies +fastapi>=0.109.0 +uvicorn[standard]>=0.27.0 +python-multipart>=0.0.6 +httpx>=0.26.0 +pydantic>=2.5.0 + +# Templating for web UI +jinja2>=3.1.0 +aiofiles>=23.0.0 + +# PDF Support +pymupdf>=1.24.0 + +# Production server +gunicorn>=21.0.0 diff --git a/setupserver.md b/docu/setupserver.md similarity index 100% rename from setupserver.md rename to docu/setupserver.md diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c350cd9..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ 
-flask>=3.0.0 -flask-cors>=4.0.0 -requests>=2.31.0 -werkzeug>=3.0.0 -pymupdf>=1.24.0 -gunicorn>=21.0.0 \ No newline at end of file diff --git a/start-python.bat b/start-python.bat index 7a5b4ad..064704b 100644 --- a/start-python.bat +++ b/start-python.bat @@ -1,8 +1,8 @@ @echo off chcp 65001 >nul echo ============================================================ -echo Belegscanner - KI-Dokumentenanalyse -echo Powered by Poweron +echo Private-LLM Service - KI-Dokumentenanalyse +echo Powered by PowerOn (FastAPI + Uvicorn) echo ============================================================ echo. @@ -31,11 +31,14 @@ REM Dependencies installieren echo [2/3] Installiere Python Dependencies... pip install -r requirements.txt --quiet -echo [3/3] Starte Python Flask Server... +echo [3/3] Starte FastAPI Server (Uvicorn)... +echo. +echo Server URL: http://localhost:5000 +echo API Docs: http://localhost:5000/docs +echo OpenAPI JSON: http://localhost:5000/openapi.json echo. -echo Server URL: http://localhost:5000 echo Druecke Ctrl+C zum Beenden echo. 
-REM Flask starten -python app.py +REM FastAPI mit Uvicorn starten +uvicorn app:app --host 0.0.0.0 --port 5000 --reload diff --git a/start-python.ps1 b/start-python.ps1 index 7feab99..32ae31a 100644 --- a/start-python.ps1 +++ b/start-python.ps1 @@ -1,9 +1,9 @@ -# Belegscanner - Python Web App Starter -# Poweron Design +# Private-LLM Service - FastAPI Starter +# Powered by PowerOn Write-Host "============================================================" -ForegroundColor Cyan -Write-Host " Belegscanner - KI-Dokumentenanalyse" -ForegroundColor White -Write-Host " Powered by Poweron" -ForegroundColor Magenta +Write-Host " Private-LLM Service - KI-Dokumentenanalyse" -ForegroundColor White +Write-Host " Powered by PowerOn (FastAPI + Uvicorn)" -ForegroundColor Magenta Write-Host "============================================================" -ForegroundColor Cyan Write-Host "" @@ -44,11 +44,14 @@ Start-Sleep -Seconds 2 Write-Host "[2/3] Installiere Python Dependencies..." -ForegroundColor Yellow pip install -r requirements.txt --quiet -Write-Host "[3/3] Starte Flask Server..." -ForegroundColor Yellow +Write-Host "[3/3] Starte FastAPI Server (Uvicorn)..." 
-ForegroundColor Yellow +Write-Host "" +Write-Host "Server URL: http://localhost:5000" -ForegroundColor Green +Write-Host "API Docs: http://localhost:5000/docs" -ForegroundColor Green +Write-Host "OpenAPI JSON: http://localhost:5000/openapi.json" -ForegroundColor Gray Write-Host "" -Write-Host "Server URL: http://localhost:5000" -ForegroundColor Green Write-Host "Druecke Ctrl+C zum Beenden" -ForegroundColor Gray Write-Host "" -# Flask Server starten -python app.py +# FastAPI Server mit Uvicorn starten +uvicorn app:app --host 0.0.0.0 --port 5000 --reload diff --git a/t1.png b/t1.png deleted file mode 100644 index fc0618a..0000000 Binary files a/t1.png and /dev/null differ diff --git a/templates/index.html b/templates/index.html index fbeacf8..4a7aee0 100644 --- a/templates/index.html +++ b/templates/index.html @@ -807,6 +807,31 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null. // Ollama Status prüfen checkOllamaBtn.addEventListener('click', _checkOllamaStatus); + // PowerOn Model Definitions (must match app.py MODEL_MAPPING) + const POWERON_MODELS = [ + { + name: 'poweron-vision-general', + displayName: 'PowerOn Vision General', + description: 'Handschrift & allgemeine Bilder (qwen2.5vl:7b)', + isVision: true, + ollamaModel: 'qwen2.5vl:7b' + }, + { + name: 'poweron-vision-deep', + displayName: 'PowerOn Vision Deep', + description: 'Rechnungen, Belege, Dokumente (granite3.2-vision)', + isVision: true, + ollamaModel: 'granite3.2-vision' + }, + { + name: 'poweron-ocr-general', + displayName: 'PowerOn OCR General', + description: 'Text-Extraktion / OCR (deepseek-ocr)', + isVision: true, + ollamaModel: 'deepseek-ocr' + } + ]; + async function _checkOllamaStatus() { ollamaStatusDiv.style.display = 'block'; ollamaStatusDiv.className = 'ollama-status loading'; @@ -819,45 +844,31 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null. 
if (result.connected) { ollamaStatusDiv.className = 'ollama-status success'; - // Modelle in Dropdown laden + // PowerOn Modelle in Dropdown laden (nur wenn Backend-Modell verfügbar) modelName.innerHTML = ''; - if (result.visionModels && result.visionModels.length > 0) { + const availableModels = result.models || []; + const availablePowerOnModels = POWERON_MODELS.filter(pm => + availableModels.some(m => m.startsWith(pm.ollamaModel.split(':')[0])) + ); + + if (availablePowerOnModels.length > 0) { const optGroup = document.createElement('optgroup'); - optGroup.label = 'Vision Modelle (empfohlen)'; - result.visionModels.forEach(model => { + optGroup.label = 'PowerOn Modelle'; + availablePowerOnModels.forEach(model => { const opt = document.createElement('option'); - opt.value = model; - opt.textContent = model; + opt.value = model.name; + opt.textContent = `${model.displayName}`; + opt.title = model.description; optGroup.appendChild(opt); }); modelName.appendChild(optGroup); + + // Erstes Modell auswählen + modelName.value = availablePowerOnModels[0].name; } - if (result.models && result.models.length > 0) { - const otherModels = result.models.filter(m => - !result.visionModels || !result.visionModels.includes(m) - ); - if (otherModels.length > 0) { - const optGroup = document.createElement('optgroup'); - optGroup.label = 'Andere Modelle'; - otherModels.forEach(model => { - const opt = document.createElement('option'); - opt.value = model; - opt.textContent = model; - optGroup.appendChild(opt); - }); - modelName.appendChild(optGroup); - } - } - - // Erstes Vision-Modell auswählen falls vorhanden - if (result.visionModels && result.visionModels.length > 0) { - modelName.value = result.visionModels[0]; - } - - ollamaStatusDiv.innerHTML = `✓ Verbunden - ${result.totalModels} Modelle gefunden` + - (result.visionModels?.length ? 
` (${result.visionModels.length} Vision-Modelle)` : ''); + ollamaStatusDiv.innerHTML = `✓ Verbunden - ${availablePowerOnModels.length} PowerOn Modelle verfügbar`; // Button-Status nach Modell-Laden aktualisieren _updateAnalyzeButtonState(); @@ -875,8 +886,12 @@ Falls ein Feld nicht erkennbar ist, setze den Wert auf null. // Helper: Prüft ob Modell ein Vision-Modell ist function _isVisionModel(model) { if (!model) return true; // Default: als Vision behandeln + // Check PowerOn models first + const powerOnModel = POWERON_MODELS.find(pm => pm.name === model); + if (powerOnModel) return powerOnModel.isVision; + // Fallback for direct Ollama model names const modelLower = model.toLowerCase(); - return ['vision', 'vl', 'llava', 'bakllava'].some(indicator => modelLower.includes(indicator)); + return ['vision', 'vl', 'llava', 'bakllava', 'granite', 'deepseek-ocr'].some(indicator => modelLower.includes(indicator)); } // Button-Status basierend auf Modell und Bild aktualisieren