713 lines
25 KiB
Python
713 lines
25 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Private-LLM Service - FastAPI Web App
|
|
Provides AI model endpoints for OCR and Vision processing via Ollama.
|
|
|
|
Models exposed:
|
|
- poweron-ocr-general (deepseek)
|
|
- poweron-vision-general (qwen2.5)
|
|
- poweron-vision-deep (granite3.2)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import base64
|
|
import json
|
|
import re
|
|
import logging
|
|
import time
|
|
from collections import defaultdict
|
|
from typing import Optional, List, Dict, Any
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI, HTTPException, Depends, Header, Request
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import HTMLResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from fastapi.templating import Jinja2Templates
|
|
from pydantic import BaseModel, Field
|
|
import httpx
|
|
|
|
# PDF support is optional: without PyMuPDF the service still starts and
# PDF_SUPPORT tells the PDF helpers/endpoints to refuse requests.
try:
    import fitz  # PyMuPDF
except ImportError:
    PDF_SUPPORT = False
    print("WARNUNG: PyMuPDF nicht installiert. PDF-Support deaktiviert.")
    print("Installieren mit: pip install pymupdf")
else:
    PDF_SUPPORT = True
|
|
|
|
# Configure root logging once at import time; every record carries
# timestamp, level, logger name and message.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
|
|
|
|
# ============================================================================
|
|
# Configuration
|
|
# ============================================================================
|
|
|
|
def _loadConfig() -> Dict[str, Any]:
    """Load service configuration from config.ini and the environment.

    Values are resolved in increasing priority:

    1. built-in defaults,
    2. ``KEY=value`` lines of ``config.ini`` located next to this module
       (blank lines, ``#`` comments and unknown keys are ignored),
    3. environment variables with the same names as the config.ini keys.

    A config.ini value that cannot be converted (e.g. a non-numeric rate
    limit) is logged and skipped; the remaining lines are still applied.

    Returns:
        Dict with the keys ``apiKey``, ``ollamaUrl``, ``authUsername``,
        ``authPassword``, ``secretKey``, ``rateLimitRequestsPerMinute``
        and ``rateLimitBurstSize``.

    Raises:
        ValueError: If a rate-limit environment variable is not an integer.
    """
    configPath = os.path.join(os.path.dirname(__file__), "config.ini")

    # NOTE(review): the credential and secret-key defaults are placeholders
    # and should be overridden in any real deployment.
    config: Dict[str, Any] = {
        "apiKey": None,
        "ollamaUrl": "http://localhost:11434",
        "authUsername": "poweron",
        "authPassword": "poweron",
        "secretKey": "poweron-secret-key-change-in-production",
        "rateLimitRequestsPerMinute": 60,
        "rateLimitBurstSize": 10,
    }

    # External name (config.ini key / environment variable) ->
    # (key in the returned dict, converter applied to the raw string).
    settings = {
        "PRIVATE_LLM_API_KEY": ("apiKey", str),
        "OLLAMA_URL": ("ollamaUrl", str),
        "AUTH_USERNAME": ("authUsername", str),
        "AUTH_PASSWORD": ("authPassword", str),
        "SECRET_KEY": ("secretKey", str),
        "RATE_LIMIT_REQUESTS_PER_MINUTE": ("rateLimitRequestsPerMinute", int),
        "RATE_LIMIT_BURST_SIZE": ("rateLimitBurstSize", int),
    }

    if os.path.exists(configPath):
        try:
            with open(configPath, "r", encoding="utf-8") as f:
                for rawLine in f:
                    line = rawLine.strip()
                    if not line or line.startswith("#") or "=" not in line:
                        continue

                    key, value = (part.strip() for part in line.split("=", 1))
                    if key not in settings:
                        continue

                    target, convert = settings[key]
                    try:
                        config[target] = convert(value)
                    except ValueError:
                        # Skip only the offending line so one typo does not
                        # discard every setting that follows it.
                        logger.warning(
                            f"Ignoring invalid value for {key} in config.ini: {value!r}"
                        )
        except Exception as e:
            logger.warning(f"Error loading config.ini: {e}")

    # Override with environment variables if set
    for envName, (target, convert) in settings.items():
        if envName in os.environ:
            config[target] = convert(os.environ[envName])

    return config
|
|
|
|
# Effective configuration, resolved once when the module is imported.
CONFIG: Dict[str, Any] = _loadConfig()
|
|
|
|
|
|
# ============================================================================
|
|
# Rate Limiting (Token Bucket Algorithm)
|
|
# ============================================================================
|
|
|
|
class RateLimiter:
    """
    Token bucket rate limiter with per-API-key tracking.

    Each API key gets its own bucket. A bucket starts full (``burstSize``
    tokens) and is refilled at a constant rate of ``requestsPerMinute / 60``
    tokens per second, capped at ``burstSize``. Every allowed request
    consumes one token.

    Timestamps come from a monotonic clock, so system clock adjustments
    cannot drain or over-fill a bucket.
    """

    def __init__(self, requestsPerMinute: int = 60, burstSize: int = 10):
        """
        Create a limiter.

        Args:
            requestsPerMinute: Sustained refill rate; must be greater than zero.
            burstSize: Maximum number of tokens a bucket can hold.

        Raises:
            ValueError: If ``requestsPerMinute`` is not positive (the retry
                delay would otherwise require a division by zero).
        """
        if requestsPerMinute <= 0:
            raise ValueError("requestsPerMinute must be greater than zero")

        self.requestsPerMinute = requestsPerMinute
        self.burstSize = burstSize
        self.tokensPerSecond = requestsPerMinute / 60.0

        # Track tokens and last update time per API key
        # Format: {apiKey: {"tokens": float, "lastUpdate": float}}
        self._buckets: Dict[str, Dict[str, float]] = defaultdict(self._newBucket)

    def _newBucket(self) -> Dict[str, float]:
        """Return a full bucket stamped with the current monotonic time."""
        return {"tokens": float(self.burstSize), "lastUpdate": time.monotonic()}

    def _refillTokens(self, bucket: Dict[str, float]) -> None:
        """Refill tokens based on elapsed time, capped at the burst size."""
        now = time.monotonic()
        # Guard against a timestamp that lies in the future: never let a
        # negative interval remove tokens.
        elapsed = max(0.0, now - bucket["lastUpdate"])
        bucket["tokens"] = min(
            self.burstSize,
            bucket["tokens"] + elapsed * self.tokensPerSecond,
        )
        bucket["lastUpdate"] = now

    def isAllowed(self, apiKey: str) -> tuple[bool, Dict[str, Any]]:
        """
        Check if a request is allowed and consume a token if so.

        Args:
            apiKey: Identifier of the bucket to charge.

        Returns:
            Tuple of (allowed, info). ``info`` always contains ``remaining``,
            ``limit`` and ``resetSeconds``; when the request is denied it
            also contains ``retryAfter`` (seconds until one token is
            available, rounded to one decimal).
        """
        bucket = self._buckets[apiKey]
        self._refillTokens(bucket)

        if bucket["tokens"] >= 1.0:
            bucket["tokens"] -= 1.0
            return True, {
                "remaining": int(bucket["tokens"]),
                "limit": self.requestsPerMinute,
                "resetSeconds": 60,
            }

        # Calculate when the next token will be available
        retryAfter = (1.0 - bucket["tokens"]) / self.tokensPerSecond
        return False, {
            "remaining": 0,
            "limit": self.requestsPerMinute,
            "retryAfter": round(retryAfter, 1),
            "resetSeconds": 60,
        }

    def cleanup(self, maxAgeSeconds: int = 3600) -> int:
        """
        Remove stale buckets to prevent memory growth.

        Args:
            maxAgeSeconds: Buckets not touched for longer than this are dropped.

        Returns:
            Number of buckets removed.
        """
        now = time.monotonic()
        staleKeys = [
            key
            for key, bucket in self._buckets.items()
            if now - bucket["lastUpdate"] > maxAgeSeconds
        ]
        for key in staleKeys:
            del self._buckets[key]
        return len(staleKeys)
|
|
|
|
|
|
# Process-wide rate limiter shared by all requests, sized from CONFIG.
rateLimiter = RateLimiter(
    CONFIG["rateLimitRequestsPerMinute"],
    CONFIG["rateLimitBurstSize"],
)
|
|
|
|
# Public (external) model name -> Ollama (internal) model name.
# Production models (optimized for 32GB RAM server):
# - qwen2.5:7b: 7.6B params, 128K context, ~4.7GB RAM (Text)
# - qwen2.5vl:7b: 8.29B params, 125K context, ~6GB RAM (Vision)
# - granite3.2-vision: 2B params, 16K context, ~2.4GB RAM (Vision)
MODEL_MAPPING = {
    "poweron-text-general": "qwen2.5:7b",
    "poweron-vision-general": "qwen2.5vl:7b",
    "poweron-vision-deep": "granite3.2-vision",
}

# Inverse lookup: Ollama model name -> public model name.
INTERNAL_TO_EXTERNAL = dict(zip(MODEL_MAPPING.values(), MODEL_MAPPING.keys()))
|
|
|
|
# ============================================================================
|
|
# Request/Response Models
|
|
# ============================================================================
|
|
|
|
class AnalyzeRequest(BaseModel):
    """Request model for document analysis."""

    # Image payload; optional at the schema level.
    imageBase64: Optional[str] = Field(None, description="Base64 encoded image")
    # Required instruction text sent to the model.
    prompt: str = Field(..., description="Analysis prompt")
    # Model name as given by the caller; defaults to the general vision model.
    modelName: str = Field("poweron-vision-general", description="Model to use")
|
|
|
|
class AnalyzeResponse(BaseModel):
    """Response model for document analysis."""

    # Required outcome flag.
    success: bool = Field(..., description="Whether the analysis was successful")
    # The three fields below are optional and default to None.
    data: Optional[Dict[str, Any]] = Field(None, description="Extracted data")
    rawResponse: Optional[str] = Field(None, description="Raw model response")
    error: Optional[str] = Field(None, description="Error message if failed")
|
|
|
|
class PdfExtractRequest(BaseModel):
    """Request model for PDF extraction."""

    # Required PDF payload.
    pdfBase64: str = Field(..., description="Base64 encoded PDF")
    # Optional page selector; None is the default.
    page: Optional[int] = Field(None, description="Specific page number (1-indexed)")
|
|
|
|
class ModelInfo(BaseModel):
    """Model information."""

    # All four fields are required.
    name: str = Field(..., description="External model name")
    internalName: str = Field(..., description="Internal Ollama model name")
    isVision: bool = Field(..., description="Whether it's a vision model")
    pricePerCall: float = Field(..., description="Price per call in CHF")
|
|
|
|
class HealthResponse(BaseModel):
    """Health check response."""

    # All fields are required and carry no extra schema metadata.
    status: str = Field(...)
    service: str = Field(...)
    pdfSupport: bool = Field(...)
    ollamaConnected: bool = Field(...)
|
|
|
|
class OllamaStatusResponse(BaseModel):
    """Ollama status response."""

    # Required connectivity flag.
    connected: bool = Field(...)
    # The remaining fields are optional and default to None.
    models: Optional[List[str]] = Field(default=None)
    visionModels: Optional[List[str]] = Field(default=None)
    totalModels: Optional[int] = Field(default=None)
    error: Optional[str] = Field(default=None)
|
|
|
|
# ============================================================================
|
|
# PDF Helper Functions
|
|
# ============================================================================
|
|
|
|
def _extractImagesFromPdf(pdfBytes: bytes, maxPages: int = 5) -> List[Dict[str, Any]]:
    """Render the leading pages of a PDF as base64-encoded PNG images.

    Args:
        pdfBytes: Raw PDF file content.
        maxPages: Upper bound on the number of pages rendered, starting
            with the first page.

    Returns:
        One dict per rendered page with the keys ``page`` (1-indexed),
        ``base64`` (PNG data), ``width`` and ``height`` (pixmap size).

    Raises:
        Exception: If PyMuPDF is not installed; errors raised by PyMuPDF
            while opening or rendering propagate unchanged.
    """
    if not PDF_SUPPORT:
        raise Exception("PDF-Support nicht verfügbar. Bitte PyMuPDF installieren.")

    doc = fitz.open(stream=pdfBytes, filetype="pdf")
    try:
        mat = fitz.Matrix(2.0, 2.0)  # 2x Zoom for better quality
        images = []
        for pageNum in range(min(len(doc), maxPages)):
            pix = doc[pageNum].get_pixmap(matrix=mat)
            images.append({
                "page": pageNum + 1,
                "base64": base64.b64encode(pix.tobytes("png")).decode("utf-8"),
                "width": pix.width,
                "height": pix.height,
            })
        return images
    finally:
        # Release the document even when rendering a page fails.
        doc.close()
|
|
|
|
def _renderPdfPageAsImage(pdfBytes: bytes, pageNum: int = 0, zoom: float = 2.0) -> Dict[str, Any]:
    """Render a single PDF page as a base64-encoded PNG image.

    Args:
        pdfBytes: Raw PDF file content.
        pageNum: 0-indexed page to render. Out-of-range values are clamped
            to the first or last page.
        zoom: Scale factor applied to both axes when rendering.

    Returns:
        Dict with the keys ``base64`` (PNG data), ``width``, ``height``,
        ``page`` (1-indexed page actually rendered) and ``totalPages``.

    Raises:
        Exception: If PyMuPDF is not installed or the document has no
            pages; errors raised by PyMuPDF propagate unchanged.
    """
    if not PDF_SUPPORT:
        raise Exception("PDF-Support nicht verfügbar.")

    doc = fitz.open(stream=pdfBytes, filetype="pdf")
    try:
        totalPages = len(doc)
        if totalPages == 0:
            raise Exception("PDF enthält keine Seiten.")

        # Clamp into [0, totalPages - 1] so the reported page number always
        # matches the page that was rendered.
        pageNum = max(0, min(pageNum, totalPages - 1))

        pix = doc[pageNum].get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        return {
            "base64": base64.b64encode(pix.tobytes("png")).decode("utf-8"),
            "width": pix.width,
            "height": pix.height,
            "page": pageNum + 1,
            "totalPages": totalPages,
        }
    finally:
        # Release the document even when rendering fails.
        doc.close()
|
|
|
|
# ============================================================================
|
|
# Model Helper Functions
|
|
# ============================================================================
|
|
|
|
def _isVisionModel(modelName: str) -> bool:
    """Return True when the model name contains a known vision marker.

    The check is a case-insensitive substring match; empty or missing
    names are never treated as vision models.
    """
    if not modelName:
        return False

    lowered = modelName.lower()
    for marker in ("vision", "vl", "llava", "bakllava", "granite"):
        if marker in lowered:
            return True
    return False
|
|
|
|
def _getInternalModelName(externalName: str) -> str:
    """Translate a public model name to its Ollama name.

    Names without a mapping entry are returned unchanged.
    """
    if externalName in MODEL_MAPPING:
        return MODEL_MAPPING[externalName]
    return externalName
|
|
|
|
def _getExternalModelName(internalName: str) -> str:
    """Translate an Ollama model name to its public name.

    Names without a reverse-mapping entry are returned unchanged.
    """
    if internalName in INTERNAL_TO_EXTERNAL:
        return INTERNAL_TO_EXTERNAL[internalName]
    return internalName
|
|
|
|
# ============================================================================
|
|
# Authentication & Rate Limiting
|
|
# ============================================================================
|
|
|
|
async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")) -> str:
    """Verify the API key from the X-API-Key header.

    Returns:
        The validated key, or the fixed string ``"dev-mode"`` when no API
        key is configured. The value doubles as the rate-limit bucket key.

    Raises:
        HTTPException: 401 when a key is configured and the header is
            missing or does not match.
    """
    import hmac  # local import: only needed for the comparison below

    expectedKey = CONFIG["apiKey"]
    if not expectedKey:
        # No API key configured, allow all requests (development mode)
        logger.warning("No API key configured - running in development mode")
        return "dev-mode"

    if not xApiKey:
        raise HTTPException(status_code=401, detail="API key required")

    # Constant-time comparison so response timing does not reveal how much
    # of a guessed key is correct. Bytes are compared because
    # compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(xApiKey.encode("utf-8"), expectedKey.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid API key")

    return xApiKey
|
|
|
|
|
|
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
    """Check rate limit for the authenticated API key.

    Returns:
        The API key, unchanged, when a token could be consumed.

    Raises:
        HTTPException: 429 with ``Retry-After`` and ``X-RateLimit-*``
            headers when the key's bucket is empty.
    """
    import math  # local import: only needed for rounding Retry-After

    allowed, info = rateLimiter.isAllowed(apiKey)
    if allowed:
        return apiKey

    # Retry-After carries whole seconds. Round up (and never send 0) so a
    # client that honours the header does not retry before a token exists.
    retryAfterHeader = str(max(1, math.ceil(info["retryAfter"])))

    raise HTTPException(
        status_code=429,
        detail={
            "error": "Rate limit exceeded",
            "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
            "retryAfter": info["retryAfter"],
            "limit": info["limit"],
            "remaining": info["remaining"],
        },
        headers={
            "Retry-After": retryAfterHeader,
            "X-RateLimit-Limit": str(info["limit"]),
            "X-RateLimit-Remaining": str(info["remaining"]),
            "X-RateLimit-Reset": str(info["resetSeconds"]),
        },
    )
|
|
|
|
# ============================================================================
|
|
# Application Lifecycle
|
|
# ============================================================================
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log the effective settings on startup and a notice on shutdown."""
    apiKeyState = "Yes" if CONFIG["apiKey"] else "No (development mode)"
    pdfState = "Enabled" if PDF_SUPPORT else "Disabled"

    logger.info("Private-LLM Service starting up...")
    logger.info(f"Ollama URL: {CONFIG['ollamaUrl']}")
    logger.info(f"API Key configured: {apiKeyState}")
    logger.info(f"PDF Support: {pdfState}")

    # The application serves requests while suspended here.
    yield

    logger.info("Private-LLM Service shutting down...")
|
|
|
|
# ============================================================================
|
|
# FastAPI Application
|
|
# ============================================================================
|
|
|
|
# Application metadata, kept in one place and passed to FastAPI as keywords.
_APP_SETTINGS = {
    "title": "PowerOn Private-LLM Service",
    "description": "AI model endpoints for OCR and Vision processing",
    "version": "1.0.0",
    "lifespan": lifespan,
}

app = FastAPI(**_APP_SETTINGS)
|
|
|
|
# CORS Configuration - Allow gateway instances
# Local development origins: both loopback spellings on each known port.
ALLOWED_ORIGINS = [
    f"http://{host}:{port}"
    for host in ("localhost", "127.0.0.1")
    for port in (8000, 8080, 5000)
]

# Production base domains.
PRODUCTION_PATTERNS = [
    "poweron.swiss",
    "poweron-center.net",
]

# For every production domain allow the bare domain plus the known
# subdomains, https only.
for pattern in PRODUCTION_PATTERNS:
    ALLOWED_ORIGINS += [
        f"https://{prefix}{pattern}"
        for prefix in ("", "www.", "api.", "gateway.", "app.", "nyla.", "playground.")
    ]
|
|
|
|
# CORS policy: the explicit origin list plus a regex admitting any https
# subdomain of the production domains.
_CORS_OPTIONS = {
    "allow_origins": ALLOWED_ORIGINS,
    "allow_origin_regex": r"https://.*\.(poweron\.swiss|poweron-center\.net)",
    "allow_credentials": True,
    "allow_methods": ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    "allow_headers": ["*"],
    "expose_headers": ["*"],
    "max_age": 86400,  # seconds (24 hours)
}

app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
|
|
|
|
# Static files and templates (for web UI)
# Resolve both directories relative to this module (the same way config.ini
# is located) so the service also starts when launched from another
# working directory.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))

app.mount(
    "/static",
    StaticFiles(directory=os.path.join(_BASE_DIR, "static")),
    name="static",
)
templates = Jinja2Templates(directory=os.path.join(_BASE_DIR, "templates"))
|
|
|
|
# ============================================================================
|
|
# API Routes
|
|
# ============================================================================
|
|
|
|
@app.get("/api/health", response_model=HealthResponse, tags=["System"])
|
|
async def _healthCheck():
|
|
"""Health check endpoint."""
|
|
ollamaConnected = False
|
|
try:
|
|
async with httpx.AsyncClient(timeout=5.0) as client:
|
|
response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
|
|
ollamaConnected = response.status_code == 200
|
|
except Exception:
|
|
pass
|
|
|
|
return HealthResponse(
|
|
status="ok",
|
|
service="private-llm",
|
|
pdfSupport=PDF_SUPPORT,
|
|
ollamaConnected=ollamaConnected
|
|
)
|
|
|
|
@app.get("/api/models", response_model=List[ModelInfo], tags=["Models"])
|
|
async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
|
|
"""List available models with pricing."""
|
|
models = []
|
|
for externalName, internalName in MODEL_MAPPING.items():
|
|
isVision = _isVisionModel(internalName)
|
|
pricePerCall = 0.10 if isVision else 0.01 # CHF pricing
|
|
|
|
models.append(ModelInfo(
|
|
name=externalName,
|
|
internalName=internalName,
|
|
isVision=isVision,
|
|
pricePerCall=pricePerCall
|
|
))
|
|
|
|
return models
|
|
|
|
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
|
|
async def _ollamaStatus():
|
|
"""Check Ollama connection status and list available models."""
|
|
try:
|
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
|
|
|
|
if response.status_code != 200:
|
|
return OllamaStatusResponse(
|
|
connected=False,
|
|
error=f"Ollama responded with status {response.status_code}"
|
|
)
|
|
|
|
data = response.json()
|
|
models = [m.get("name", "") for m in data.get("models", [])]
|
|
visionModels = [m for m in models if _isVisionModel(m)]
|
|
|
|
return OllamaStatusResponse(
|
|
connected=True,
|
|
models=models,
|
|
visionModels=visionModels,
|
|
totalModels=len(models)
|
|
)
|
|
|
|
except httpx.ConnectError:
|
|
return OllamaStatusResponse(
|
|
connected=False,
|
|
error="Keine Verbindung zu Ollama. Ist Ollama gestartet?"
|
|
)
|
|
except Exception as e:
|
|
return OllamaStatusResponse(
|
|
connected=False,
|
|
error=str(e)
|
|
)
|
|
|
|
@app.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
|
|
async def _analyzeDocument(
|
|
request: AnalyzeRequest,
|
|
xApiKey: Optional[str] = Header(None, alias="X-API-Key")
|
|
):
|
|
"""
|
|
Analyze a document with AI Vision API.
|
|
|
|
Supports both vision models (with images) and text models (without images).
|
|
|
|
Authentication:
|
|
- Gateway calls: Must include X-API-Key header
|
|
- Test UI calls: No auth required (same-origin)
|
|
|
|
Rate limiting is applied when API key is provided.
|
|
"""
|
|
# Apply rate limiting only for authenticated requests (Gateway)
|
|
if xApiKey:
|
|
if CONFIG["apiKey"] and xApiKey != CONFIG["apiKey"]:
|
|
raise HTTPException(status_code=401, detail="Invalid API key")
|
|
# Check rate limit for authenticated requests
|
|
allowed, info = rateLimiter.isAllowed(xApiKey)
|
|
if not allowed:
|
|
raise HTTPException(
|
|
status_code=429,
|
|
detail=f"Rate limit exceeded. Retry after {info['retryAfter']} seconds."
|
|
)
|
|
try:
|
|
# Get internal model name
|
|
internalModelName = _getInternalModelName(request.modelName)
|
|
isVision = _isVisionModel(internalModelName)
|
|
|
|
# Validate request
|
|
if isVision and not request.imageBase64:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
|
|
)
|
|
|
|
if not request.prompt:
|
|
raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")
|
|
|
|
# Model-specific context lengths (reduced for RAM constraints)
|
|
# Server has 31GB RAM + 22GB GPU - vision models need smaller context
|
|
modelContextLengths = {
|
|
"qwen2.5:7b": 8192, # Text model - 8K context
|
|
"qwen2.5vl:7b": 4096, # Vision model - 4K context (images use lots of RAM)
|
|
"granite3.2-vision": 4096, # Vision model - 4K context
|
|
"granite3.2-vision:latest": 4096,
|
|
"deepseek-ocr": 4096, # OCR model - 4K context
|
|
"deepseek-ocr:latest": 4096,
|
|
}
|
|
numCtx = modelContextLengths.get(internalModelName, 4096)
|
|
|
|
# Build request body with model-specific context window
|
|
requestBody = {
|
|
"model": internalModelName,
|
|
"prompt": request.prompt,
|
|
"stream": False,
|
|
"options": {
|
|
"num_ctx": numCtx
|
|
}
|
|
}
|
|
|
|
if request.imageBase64:
|
|
requestBody["images"] = [request.imageBase64]
|
|
|
|
# Call Ollama API
|
|
async with httpx.AsyncClient(timeout=3600.0) as client: # 60 min timeout
|
|
response = await client.post(
|
|
f"{CONFIG['ollamaUrl']}/api/generate",
|
|
json=requestBody
|
|
)
|
|
|
|
if response.status_code == 404:
|
|
raise HTTPException(
|
|
status_code=404,
|
|
detail=f'Modell "{internalModelName}" nicht gefunden. Bitte installieren mit: ollama pull {internalModelName}'
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
raise HTTPException(
|
|
status_code=response.status_code,
|
|
detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
|
|
)
|
|
|
|
responseData = response.json()
|
|
responseText = responseData.get("response", "")
|
|
|
|
# Try to extract JSON from response
|
|
extractedData = None
|
|
jsonMatch = re.search(r"\{[\s\S]*\}", responseText)
|
|
|
|
if jsonMatch:
|
|
try:
|
|
extractedData = json.loads(jsonMatch.group())
|
|
except json.JSONDecodeError:
|
|
extractedData = None
|
|
|
|
# Wrap plain text response in JSON object
|
|
if extractedData is None:
|
|
extractedData = {"response": responseText.strip()}
|
|
|
|
return AnalyzeResponse(
|
|
success=True,
|
|
data=extractedData,
|
|
rawResponse=responseText
|
|
)
|
|
|
|
except httpx.TimeoutException:
|
|
return AnalyzeResponse(
|
|
success=False,
|
|
error="Zeitüberschreitung bei der Ollama API"
|
|
)
|
|
except httpx.ConnectError:
|
|
return AnalyzeResponse(
|
|
success=False,
|
|
error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
|
|
)
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error analyzing document: {e}")
|
|
return AnalyzeResponse(
|
|
success=False,
|
|
error=f"Unerwarteter Fehler: {str(e)}"
|
|
)
|
|
|
|
@app.post("/api/pdf/extract", tags=["PDF"])
|
|
async def _extractPdfImages(request: PdfExtractRequest):
|
|
"""
|
|
Extract images from a PDF.
|
|
|
|
No API key required - this endpoint is for local test UI only,
|
|
not used by gateway (gateway sends images directly).
|
|
"""
|
|
if not PDF_SUPPORT:
|
|
raise HTTPException(
|
|
status_code=501,
|
|
detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
|
|
)
|
|
|
|
try:
|
|
pdfBytes = base64.b64decode(request.pdfBase64)
|
|
|
|
if request.page is not None:
|
|
# Extract single page
|
|
result = _renderPdfPageAsImage(pdfBytes, request.page - 1)
|
|
return {"success": True, "image": result}
|
|
else:
|
|
# Extract all pages (max 5)
|
|
images = _extractImagesFromPdf(pdfBytes, maxPages=5)
|
|
return {
|
|
"success": True,
|
|
"images": images,
|
|
"totalExtracted": len(images)
|
|
}
|
|
|
|
except Exception as e:
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail=f"PDF-Verarbeitungsfehler: {str(e)}"
|
|
)
|
|
|
|
# ============================================================================
|
|
# Web UI Routes (Optional - for direct browser access)
|
|
# ============================================================================
|
|
|
|
@app.get("/", response_class=HTMLResponse, tags=["Web UI"])
|
|
async def _index(request: Request):
|
|
"""Main page with document scanner UI."""
|
|
return templates.TemplateResponse("index.html", {"request": request})
|
|
|
|
@app.get("/login", response_class=HTMLResponse, tags=["Web UI"])
|
|
async def _loginPage(request: Request):
|
|
"""Login page."""
|
|
return templates.TemplateResponse("login.html", {"request": request})
|
|
|
|
@app.get("/logout", response_class=HTMLResponse, tags=["Web UI"])
|
|
async def _logout(request: Request):
|
|
"""Logout - redirect to login page."""
|
|
from starlette.responses import RedirectResponse
|
|
return RedirectResponse(url="/login", status_code=302)
|
|
|
|
# ============================================================================
|
|
# Main
|
|
# ============================================================================
|
|
|
|
if __name__ == "__main__":
|
|
import uvicorn
|
|
|
|
print("\n" + "=" * 60)
|
|
print(" Private-LLM Service - KI-Dokumentenanalyse")
|
|
print(" Powered by PowerOn")
|
|
print("=" * 60)
|
|
print(f"\n Server läuft auf: http://localhost:5000")
|
|
print(f" API Docs: http://localhost:5000/docs")
|
|
print(f" Ollama URL: {CONFIG['ollamaUrl']}")
|
|
print("\n Drücke Ctrl+C zum Beenden")
|
|
print("=" * 60 + "\n")
|
|
|
|
uvicorn.run(app, host="0.0.0.0", port=5000)
|