Move all API routes, OpenAI-compatible routes, web UI routes, shared config, models, rate limiter, and auth logic into dedicated files (config.py, routeApi.py, routeOpenAi.py, routeWeb.py). app.py now serves as a clean entry point. Made-with: Cursor
256 lines
8.1 KiB
Python
256 lines
8.1 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""API routes for Private-LLM: health, models, analyze, PDF extract, Ollama status."""
|
|
|
|
import base64
|
|
import json
|
|
import re
|
|
import logging
|
|
from typing import Optional, List
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, HTTPException, Depends, Header
|
|
|
|
from config import (
|
|
CONFIG, MODEL_MAPPING, PDF_SUPPORT,
|
|
rateLimiter,
|
|
_isVisionModel, _getInternalModelName,
|
|
_extractImagesFromPdf, _renderPdfPageAsImage,
|
|
_verifyApiKey,
|
|
AnalyzeRequest, AnalyzeResponse,
|
|
PdfExtractRequest, ModelInfo,
|
|
HealthResponse, OllamaStatusResponse,
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Router that all endpoints declared below are registered on; it is
# created with the tag "API".
router = APIRouter(tags=["API"])
|
|
|
|
|
|
@router.get("/api/health", response_model=HealthResponse, tags=["System"])
async def _healthCheck():
    """Report service liveness and whether Ollama answers.

    Sends a GET to ``{CONFIG['ollamaUrl']}/api/tags`` with a 5 second
    timeout. Any failure of that probe only results in
    ``ollamaConnected=False``; the endpoint itself always reports
    ``status="ok"``.

    Returns:
        HealthResponse carrying the fixed service name "private-llm", the
        ``PDF_SUPPORT`` flag and ``ollamaConnected`` (True only when the
        probe returned HTTP 200).
    """
    ollamaConnected = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
            ollamaConnected = response.status_code == 200
    except Exception as exc:
        # Deliberately best-effort: the health check must never fail because
        # Ollama is down, but the reason is logged instead of being dropped.
        logger.debug("Ollama health probe failed: %s", exc)

    return HealthResponse(
        status="ok",
        service="private-llm",
        pdfSupport=PDF_SUPPORT,
        ollamaConnected=ollamaConnected,
    )
|
|
|
|
|
|
@router.get("/api/models", response_model=List[ModelInfo], tags=["Models"])
async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
    """Return one ModelInfo per entry of MODEL_MAPPING, including its price.

    The route depends on ``_verifyApiKey``. Models for which
    ``_isVisionModel`` is truthy are priced at 0.10 per call, all others
    at 0.01.
    """

    def _describe(publicName, ollamaName):
        # Classify once so the flag and the price are always consistent.
        visionCapable = _isVisionModel(ollamaName)
        return ModelInfo(
            name=publicName,
            internalName=ollamaName,
            isVision=visionCapable,
            pricePerCall=0.10 if visionCapable else 0.01,
        )

    return [
        _describe(publicName, ollamaName)
        for publicName, ollamaName in MODEL_MAPPING.items()
    ]
|
|
|
|
|
|
@router.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus():
    """Query Ollama's ``/api/tags`` and summarize the installed models.

    Returns:
        OllamaStatusResponse. On HTTP 200 it has ``connected=True`` plus the
        model names, the subset for which ``_isVisionModel`` is truthy, and
        the total count. On any other status or on an exception it has
        ``connected=False`` and an ``error`` text; this endpoint does not
        raise for upstream failures.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            tagsResponse = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")

            if tagsResponse.status_code == 200:
                payload = tagsResponse.json()
                # Entries without a "name" key are reported as empty strings.
                installed = [
                    entry.get("name", "") for entry in payload.get("models", [])
                ]
                visionCapable = list(filter(_isVisionModel, installed))
                return OllamaStatusResponse(
                    connected=True,
                    models=installed,
                    visionModels=visionCapable,
                    totalModels=len(installed),
                )

            return OllamaStatusResponse(
                connected=False,
                error=f"Ollama responded with status {tagsResponse.status_code}",
            )
    except httpx.ConnectError:
        failure = "Keine Verbindung zu Ollama. Ist Ollama gestartet?"
    except Exception as exc:
        failure = str(exc)

    # Both exception paths end in the same "not connected" answer.
    return OllamaStatusResponse(connected=False, error=failure)
|
|
|
|
|
|
# Context window (Ollama "num_ctx") per internal model name.
# Server has 31GB RAM + 22GB GPU - vision models need smaller context
_MODEL_CONTEXT_LENGTHS = {
    "qwen2.5:7b": 8192,
    "qwen2.5vl:7b": 4096,
    "granite3.2-vision": 4096,
    "granite3.2-vision:latest": 4096,
    "deepseek-ocr": 4096,
    "deepseek-ocr:latest": 4096,
}

# Used for every model that is not listed in _MODEL_CONTEXT_LENGTHS.
_DEFAULT_CONTEXT_LENGTH = 4096


def _parseModelOutput(responseText):
    """Return the JSON object embedded in *responseText*, else wrap the text.

    The span from the first "{" to the last "}" is parsed as JSON. If there
    is no such span, or it is not valid JSON, the stripped text is returned
    as ``{"response": <text>}``.
    """
    jsonMatch = re.search(r"\{[\s\S]*\}", responseText)
    if jsonMatch:
        try:
            return json.loads(jsonMatch.group())
        except json.JSONDecodeError:
            # Not valid JSON after all: fall through to the plain-text wrapper.
            pass
    return {"response": responseText.strip()}


@router.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
async def _analyzeDocument(
    request: AnalyzeRequest,
    xApiKey: Optional[str] = Header(None, alias="X-API-Key")
):
    """
    Analyze a document with AI Vision API.

    Supports both vision models (with images) and text models (without images).

    Authentication:
    - Gateway calls: Must include X-API-Key header
    - Test UI calls: No auth required (same-origin)

    Rate limiting is applied when API key is provided.

    Returns:
        AnalyzeResponse. On success ``data`` holds the JSON object found in
        the model output (or ``{"response": <text>}`` when none is found) and
        ``rawResponse`` the unmodified text. Timeouts, connection errors and
        unexpected errors are reported as ``success=False`` with ``error``.

    Raises:
        HTTPException: 401 for a wrong API key, 429 when rate limited,
            400 for a missing image (vision models) or missing prompt,
            404 when Ollama does not know the model, and Ollama's own
            status code for any other non-200 answer.
    """
    import hmac  # local import: only needed for the constant-time key check

    # NOTE(review): requests without an X-API-Key header skip both the key
    # check and rate limiting. That matches the docstring (test UI), but it
    # also means this endpoint is open unless it is protected elsewhere.
    if xApiKey:
        expectedKey = CONFIG["apiKey"]
        # compare_digest avoids leaking key contents through response timing.
        # Bytes are compared because compare_digest rejects non-ASCII str.
        if expectedKey and not hmac.compare_digest(
            xApiKey.encode("utf-8"), str(expectedKey).encode("utf-8")
        ):
            raise HTTPException(status_code=401, detail="Invalid API key")

        allowed, info = rateLimiter.isAllowed(xApiKey)
        if not allowed:
            raise HTTPException(
                status_code=429,
                detail=f"Rate limit exceeded. Retry after {info['retryAfter']} seconds.",
                headers={"Retry-After": str(int(info["retryAfter"]) + 1)},
            )

    try:
        internalModelName = _getInternalModelName(request.modelName)
        isVision = _isVisionModel(internalModelName)

        if isVision and not request.imageBase64:
            raise HTTPException(
                status_code=400,
                detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
            )

        if not request.prompt:
            raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")

        requestBody = {
            "model": internalModelName,
            "prompt": request.prompt,
            "stream": False,
            "options": {
                "num_ctx": _MODEL_CONTEXT_LENGTHS.get(
                    internalModelName, _DEFAULT_CONTEXT_LENGTH
                )
            }
        }

        # Text models may be called without an image; only attach one if sent.
        if request.imageBase64:
            requestBody["images"] = [request.imageBase64]

        # Generation can be very slow, hence the one hour timeout.
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )

            if response.status_code == 404:
                raise HTTPException(
                    status_code=404,
                    detail=f'Modell "{internalModelName}" nicht gefunden. Bitte installieren mit: ollama pull {internalModelName}'
                )

            if response.status_code != 200:
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
                )

            responseData = response.json()
            responseText = responseData.get("response", "")

            return AnalyzeResponse(
                success=True,
                data=_parseModelOutput(responseText),
                rawResponse=responseText
            )

    except httpx.TimeoutException:
        return AnalyzeResponse(
            success=False,
            error="Zeitüberschreitung bei der Ollama API"
        )
    except httpx.ConnectError:
        return AnalyzeResponse(
            success=False,
            error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
        )
    except HTTPException:
        # Validation and upstream status errors must reach the client as-is.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error analyzing document: %s", e)
        return AnalyzeResponse(
            success=False,
            error=f"Unerwarteter Fehler: {str(e)}"
        )
|
|
|
|
|
|
@router.post("/api/pdf/extract", tags=["PDF"])
async def _extractPdfImages(request: PdfExtractRequest):
    """
    Extract images from a PDF.

    No API key required - this endpoint is for local test UI only,
    not used by gateway (gateway sends images directly).

    When ``request.page`` is set, that single page (1-based) is rendered via
    ``_renderPdfPageAsImage``; otherwise ``_extractImagesFromPdf`` is called
    with ``maxPages=5``.

    Returns:
        ``{"success": True, "image": ...}`` for a single page, or
        ``{"success": True, "images": [...], "totalExtracted": n}``.

    Raises:
        HTTPException: 501 when PDF support is unavailable, 400 for a page
            number below 1 or undecodable base64 input, 500 when PDF
            processing itself fails.
    """
    if not PDF_SUPPORT:
        raise HTTPException(
            status_code=501,
            detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
        )

    # Pages are 1-based here; 0 or negative values would otherwise turn into
    # a negative index after the "- 1" below.
    if request.page is not None and request.page < 1:
        raise HTTPException(
            status_code=400,
            detail="Ungültige Seitenzahl: Seiten beginnen bei 1"
        )

    # Bad input is the caller's fault, so report it as 400 rather than 500.
    # binascii.Error (raised by b64decode) is a ValueError subclass.
    try:
        pdfBytes = base64.b64decode(request.pdfBase64)
    except (ValueError, TypeError) as e:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Base64-Daten: {str(e)}"
        ) from e

    try:
        if request.page is not None:
            result = _renderPdfPageAsImage(pdfBytes, request.page - 1)
            return {"success": True, "image": result}

        images = _extractImagesFromPdf(pdfBytes, maxPages=5)
        return {
            "success": True,
            "images": images,
            "totalExtracted": len(images)
        }
    except Exception as e:
        # Keep the traceback in the log; the client only gets the message.
        logger.exception("PDF processing failed: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"PDF-Verarbeitungsfehler: {str(e)}"
        ) from e
|