service-llm-private/routeApi.py
ValueOn AG b15d283941
All checks were successful
Deploy LLM Service / deploy (push) Successful in 23s
cp adapted to 2026 poweron
2026-06-09 09:54:11 +02:00

256 lines
8.1 KiB
Python

# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""API routes for Private-LLM: health, models, analyze, PDF extract, Ollama status."""
import base64
import json
import re
import logging
from typing import Optional, List
import httpx
from fastapi import APIRouter, HTTPException, Depends, Header
from config import (
CONFIG, MODEL_MAPPING, PDF_SUPPORT,
rateLimiter,
_isVisionModel, _getInternalModelName,
_extractImagesFromPdf, _renderPdfPageAsImage,
_verifyApiKey,
AnalyzeRequest, AnalyzeResponse,
PdfExtractRequest, ModelInfo,
HealthResponse, OllamaStatusResponse,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that collects every endpoint declared in this file; "API" is the default tag.
router = APIRouter(tags=["API"])
@router.get("/api/health", response_model=HealthResponse, tags=["System"])
async def _healthCheck():
    """Report service liveness and whether Ollama is reachable.

    The endpoint always answers with ``status="ok"``; only the
    ``ollamaConnected`` flag reflects the outcome of a probe against
    Ollama's ``/api/tags`` endpoint (5 second timeout).

    Returns:
        HealthResponse: service name, ``pdfSupport`` flag and
        ``ollamaConnected`` (True only when the probe returned HTTP 200).
    """
    ollamaConnected = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
            ollamaConnected = response.status_code == 200
    except Exception as e:
        # Best-effort probe: the health endpoint must not fail because Ollama
        # is down, but the reason should still be visible in the logs.
        logger.warning("Ollama health probe failed: %s", e)
    return HealthResponse(
        status="ok",
        service="private-llm",
        pdfSupport=PDF_SUPPORT,
        ollamaConnected=ollamaConnected,
    )
@router.get("/api/models", response_model=List[ModelInfo], tags=["Models"])
async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
    """Return one ModelInfo per MODEL_MAPPING entry.

    Vision models are priced at 0.10 per call, all other models at 0.01.
    """

    def _toModelInfo(publicName, backendName):
        # Build the descriptor for a single external -> internal mapping.
        visionCapable = _isVisionModel(backendName)
        return ModelInfo(
            name=publicName,
            internalName=backendName,
            isVision=visionCapable,
            pricePerCall=0.10 if visionCapable else 0.01,
        )

    return [
        _toModelInfo(publicName, backendName)
        for publicName, backendName in MODEL_MAPPING.items()
    ]
@router.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus():
    """Query Ollama's tag listing and summarise the models it reports.

    Failures while contacting Ollama or reading its answer are reported as
    ``connected=False`` together with an error text instead of being raised.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as session:
            reply = await session.get(f"{CONFIG['ollamaUrl']}/api/tags")
            if reply.status_code == 200:
                # Collect every reported model name first, then pick the vision ones.
                installed = []
                for entry in reply.json().get("models", []):
                    installed.append(entry.get("name", ""))
                withVision = list(filter(_isVisionModel, installed))
                return OllamaStatusResponse(
                    connected=True,
                    models=installed,
                    visionModels=withVision,
                    totalModels=len(installed),
                )
            return OllamaStatusResponse(
                connected=False,
                error=f"Ollama responded with status {reply.status_code}",
            )
    except httpx.ConnectError:
        return OllamaStatusResponse(
            connected=False,
            error="Keine Verbindung zu Ollama. Ist Ollama gestartet?",
        )
    except Exception as failure:
        return OllamaStatusResponse(connected=False, error=str(failure))
def _extractJsonPayload(responseText: str) -> dict:
    """Return the JSON object embedded in a model answer.

    The span from the first ``{`` to the last ``}`` is parsed as JSON. When no
    such span exists, or it is not valid JSON, the stripped text is wrapped as
    ``{"response": <text>}`` instead.
    """
    jsonMatch = re.search(r"\{[\s\S]*\}", responseText)
    if jsonMatch:
        try:
            return json.loads(jsonMatch.group())
        except json.JSONDecodeError:
            # Not valid JSON after all - fall through to the plain-text wrapper.
            pass
    return {"response": responseText.strip()}


@router.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
async def _analyzeDocument(
    request: AnalyzeRequest,
    xApiKey: Optional[str] = Header(None, alias="X-API-Key")
):
    """
    Analyze a document with AI Vision API.

    Supports both vision models (with images) and text models (without images).

    Authentication:
    - Gateway calls: Must include X-API-Key header
    - Test UI calls: No auth required (same-origin)

    Rate limiting is applied when API key is provided.

    Args:
        request: Model name, prompt and optional base64 image.
        xApiKey: Value of the ``X-API-Key`` header, if sent.

    Returns:
        AnalyzeResponse: ``success=True`` with the parsed data and the raw
        model text, or ``success=False`` with an error text on timeout,
        connection failure or any unexpected error.

    Raises:
        HTTPException: 401 for a wrong API key, 429 when rate limited,
            400 for a missing image (vision models) or missing prompt,
            404 when Ollama does not know the model, and Ollama's own
            status code for any other non-200 answer.
    """
    if xApiKey:
        if CONFIG["apiKey"] and xApiKey != CONFIG["apiKey"]:
            raise HTTPException(status_code=401, detail="Invalid API key")
        allowed, info = rateLimiter.isAllowed(xApiKey)
        if not allowed:
            raise HTTPException(
                status_code=429,
                detail=f"Rate limit exceeded. Retry after {info['retryAfter']} seconds.",
                headers={"Retry-After": str(int(info["retryAfter"]) + 1)},
            )
    try:
        internalModelName = _getInternalModelName(request.modelName)
        isVision = _isVisionModel(internalModelName)
        if isVision and not request.imageBase64:
            raise HTTPException(
                status_code=400,
                detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
            )
        if not request.prompt:
            raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")
        # Server has 31GB RAM + 22GB GPU - vision models need smaller context
        modelContextLengths = {
            "qwen2.5:7b": 8192,
            "qwen2.5vl:7b": 4096,
            "granite3.2-vision": 4096,
            "granite3.2-vision:latest": 4096,
            "deepseek-ocr": 4096,
            "deepseek-ocr:latest": 4096,
        }
        # Models missing from the table get the conservative 4096 default.
        numCtx = modelContextLengths.get(internalModelName, 4096)
        requestBody = {
            "model": internalModelName,
            "prompt": request.prompt,
            "stream": False,
            "options": {
                "num_ctx": numCtx
            }
        }
        if request.imageBase64:
            requestBody["images"] = [request.imageBase64]
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )
        if response.status_code == 404:
            raise HTTPException(
                status_code=404,
                detail=f'Modell "{internalModelName}" nicht gefunden. Bitte installieren mit: ollama pull {internalModelName}'
            )
        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
            )
        responseData = response.json()
        # A null "response" field would make the regex search fail with a
        # TypeError; treat it like an empty answer.
        responseText = responseData.get("response") or ""
        return AnalyzeResponse(
            success=True,
            data=_extractJsonPayload(responseText),
            rawResponse=responseText
        )
    except httpx.TimeoutException:
        return AnalyzeResponse(
            success=False,
            error="Zeitüberschreitung bei der Ollama API"
        )
    except httpx.ConnectError:
        return AnalyzeResponse(
            success=False,
            error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
        )
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error analyzing document: %s", e)
        return AnalyzeResponse(
            success=False,
            error=f"Unerwarteter Fehler: {str(e)}"
        )
@router.post("/api/pdf/extract", tags=["PDF"])
async def _extractPdfImages(request: PdfExtractRequest):
    """
    Extract images from a PDF.

    No API key required - this endpoint is for local test UI only,
    not used by gateway (gateway sends images directly).

    Args:
        request: Carries the base64-encoded PDF and an optional 1-based
            page number.

    Returns:
        dict: ``{"success": True, "image": ...}`` when a page is requested,
        otherwise ``{"success": True, "images": [...], "totalExtracted": n}``
        for the images extracted from the document (``maxPages=5``).

    Raises:
        HTTPException: 501 when PDF support is unavailable, 400 for an
            invalid page number or undecodable base64 input, 500 when
            processing the PDF fails.
    """
    if not PDF_SUPPORT:
        raise HTTPException(
            status_code=501,
            detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
        )
    # Pages are 1-based; anything below 1 would become a negative index
    # below instead of being reported as bad input.
    if request.page is not None and request.page < 1:
        raise HTTPException(
            status_code=400,
            detail="Ungültige Seitenzahl: Seiten werden ab 1 gezählt"
        )
    try:
        pdfBytes = base64.b64decode(request.pdfBase64)
    except (ValueError, TypeError) as e:
        # binascii.Error is a ValueError subclass; malformed input is a
        # client error, not a server failure.
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Base64-Daten: {str(e)}"
        ) from e
    try:
        if request.page is not None:
            result = _renderPdfPageAsImage(pdfBytes, request.page - 1)
            return {"success": True, "image": result}
        images = _extractImagesFromPdf(pdfBytes, maxPages=5)
        return {
            "success": True,
            "images": images,
            "totalExtracted": len(images)
        }
    except Exception as e:
        # Keep the traceback in the logs; the client only gets the message.
        logger.exception("PDF processing failed")
        raise HTTPException(
            status_code=500,
            detail=f"PDF-Verarbeitungsfehler: {str(e)}"
        ) from e