# Copyright (c) 2026 PowerOn AG
# All rights reserved.
"""API routes for Private-LLM: health, models, analyze, PDF extract, Ollama status."""

import base64
import hmac
import json
import logging
import re
from typing import Optional, List

import httpx
from fastapi import APIRouter, HTTPException, Depends, Header

from config import (
    CONFIG,
    MODEL_MAPPING,
    PDF_SUPPORT,
    rateLimiter,
    _isVisionModel,
    _getInternalModelName,
    _extractImagesFromPdf,
    _renderPdfPageAsImage,
    _verifyApiKey,
    AnalyzeRequest,
    AnalyzeResponse,
    PdfExtractRequest,
    ModelInfo,
    HealthResponse,
    OllamaStatusResponse,
)

logger = logging.getLogger(__name__)

router = APIRouter(tags=["API"])

# Server has 31GB RAM + 22GB GPU - vision models need smaller context
_MODEL_CONTEXT_LENGTHS = {
    "qwen2.5:7b": 8192,
    "qwen2.5vl:7b": 4096,
    "granite3.2-vision": 4096,
    "granite3.2-vision:latest": 4096,
    "deepseek-ocr": 4096,
    "deepseek-ocr:latest": 4096,
}

# Context length used for models that are not listed in _MODEL_CONTEXT_LENGTHS.
_DEFAULT_CONTEXT_LENGTH = 4096

# Timeout (seconds) for a single /api/generate call to Ollama.
_GENERATE_TIMEOUT_SECONDS = 3600.0

# Greedy match from the first "{" to the last "}" in the model output.
_JSON_OBJECT_PATTERN = re.compile(r"\{[\s\S]*\}")


def _apiKeyMatches(providedKey: str, expectedKey: str) -> bool:
    """Compare two API keys without leaking timing information.

    Both keys are encoded to bytes first because ``hmac.compare_digest``
    rejects ``str`` arguments containing non-ASCII characters.
    NOTE(review): assumes CONFIG["apiKey"] is a ``str`` - confirm in config.
    """
    return hmac.compare_digest(
        providedKey.encode("utf-8"),
        expectedKey.encode("utf-8"),
    )


def _enforceApiKeyAndRateLimit(xApiKey: Optional[str]) -> None:
    """Validate the API key and apply rate limiting when a key was sent.

    Requests without a key are let through untouched (test UI calls).

    Raises:
        HTTPException: 401 if a key is configured and the sent key differs,
            429 if the rate limiter rejects the key.
    """
    if not xApiKey:
        return

    configuredKey = CONFIG["apiKey"]
    if configuredKey and not _apiKeyMatches(xApiKey, configuredKey):
        raise HTTPException(status_code=401, detail="Invalid API key")

    allowed, info = rateLimiter.isAllowed(xApiKey)
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Retry after {info['retryAfter']} seconds.",
            headers={"Retry-After": str(int(info["retryAfter"]) + 1)},
        )


def _parseModelResponse(responseText: str):
    """Return the JSON object embedded in the model output, if any.

    Falls back to ``{"response": <stripped text>}`` when no ``{...}`` span is
    found or when the span is not valid JSON.
    """
    jsonMatch = _JSON_OBJECT_PATTERN.search(responseText)
    if jsonMatch:
        try:
            return json.loads(jsonMatch.group())
        except json.JSONDecodeError:
            pass  # Not valid JSON: fall through to the plain-text wrapper.
    return {"response": responseText.strip()}


def _decodeBase64Pdf(encodedPdf: str) -> bytes:
    """Decode the base64 PDF payload.

    Raises:
        HTTPException: 400 if the payload is not decodable base64.
    """
    try:
        return base64.b64decode(encodedPdf)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Base64-Daten: {exc}"
        ) from exc


@router.get("/api/health", response_model=HealthResponse, tags=["System"])
async def _healthCheck():
    """Health check endpoint.

    Probes Ollama's ``/api/tags`` with a 5 second timeout. Any failure of the
    probe is reported as ``ollamaConnected=False``; the endpoint itself always
    answers with ``status="ok"``.
    """
    ollamaConnected = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")
            ollamaConnected = response.status_code == 200
    except Exception as exc:
        # Best effort: the health endpoint must not fail because Ollama is down.
        logger.debug("Ollama health probe failed: %s", exc)

    return HealthResponse(
        status="ok",
        service="private-llm",
        pdfSupport=PDF_SUPPORT,
        ollamaConnected=ollamaConnected
    )


@router.get("/api/models", response_model=List[ModelInfo], tags=["Models"])
async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
    """List available models with pricing.

    One entry is produced per item of ``MODEL_MAPPING``; vision models are
    priced at 0.10 per call, all other models at 0.01.
    """
    return [
        ModelInfo(
            name=externalName,
            internalName=internalName,
            isVision=_isVisionModel(internalName),
            pricePerCall=0.10 if _isVisionModel(internalName) else 0.01
        )
        for externalName, internalName in MODEL_MAPPING.items()
    ]


@router.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus():
    """Check Ollama connection status and list available models.

    Errors are never raised to the caller; they are reported through
    ``connected=False`` plus an ``error`` message.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(f"{CONFIG['ollamaUrl']}/api/tags")

            if response.status_code != 200:
                return OllamaStatusResponse(
                    connected=False,
                    error=f"Ollama responded with status {response.status_code}"
                )

            data = response.json()
            models = [m.get("name", "") for m in data.get("models", [])]
            visionModels = [m for m in models if _isVisionModel(m)]

            return OllamaStatusResponse(
                connected=True,
                models=models,
                visionModels=visionModels,
                totalModels=len(models)
            )
    except httpx.ConnectError:
        return OllamaStatusResponse(
            connected=False,
            error="Keine Verbindung zu Ollama. Ist Ollama gestartet?"
        )
    except Exception as e:
        return OllamaStatusResponse(
            connected=False,
            error=str(e)
        )


@router.post("/api/analyze", response_model=AnalyzeResponse, tags=["AI"])
async def _analyzeDocument(
    request: AnalyzeRequest,
    xApiKey: Optional[str] = Header(None, alias="X-API-Key")
):
    """
    Analyze a document with AI Vision API.

    Supports both vision models (with images) and text models (without images).

    Authentication:
    - Gateway calls: Must include X-API-Key header
    - Test UI calls: No auth required (same-origin)

    Rate limiting is applied when API key is provided.

    Raises:
        HTTPException: 401 for a wrong API key, 429 when rate limited, 400 for
            a missing image (vision models) or missing prompt, 404 when Ollama
            does not know the model, and Ollama's own status code for any
            other non-200 answer.

    Timeouts, connection failures and unexpected errors are not raised; they
    are returned as ``AnalyzeResponse(success=False, error=...)``.
    """
    _enforceApiKeyAndRateLimit(xApiKey)

    try:
        internalModelName = _getInternalModelName(request.modelName)
        isVision = _isVisionModel(internalModelName)

        if isVision and not request.imageBase64:
            raise HTTPException(
                status_code=400,
                detail="Kein Bild übermittelt (erforderlich für Vision-Modelle)"
            )

        if not request.prompt:
            raise HTTPException(status_code=400, detail="Kein Prompt übermittelt")

        requestBody = {
            "model": internalModelName,
            "prompt": request.prompt,
            "stream": False,
            "options": {
                "num_ctx": _MODEL_CONTEXT_LENGTHS.get(
                    internalModelName, _DEFAULT_CONTEXT_LENGTH
                )
            }
        }

        if request.imageBase64:
            requestBody["images"] = [request.imageBase64]

        async with httpx.AsyncClient(timeout=_GENERATE_TIMEOUT_SECONDS) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )

            if response.status_code == 404:
                raise HTTPException(
                    status_code=404,
                    detail=(
                        f'Modell "{internalModelName}" nicht gefunden. '
                        f'Bitte installieren mit: ollama pull {internalModelName}'
                    )
                )

            if response.status_code != 200:
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Ollama API Fehler: {response.status_code} - {response.text[:200]}"
                )

            responseText = response.json().get("response", "")

            return AnalyzeResponse(
                success=True,
                data=_parseModelResponse(responseText),
                rawResponse=responseText
            )

    except httpx.TimeoutException:
        return AnalyzeResponse(
            success=False,
            error="Zeitüberschreitung bei der Ollama API"
        )
    except httpx.ConnectError:
        return AnalyzeResponse(
            success=False,
            error="Verbindung zu Ollama fehlgeschlagen. Ist Ollama gestartet?"
        )
    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error analyzing document: %s", e)
        return AnalyzeResponse(
            success=False,
            error=f"Unerwarteter Fehler: {str(e)}"
        )


@router.post("/api/pdf/extract", tags=["PDF"])
async def _extractPdfImages(request: PdfExtractRequest):
    """
    Extract images from a PDF.

    No API key required - this endpoint is for local test UI only,
    not used by gateway (gateway sends images directly).

    When ``request.page`` is set, that single page (1-based) is rendered and
    returned under ``"image"``; otherwise images are extracted with
    ``maxPages=5`` and returned under ``"images"``.

    Raises:
        HTTPException: 501 if PDF support is unavailable, 400 for a page
            number below 1 or undecodable base64, 500 for any other
            processing failure.
    """
    if not PDF_SUPPORT:
        raise HTTPException(
            status_code=501,
            detail="PDF-Support nicht verfügbar. Bitte PyMuPDF installieren: pip install pymupdf"
        )

    try:
        pageNumber = request.page
        # Pages are 1-based; 0 or negative would turn into a negative index below.
        if pageNumber is not None and pageNumber < 1:
            raise HTTPException(
                status_code=400,
                detail="Ungültige Seitenzahl: Seiten werden ab 1 gezählt"
            )

        pdfBytes = _decodeBase64Pdf(request.pdfBase64)

        if pageNumber is not None:
            result = _renderPdfPageAsImage(pdfBytes, pageNumber - 1)
            return {"success": True, "image": result}

        images = _extractImagesFromPdf(pdfBytes, maxPages=5)
        return {
            "success": True,
            "images": images,
            "totalExtracted": len(images)
        }

    except HTTPException:
        # Keep the 400s raised above instead of re-wrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"PDF-Verarbeitungsfehler: {str(e)}"
        ) from e