This commit is contained in:
ValueOn AG 2026-03-12 21:42:05 +01:00
parent f9b91501d8
commit c70c03c637
2 changed files with 246 additions and 0 deletions

242
app.py
View file

@ -17,6 +17,7 @@ import json
import re
import logging
import time
import uuid
from collections import defaultdict
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
@ -55,6 +56,7 @@ def _loadConfig() -> Dict[str, Any]:
configPath = os.path.join(os.path.dirname(__file__), "config.ini")
config = {
"apiKey": None,
"cursorApiKey": None,
"ollamaUrl": "http://localhost:11434",
"authUsername": "poweron",
"authPassword": "poweron",
@ -78,6 +80,8 @@ def _loadConfig() -> Dict[str, Any]:
# Map config keys
if key == "PRIVATE_LLM_API_KEY":
config["apiKey"] = value
elif key == "CURSOR_API_KEY":
config["cursorApiKey"] = value
elif key == "OLLAMA_URL":
config["ollamaUrl"] = value
elif key == "AUTH_USERNAME":
@ -95,6 +99,7 @@ def _loadConfig() -> Dict[str, Any]:
# Override with environment variables if set
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
config["cursorApiKey"] = os.environ.get("CURSOR_API_KEY", config["cursorApiKey"])
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
@ -243,6 +248,59 @@ class OllamaStatusResponse(BaseModel):
totalModels: Optional[int] = None
error: Optional[str] = None
class OpenAiModelInfo(BaseModel):
    """Single entry of an OpenAI-style model listing."""

    # Model identifier as presented to API clients.
    id: str
    # Fixed object-type tag of a model entry.
    object: str = "model"
    # Integer timestamp supplied by the caller that builds the entry.
    created: int
    # Declared with the snake_case alias "owned_by"; defaults to "poweron".
    ownedBy: str = Field(alias="owned_by", default="poweron")
class OpenAiModelsResponse(BaseModel):
    """Envelope returned by the OpenAI-style model listing endpoint."""

    # Fixed object-type tag of a list envelope.
    object: str = "list"
    # The individual model entries.
    data: List[OpenAiModelInfo]
class OpenAiChatMessage(BaseModel):
    """One chat turn in OpenAI format (used for requests and responses)."""

    # Speaker of the turn; not restricted to a fixed set of values here.
    role: str
    # Deliberately untyped: accepted as-is and normalized to text later.
    content: Any
class OpenAiChatCompletionRequest(BaseModel):
    """Request body of the OpenAI-style chat completion endpoint."""

    # Model name requested by the client.
    model: str
    # Conversation turns, in the order sent by the client.
    messages: List[OpenAiChatMessage]
    # Streaming flag; off unless the client asks for it.
    stream: Optional[bool] = False
    # Optional completion length limit; read from the "max_tokens" alias.
    maxTokens: Optional[int] = Field(alias="max_tokens", default=None)
    # Optional sampling temperature; unset means "not specified".
    temperature: Optional[float] = None
class OpenAiChatCompletionChoice(BaseModel):
    """One generated alternative inside a chat completion response."""

    # Position of this choice within the response's choice list.
    index: int
    # The generated message.
    message: OpenAiChatMessage
    # Declared with the snake_case alias "finish_reason"; defaults to "stop".
    finishReason: str = Field(alias="finish_reason", default="stop")
class OpenAiChatCompletionUsage(BaseModel):
    """Token accounting block of a chat completion response.

    Every counter defaults to 0 and is declared with a snake_case alias.
    NOTE(review): no populate-by-name configuration is visible on this model,
    so Pydantic presumably only accepts the alias names as constructor
    keywords - confirm against the Pydantic version in use.
    """

    # Tokens consumed by the prompt (alias "prompt_tokens").
    promptTokens: int = Field(alias="prompt_tokens", default=0)
    # Tokens produced for the completion (alias "completion_tokens").
    completionTokens: int = Field(alias="completion_tokens", default=0)
    # Combined token count (alias "total_tokens").
    totalTokens: int = Field(alias="total_tokens", default=0)
class OpenAiChatCompletionResponse(BaseModel):
    """Response body of the OpenAI-style chat completion endpoint."""

    # Identifier of this completion.
    id: str
    # Fixed object-type tag of a chat completion.
    object: str = "chat.completion"
    # Integer timestamp supplied by the caller that builds the response.
    created: int
    # Model name reported back to the client.
    model: str
    # Generated alternatives.
    choices: List[OpenAiChatCompletionChoice]
    # Token accounting for the request.
    usage: OpenAiChatCompletionUsage
# ============================================================================
# PDF Helper Functions
# ============================================================================
@ -322,6 +380,46 @@ def _getExternalModelName(internalName: str) -> str:
"""Get the external model name from internal Ollama name."""
return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
def _contentToText(content: Any) -> str:
"""Normalize OpenAI message content into plain text."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
textParts = []
for part in content:
if isinstance(part, str):
textParts.append(part)
continue
if isinstance(part, dict):
partText = part.get("text")
if isinstance(partText, str):
textParts.append(partText)
return "\n".join([part for part in textParts if part.strip()])
if isinstance(content, dict):
contentText = content.get("text")
if isinstance(contentText, str):
return contentText
return str(content)
def _messagesToPrompt(messages: List[OpenAiChatMessage]) -> str:
    """Flatten OpenAI chat messages into one prompt string for Ollama generate.

    Each message with non-blank text becomes a ``"<role>: <text>"`` paragraph.
    Paragraphs are separated by blank lines and followed by a trailing
    ``"assistant:"`` cue. Returns an empty string when no message has text.
    """
    turns = []
    for entry in messages:
        body = _contentToText(entry.content).strip()
        if body:
            turns.append(f"{entry.role}: {body}")

    if turns:
        # The open-ended cue asks the model to continue as the assistant.
        return "\n\n".join(turns + ["assistant:"])
    return ""
# ============================================================================
# Authentication & Rate Limiting
# ============================================================================
@ -342,6 +440,28 @@ async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")
return xApiKey
async def _verifyCursorApiKey(authorization: Optional[str] = Header(None)) -> str:
    """Verify the Bearer token for the Cursor OpenAI-compatible endpoints.

    Args:
        authorization: Raw ``Authorization`` request header, if present.

    Returns:
        The API key taken from the header once it has been validated.

    Raises:
        HTTPException: 503 when no Cursor API key is configured on the
            server; 401 when the header is missing, is not a Bearer
            credential, or carries a key that does not match.
    """
    # Local import keeps this block self-contained; hmac is standard library.
    import hmac

    expectedApiKey = CONFIG.get("cursorApiKey")
    if not expectedApiKey:
        raise HTTPException(
            status_code=503,
            detail="Cursor API key not configured on server"
        )

    if not authorization:
        raise HTTPException(status_code=401, detail="Authorization header required")

    # HTTP authentication scheme names are case-insensitive, so "bearer"
    # and "BEARER" are accepted alongside "Bearer".
    scheme, separator, credentials = authorization.partition(" ")
    if not separator or scheme.lower() != "bearer":
        raise HTTPException(status_code=401, detail="Bearer token required")

    providedApiKey = credentials.strip()
    # Constant-time comparison avoids leaking how much of the key matched.
    # Comparing bytes also tolerates non-ASCII header values, which the
    # str form of compare_digest rejects.
    keysMatch = hmac.compare_digest(
        providedApiKey.encode("utf-8"),
        expectedApiKey.encode("utf-8")
    )
    if not keysMatch:
        raise HTTPException(status_code=401, detail="Invalid API key")

    return providedApiKey
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
"""Check rate limit for the authenticated API key."""
allowed, info = rateLimiter.isAllowed(apiKey)
@ -474,6 +594,128 @@ async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
return models
@app.get("/v1/models", response_model=OpenAiModelsResponse, tags=["OpenAI Compatible"])
async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
    """List the externally visible models in OpenAI format for Cursor.

    One entry is produced per key of ``MODEL_MAPPING``. All entries share a
    single timestamp taken when the request is handled.
    """
    timestamp = int(time.time())
    entries = [
        OpenAiModelInfo(id=modelName, created=timestamp)
        for modelName in MODEL_MAPPING.keys()
    ]
    return OpenAiModelsResponse(data=entries)
@app.post(
    "/v1/chat/completions",
    response_model=OpenAiChatCompletionResponse,
    tags=["OpenAI Compatible"]
)
async def _openAiChatCompletions(
    request: OpenAiChatCompletionRequest,
    cursorApiKey: str = Depends(_verifyCursorApiKey)
):
    """OpenAI-compatible chat completions endpoint for Cursor.

    Flattens the chat messages into a single prompt, forwards it to the
    Ollama ``/api/generate`` endpoint (non-streaming) and wraps the answer in
    an OpenAI-style chat completion object.

    Args:
        request: Parsed OpenAI-style chat completion request.
        cursorApiKey: Validated Cursor API key; also used as rate-limit key.

    Returns:
        An ``OpenAiChatCompletionResponse`` with one assistant choice and the
        token counts reported by Ollama.

    Raises:
        HTTPException: 400 for streaming requests, requests without text
            content, or vision models; 429 when the rate limit is exceeded;
            404 when Ollama does not know the model; the upstream status code
            for other non-200 Ollama responses; 504 on upstream timeout;
            503 when Ollama cannot be reached.
    """
    if request.stream:
        raise HTTPException(
            status_code=400,
            detail="Streaming is not supported by this endpoint"
        )

    # The "cursor:" prefix keeps these rate-limit keys distinct from keys
    # passed to the limiter without a prefix.
    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "Rate limit exceeded",
                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
                "retryAfter": info["retryAfter"],
                "limit": info["limit"],
                "remaining": info["remaining"]
            },
            headers={
                "Retry-After": str(int(info["retryAfter"])),
                "X-RateLimit-Limit": str(info["limit"]),
                "X-RateLimit-Remaining": str(info["remaining"]),
                "X-RateLimit-Reset": str(info["resetSeconds"])
            }
        )

    promptText = _messagesToPrompt(request.messages).strip()
    if not promptText:
        raise HTTPException(status_code=400, detail="messages must contain text content")

    internalModelName = _getInternalModelName(request.model)
    if _isVisionModel(internalModelName):
        raise HTTPException(
            status_code=400,
            detail="Vision models are not supported on /v1/chat/completions"
        )

    requestOptions = {
        "num_ctx": 8192
    }
    if request.temperature is not None:
        requestOptions["temperature"] = request.temperature
    if request.maxTokens is not None:
        requestOptions["num_predict"] = request.maxTokens

    requestBody = {
        "model": internalModelName,
        "prompt": promptText,
        "stream": False,
        "options": requestOptions
    }

    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )

            if response.status_code == 404:
                raise HTTPException(
                    status_code=404,
                    detail=f'Model "{request.model}" not found'
                )

            if response.status_code != 200:
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
                )

            responseData = response.json()
            # "or" fallbacks also cover keys that are present but null.
            responseText = (responseData.get("response") or "").strip()
            promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
            evalCount = int(responseData.get("eval_count") or 0)

            return OpenAiChatCompletionResponse(
                id=f"chatcmpl-{uuid.uuid4().hex}",
                created=int(time.time()),
                model=request.model,
                choices=[
                    OpenAiChatCompletionChoice(
                        index=0,
                        message=OpenAiChatMessage(role="assistant", content=responseText)
                    )
                ],
                # The usage fields are declared with aliases, so they must be
                # populated by alias name. Field-name keywords are ignored by
                # Pydantic and leave every counter at its default of 0.
                usage=OpenAiChatCompletionUsage(
                    prompt_tokens=promptEvalCount,
                    completion_tokens=evalCount,
                    total_tokens=promptEvalCount + evalCount
                )
            )

    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)")
    except httpx.ConnectError:
        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream")
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus():
"""Check Ollama connection status and list available models."""

View file

@ -6,6 +6,10 @@
# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Separater API Key für Cursor (OpenAI-kompatible /v1 Endpoints)
# Cursor sendet diesen Key als Authorization: Bearer <key>
CURSOR_API_KEY = tQGh9-nsjBg9Dv2sm2Y97u7rcjYyuR5Kkwc6VliPeGc
# Ollama Server URL
OLLAMA_URL = http://localhost:11434