This commit is contained in:
ValueOn AG 2026-03-12 21:42:05 +01:00
parent f9b91501d8
commit c70c03c637
2 changed files with 246 additions and 0 deletions

242
app.py
View file

@ -17,6 +17,7 @@ import json
import re import re
import logging import logging
import time import time
import uuid
from collections import defaultdict from collections import defaultdict
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@ -55,6 +56,7 @@ def _loadConfig() -> Dict[str, Any]:
configPath = os.path.join(os.path.dirname(__file__), "config.ini") configPath = os.path.join(os.path.dirname(__file__), "config.ini")
config = { config = {
"apiKey": None, "apiKey": None,
"cursorApiKey": None,
"ollamaUrl": "http://localhost:11434", "ollamaUrl": "http://localhost:11434",
"authUsername": "poweron", "authUsername": "poweron",
"authPassword": "poweron", "authPassword": "poweron",
@ -78,6 +80,8 @@ def _loadConfig() -> Dict[str, Any]:
# Map config keys # Map config keys
if key == "PRIVATE_LLM_API_KEY": if key == "PRIVATE_LLM_API_KEY":
config["apiKey"] = value config["apiKey"] = value
elif key == "CURSOR_API_KEY":
config["cursorApiKey"] = value
elif key == "OLLAMA_URL": elif key == "OLLAMA_URL":
config["ollamaUrl"] = value config["ollamaUrl"] = value
elif key == "AUTH_USERNAME": elif key == "AUTH_USERNAME":
@ -95,6 +99,7 @@ def _loadConfig() -> Dict[str, Any]:
# Override with environment variables if set # Override with environment variables if set
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"]) config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
config["cursorApiKey"] = os.environ.get("CURSOR_API_KEY", config["cursorApiKey"])
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"]) config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"]) config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"]) config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
@ -243,6 +248,59 @@ class OllamaStatusResponse(BaseModel):
totalModels: Optional[int] = None totalModels: Optional[int] = None
error: Optional[str] = None error: Optional[str] = None
class OpenAiModelInfo(BaseModel):
    """OpenAI-compatible model object."""
    # Model identifier.
    id: str
    # Object type tag; defaults to "model".
    object: str = "model"
    # Creation time as an integer timestamp.
    created: int
    # Owner label; carries the alias "owned_by" and defaults to "poweron".
    ownedBy: str = Field(default="poweron", alias="owned_by")
class OpenAiModelsResponse(BaseModel):
    """OpenAI-compatible models list response."""
    # Object type tag; defaults to "list".
    object: str = "list"
    # The model entries contained in the list.
    data: List[OpenAiModelInfo]
class OpenAiChatMessage(BaseModel):
    """OpenAI-compatible chat message."""
    # Speaker role of the message (free-form string, not validated here).
    role: str
    # Message payload; typed as Any, so no particular shape is enforced by this model.
    content: Any
class OpenAiChatCompletionRequest(BaseModel):
    """OpenAI-compatible chat completion request."""
    # Name of the model the client asks for.
    model: str
    # Conversation messages, in the order supplied by the client.
    messages: List[OpenAiChatMessage]
    # Streaming flag; defaults to False.
    stream: Optional[bool] = False
    # Optional completion token limit; carries the alias "max_tokens".
    maxTokens: Optional[int] = Field(default=None, alias="max_tokens")
    # Optional sampling temperature; None means "not specified".
    temperature: Optional[float] = None
class OpenAiChatCompletionChoice(BaseModel):
    """OpenAI-compatible completion choice."""
    # Position of this choice in the choices list.
    index: int
    # The generated message for this choice.
    message: OpenAiChatMessage
    # Why generation ended; carries the alias "finish_reason" and defaults to "stop".
    finishReason: str = Field(default="stop", alias="finish_reason")
class OpenAiChatCompletionUsage(BaseModel):
    """OpenAI-compatible token usage."""
    # NOTE(review): every field below carries an alias. Unless this model enables
    # population by field name, pydantic fills aliased fields from the alias
    # keyword (e.g. prompt_tokens=...) and leaves the default when only the
    # attribute name is passed -- confirm against the pydantic version in use.
    # Tokens consumed by the prompt; alias "prompt_tokens", default 0.
    promptTokens: int = Field(default=0, alias="prompt_tokens")
    # Tokens produced in the completion; alias "completion_tokens", default 0.
    completionTokens: int = Field(default=0, alias="completion_tokens")
    # Combined token count; alias "total_tokens", default 0.
    totalTokens: int = Field(default=0, alias="total_tokens")
class OpenAiChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat completion response."""
    # Identifier of this completion.
    id: str
    # Object type tag; defaults to "chat.completion".
    object: str = "chat.completion"
    # Creation time as an integer timestamp.
    created: int
    # Model name reported back to the client.
    model: str
    # Generated choices.
    choices: List[OpenAiChatCompletionChoice]
    # Token accounting for the request.
    usage: OpenAiChatCompletionUsage
# ============================================================================ # ============================================================================
# PDF Helper Functions # PDF Helper Functions
# ============================================================================ # ============================================================================
@ -322,6 +380,46 @@ def _getExternalModelName(internalName: str) -> str:
"""Get the external model name from internal Ollama name.""" """Get the external model name from internal Ollama name."""
return INTERNAL_TO_EXTERNAL.get(internalName, internalName) return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
def _contentToText(content: Any) -> str:
"""Normalize OpenAI message content into plain text."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
textParts = []
for part in content:
if isinstance(part, str):
textParts.append(part)
continue
if isinstance(part, dict):
partText = part.get("text")
if isinstance(partText, str):
textParts.append(partText)
return "\n".join([part for part in textParts if part.strip()])
if isinstance(content, dict):
contentText = content.get("text")
if isinstance(contentText, str):
return contentText
return str(content)
def _messagesToPrompt(messages: List[OpenAiChatMessage]) -> str:
    """Render chat messages as one prompt string for Ollama generate.

    Each message whose normalized, stripped text is non-empty becomes a
    ``"<role>: <text>"`` entry. Entries are separated by a blank line and a
    trailing ``"assistant:"`` cue is appended. Returns an empty string when
    no message contributes any text.
    """
    normalized = (
        (message.role, _contentToText(message.content).strip())
        for message in messages
    )
    turns = [f"{role}: {text}" for role, text in normalized if text]
    if not turns:
        return ""
    # The trailing cue asks the model to continue as the assistant.
    return "\n\n".join(turns + ["assistant:"])
# ============================================================================ # ============================================================================
# Authentication & Rate Limiting # Authentication & Rate Limiting
# ============================================================================ # ============================================================================
@ -342,6 +440,28 @@ async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")
return xApiKey return xApiKey
async def _verifyCursorApiKey(authorization: Optional[str] = Header(None)) -> str:
    """Verify Bearer token for Cursor OpenAI-compatible endpoints.

    Args:
        authorization: Raw ``Authorization`` request header, if the client
            sent one.

    Returns:
        The bearer token supplied by the client, once it has matched the
        configured ``cursorApiKey``.

    Raises:
        HTTPException: 503 when no Cursor API key is configured on the
            server; 401 when the header is missing, does not start with
            ``"Bearer "``, or carries a key that does not match.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import hmac

    expectedApiKey = CONFIG.get("cursorApiKey")
    if not expectedApiKey:
        raise HTTPException(
            status_code=503,
            detail="Cursor API key not configured on server"
        )

    # A 401 should tell the client which authentication scheme is expected.
    challengeHeaders = {"WWW-Authenticate": "Bearer"}

    if not authorization:
        raise HTTPException(
            status_code=401,
            detail="Authorization header required",
            headers=challengeHeaders
        )

    if not authorization.startswith("Bearer "):
        raise HTTPException(
            status_code=401,
            detail="Bearer token required",
            headers=challengeHeaders
        )

    providedApiKey = authorization[len("Bearer "):].strip()

    # Compare in constant time so response latency does not leak how many
    # leading characters of the key were correct. Encoding to bytes lets
    # compare_digest accept non-ASCII input instead of raising TypeError.
    keysMatch = hmac.compare_digest(
        providedApiKey.encode("utf-8"),
        expectedApiKey.encode("utf-8")
    )
    if not keysMatch:
        raise HTTPException(
            status_code=401,
            detail="Invalid API key",
            headers=challengeHeaders
        )

    return providedApiKey
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str: async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
"""Check rate limit for the authenticated API key.""" """Check rate limit for the authenticated API key."""
allowed, info = rateLimiter.isAllowed(apiKey) allowed, info = rateLimiter.isAllowed(apiKey)
@ -474,6 +594,128 @@ async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
return models return models
@app.get("/v1/models", response_model=OpenAiModelsResponse, tags=["OpenAI Compatible"])
async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
    """OpenAI-compatible models endpoint for Cursor."""
    # Every entry shares one timestamp taken at request time.
    timestamp = int(time.time())
    # One entry per external model name known to MODEL_MAPPING.
    entries = [
        OpenAiModelInfo(id=modelName, created=timestamp)
        for modelName in MODEL_MAPPING
    ]
    return OpenAiModelsResponse(data=entries)
@app.post(
    "/v1/chat/completions",
    response_model=OpenAiChatCompletionResponse,
    tags=["OpenAI Compatible"]
)
async def _openAiChatCompletions(
    request: OpenAiChatCompletionRequest,
    cursorApiKey: str = Depends(_verifyCursorApiKey)
):
    """OpenAI-compatible chat completions endpoint for Cursor."""
    # Flow: reject streaming -> rate-limit per Cursor key -> flatten the
    # messages into one prompt -> call Ollama /api/generate (non-streaming)
    # -> wrap the reply in an OpenAI-shaped response.
    #
    # Raises HTTPException with:
    #   400  streaming requested, no text content, or a vision model
    #   404  Ollama reports the model as unknown
    #   429  rate limit exceeded for this key
    #   503  Ollama is unreachable
    #   504  Ollama timed out
    #   any other non-200 Ollama status is passed through
    if request.stream:
        raise HTTPException(
            status_code=400,
            detail="Streaming is not supported by this endpoint"
        )

    # Namespaced key so Cursor traffic is counted separately from other
    # users of the shared rate limiter.
    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "Rate limit exceeded",
                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
                "retryAfter": info["retryAfter"],
                "limit": info["limit"],
                "remaining": info["remaining"]
            },
            headers={
                "Retry-After": str(int(info["retryAfter"])),
                "X-RateLimit-Limit": str(info["limit"]),
                "X-RateLimit-Remaining": str(info["remaining"]),
                "X-RateLimit-Reset": str(info["resetSeconds"])
            }
        )

    promptText = _messagesToPrompt(request.messages).strip()
    if not promptText:
        raise HTTPException(status_code=400, detail="messages must contain text content")

    internalModelName = _getInternalModelName(request.model)
    if _isVisionModel(internalModelName):
        raise HTTPException(
            status_code=400,
            detail="Vision models are not supported on /v1/chat/completions"
        )

    # Only forward sampling options the client actually supplied.
    requestOptions = {
        "num_ctx": 8192
    }
    if request.temperature is not None:
        requestOptions["temperature"] = request.temperature
    if request.maxTokens is not None:
        requestOptions["num_predict"] = request.maxTokens

    requestBody = {
        "model": internalModelName,
        "prompt": promptText,
        "stream": False,
        "options": requestOptions
    }

    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )

            if response.status_code == 404:
                raise HTTPException(
                    status_code=404,
                    detail=f'Model "{request.model}" not found'
                )

            if response.status_code != 200:
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
                )

            responseData = response.json()
            # "or" fallbacks also cover keys that are present but null.
            responseText = (responseData.get("response") or "").strip()
            promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
            evalCount = int(responseData.get("eval_count") or 0)

            return OpenAiChatCompletionResponse(
                id=f"chatcmpl-{uuid.uuid4().hex}",
                created=int(time.time()),
                model=request.model,
                choices=[
                    OpenAiChatCompletionChoice(
                        index=0,
                        message=OpenAiChatMessage(role="assistant", content=responseText)
                    )
                ],
                # The usage fields are declared with aliases, so they must be
                # populated through the alias keywords. Attribute-name
                # keywords (promptTokens=...) are ignored under pydantic's
                # default config and would leave every count at 0.
                usage=OpenAiChatCompletionUsage(
                    prompt_tokens=promptEvalCount,
                    completion_tokens=evalCount,
                    total_tokens=promptEvalCount + evalCount
                )
            )
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)")
    except httpx.ConnectError:
        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream")
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"]) @app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus(): async def _ollamaStatus():
"""Check Ollama connection status and list available models.""" """Check Ollama connection status and list available models."""

View file

@ -6,6 +6,10 @@
# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))" # Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0 PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
# Separater API Key für Cursor (OpenAI-kompatible /v1 Endpoints)
# Cursor sendet diesen Key als Authorization: Bearer <key>
CURSOR_API_KEY = tQGh9-nsjBg9Dv2sm2Y97u7rcjYyuR5Kkwc6VliPeGc
# Ollama Server URL # Ollama Server URL
OLLAMA_URL = http://localhost:11434 OLLAMA_URL = http://localhost:11434