From c70c03c63743a5b0d7221a7d65d8470c0a4d83f5 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 12 Mar 2026 21:42:05 +0100
Subject: [PATCH] keyfix
---
app.py | 242 +++++++++++++++++++++++++++++++++++++++++++++++++++++
config.ini | 4 +
2 files changed, 246 insertions(+)
diff --git a/app.py b/app.py
index 40a1a73..1e00470 100644
--- a/app.py
+++ b/app.py
@@ -17,6 +17,7 @@ import json
import re
import logging
import time
+import uuid
from collections import defaultdict
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
@@ -55,6 +56,7 @@ def _loadConfig() -> Dict[str, Any]:
configPath = os.path.join(os.path.dirname(__file__), "config.ini")
config = {
"apiKey": None,
+ "cursorApiKey": None,
"ollamaUrl": "http://localhost:11434",
"authUsername": "poweron",
"authPassword": "poweron",
@@ -78,6 +80,8 @@ def _loadConfig() -> Dict[str, Any]:
# Map config keys
if key == "PRIVATE_LLM_API_KEY":
config["apiKey"] = value
+ elif key == "CURSOR_API_KEY":
+ config["cursorApiKey"] = value
elif key == "OLLAMA_URL":
config["ollamaUrl"] = value
elif key == "AUTH_USERNAME":
@@ -95,6 +99,7 @@ def _loadConfig() -> Dict[str, Any]:
# Override with environment variables if set
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
+ config["cursorApiKey"] = os.environ.get("CURSOR_API_KEY", config["cursorApiKey"])
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
@@ -243,6 +248,59 @@ class OllamaStatusResponse(BaseModel):
totalModels: Optional[int] = None
error: Optional[str] = None
+
+class OpenAiModelInfo(BaseModel):
+    """One entry of the OpenAI-style model list served by GET /v1/models."""
+    id: str  # external model name
+    object: str = "model"
+    created: int  # Unix time in seconds
+    ownedBy: str = Field("poweron", alias="owned_by")
+
+
+class OpenAiModelsResponse(BaseModel):
+ """Response body of GET /v1/models: an OpenAI-style list of model entries."""
+ object: str = "list"
+ data: List[OpenAiModelInfo]  # one entry per external model name in MODEL_MAPPING
+
+
+class OpenAiChatMessage(BaseModel):
+ """One chat turn; used for request messages and for the assistant reply."""
+ role: str  # not validated here; the endpoint replies with role "assistant"
+ content: Any  # str, list of parts, dict or None (see _contentToText)
+
+
+class OpenAiChatCompletionRequest(BaseModel):
+    """Request body accepted by POST /v1/chat/completions."""
+    model: str  # external model name, mapped by _getInternalModelName
+    messages: List[OpenAiChatMessage]
+    stream: Optional[bool] = False  # a true value is rejected with HTTP 400
+    maxTokens: Optional[int] = Field(None, alias="max_tokens")  # sent as num_predict
+    temperature: Optional[float] = None
+
+
+class OpenAiChatCompletionChoice(BaseModel):
+    """A single generated answer inside a chat completion response."""
+    index: int
+    message: OpenAiChatMessage
+    finishReason: str = Field("stop", alias="finish_reason")
+
+
+class OpenAiChatCompletionUsage(BaseModel):
+    """Token counters of a chat completion; every counter defaults to 0."""
+    promptTokens: int = Field(0, alias="prompt_tokens")
+    completionTokens: int = Field(0, alias="completion_tokens")
+    totalTokens: int = Field(0, alias="total_tokens")  # prompt + completion
+
+
+class OpenAiChatCompletionResponse(BaseModel):
+ """Response body of POST /v1/chat/completions (non-streaming)."""
+ id: str  # the endpoint generates "chatcmpl-" followed by a random UUID hex
+ object: str = "chat.completion"
+ created: int  # Unix time in seconds at which the response was built
+ model: str  # the model name as given in the request
+ choices: List[OpenAiChatCompletionChoice]  # the endpoint sends exactly one choice
+ usage: OpenAiChatCompletionUsage
+
# ============================================================================
# PDF Helper Functions
# ============================================================================
@@ -322,6 +380,46 @@ def _getExternalModelName(internalName: str) -> str:
"""Get the external model name from internal Ollama name."""
return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
+
+def _contentToText(content: Any) -> str:
+    """Flatten an OpenAI ``content`` value (str, part list, dict or None) to text.
+
+    A list keeps its string items plus the string ``text`` of its dict items,
+    drops blank fragments and joins the rest with newlines. A dict yields its
+    string ``text``. None yields "". Anything else goes through ``str()``.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, dict):
+        textValue = content.get("text")
+        # A dict without a string "text" is stringified as a whole.
+        return textValue if isinstance(textValue, str) else str(content)
+    if not isinstance(content, list):
+        return str(content)
+    fragments = [
+        item if isinstance(item, str) else item.get("text")
+        for item in content if isinstance(item, (str, dict))
+    ]
+    return "\n".join(f for f in fragments if isinstance(f, str) and f.strip())
+
+
+def _messagesToPrompt(messages: List[OpenAiChatMessage]) -> str:
+    """Render chat messages as one ``role: text`` transcript for Ollama generate.
+
+    Messages whose content normalizes to blank text are skipped. Turns are
+    separated by an empty line and the transcript ends with ``assistant:``;
+    without any text-bearing message the result is "".
+    """
+    # Lazily pair each message with its normalized, stripped text.
+    normalized = ((entry, _contentToText(entry.content).strip()) for entry in messages)
+    rendered = [f"{entry.role}: {text}" for entry, text in normalized if text]
+    if not rendered:
+        return ""
+    # End the transcript on an open "assistant:" turn.
+    return "\n\n".join(rendered + ["assistant:"])
+
# ============================================================================
# Authentication & Rate Limiting
# ============================================================================
@@ -342,6 +440,28 @@ async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")
return xApiKey
+async def _verifyCursorApiKey(authorization: Optional[str] = Header(None)) -> str:
+    """Validate the ``Authorization: Bearer <key>`` header of the Cursor endpoints.
+
+    Returns the presented key. Raises HTTPException 503 when no Cursor key is
+    configured and 401 when the header is missing, not Bearer, or the key differs.
+    """
+    import hmac  # local import: keeps this block independent of the file's import list
+    expectedApiKey = CONFIG.get("cursorApiKey")
+    if not expectedApiKey:
+        raise HTTPException(status_code=503, detail="Cursor API key not configured on server")
+    if not authorization:
+        raise HTTPException(status_code=401, detail="Authorization header required")
+    # The auth scheme is case-insensitive (RFC 7235), so "bearer" is accepted too.
+    if authorization[:7].lower() != "bearer ":
+        raise HTTPException(status_code=401, detail="Bearer token required")
+    providedApiKey = authorization[7:].strip()
+    # Constant-time comparison: "!=" would leak matching prefixes through timing.
+    if not hmac.compare_digest(providedApiKey.encode(), expectedApiKey.encode()):
+        raise HTTPException(status_code=401, detail="Invalid API key")
+    return providedApiKey
+
+
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
"""Check rate limit for the authenticated API key."""
allowed, info = rateLimiter.isAllowed(apiKey)
@@ -474,6 +594,128 @@ async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
return models
+
+@app.get("/v1/models", response_model=OpenAiModelsResponse, tags=["OpenAI Compatible"])
+async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
+    """Return the external model names of MODEL_MAPPING as an OpenAI model list.
+
+    ``cursorApiKey`` exists only to run the ``_verifyCursorApiKey`` dependency;
+    its value is not read. Every entry carries the same ``created`` value, the
+    Unix time of this call.
+    """
+    # One timestamp for the whole listing.
+    now = int(time.time())
+    # Iterating the mapping yields its keys, i.e. the external names.
+    entries = [OpenAiModelInfo(id=name, created=now) for name in MODEL_MAPPING]
+    return OpenAiModelsResponse(data=entries)
+
+
+@app.post(
+    "/v1/chat/completions",
+    response_model=OpenAiChatCompletionResponse,
+    tags=["OpenAI Compatible"]
+)
+async def _openAiChatCompletions(
+    request: OpenAiChatCompletionRequest,
+    cursorApiKey: str = Depends(_verifyCursorApiKey)
+):
+    """Serve an OpenAI-style chat completion through Ollama ``/api/generate``.
+
+    The messages are flattened into one prompt by ``_messagesToPrompt`` and sent
+    to Ollama without streaming. The generated text is returned as a single
+    assistant choice, with Ollama's prompt/eval counts reported as token usage.
+
+    Raises:
+        HTTPException: 400 for ``stream``, for messages without text and for
+            vision models; 429 when the rate limiter rejects the key; 404 when
+            Ollama answers 404; Ollama's status code for any other non-200
+            answer; 504 on an upstream timeout; 503 when Ollama is unreachable.
+    """
+    if request.stream:
+        raise HTTPException(status_code=400, detail="Streaming is not supported by this endpoint")
+    # The "cursor:" prefix keeps this limiter key distinct from X-API-Key callers.
+    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
+    if not allowed:
+        raise HTTPException(
+            status_code=429,
+            detail={
+                "error": "Rate limit exceeded",
+                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
+                "retryAfter": info["retryAfter"],
+                "limit": info["limit"],
+                "remaining": info["remaining"]
+            },
+            headers={
+                "Retry-After": str(int(info["retryAfter"])),
+                "X-RateLimit-Limit": str(info["limit"]),
+                "X-RateLimit-Remaining": str(info["remaining"]),
+                "X-RateLimit-Reset": str(info["resetSeconds"])
+            }
+        )
+
+    promptText = _messagesToPrompt(request.messages).strip()
+    if not promptText:
+        raise HTTPException(status_code=400, detail="messages must contain text content")
+
+    internalModelName = _getInternalModelName(request.model)
+    if _isVisionModel(internalModelName):
+        raise HTTPException(
+            status_code=400,
+            detail="Vision models are not supported on /v1/chat/completions"
+        )
+
+    requestOptions: Dict[str, Any] = {"num_ctx": 8192}
+    if request.temperature is not None:
+        requestOptions["temperature"] = request.temperature
+    if request.maxTokens is not None:
+        requestOptions["num_predict"] = request.maxTokens
+    requestBody = {
+        "model": internalModelName,
+        "prompt": promptText,
+        "stream": False,
+        "options": requestOptions
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=3600.0) as client:
+            response = await client.post(f"{CONFIG['ollamaUrl']}/api/generate", json=requestBody)
+    except httpx.TimeoutException:
+        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)")
+    except httpx.ConnectError:
+        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream")
+
+    if response.status_code == 404:
+        raise HTTPException(status_code=404, detail=f'Model "{request.model}" not found')
+    if response.status_code != 200:
+        raise HTTPException(
+            status_code=response.status_code,
+            detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
+        )
+    responseData = response.json()
+    # "or" also covers keys that are present but null, which .get() defaults miss.
+    responseText = (responseData.get("response") or "").strip()
+    promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
+    evalCount = int(responseData.get("eval_count") or 0)
+
+    return OpenAiChatCompletionResponse(
+        id=f"chatcmpl-{uuid.uuid4().hex}",
+        created=int(time.time()),
+        model=request.model,
+        choices=[
+            OpenAiChatCompletionChoice(
+                index=0,
+                message=OpenAiChatMessage(role="assistant", content=responseText)
+            )
+        ],
+        # Keyword arguments are matched by alias (pydantic default); the camelCase
+        # field names would be ignored and every counter would stay at 0.
+        usage=OpenAiChatCompletionUsage(
+            prompt_tokens=promptEvalCount,
+            completion_tokens=evalCount,
+            total_tokens=promptEvalCount + evalCount
+        )
+    )
+
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
async def _ollamaStatus():
"""Check Ollama connection status and list available models."""
diff --git a/config.ini b/config.ini
index cfe9374..baf9e7c 100644
--- a/config.ini
+++ b/config.ini
@@ -6,6 +6,10 @@
# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
+# Separate API key for Cursor (OpenAI-compatible /v1 endpoints).
+# Cursor sends it as "Authorization: Bearer <key>". Do not commit a real key here; the CURSOR_API_KEY environment variable overrides this value.
+CURSOR_API_KEY = tQGh9-nsjBg9Dv2sm2Y97u7rcjYyuR5Kkwc6VliPeGc
+
# Ollama Server URL
OLLAMA_URL = http://localhost:11434