keyfix
This commit is contained in:
parent
f9b91501d8
commit
c70c03c637
2 changed files with 246 additions and 0 deletions
242
app.py
242
app.py
|
|
@ -17,6 +17,7 @@ import json
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
@ -55,6 +56,7 @@ def _loadConfig() -> Dict[str, Any]:
|
||||||
configPath = os.path.join(os.path.dirname(__file__), "config.ini")
|
configPath = os.path.join(os.path.dirname(__file__), "config.ini")
|
||||||
config = {
|
config = {
|
||||||
"apiKey": None,
|
"apiKey": None,
|
||||||
|
"cursorApiKey": None,
|
||||||
"ollamaUrl": "http://localhost:11434",
|
"ollamaUrl": "http://localhost:11434",
|
||||||
"authUsername": "poweron",
|
"authUsername": "poweron",
|
||||||
"authPassword": "poweron",
|
"authPassword": "poweron",
|
||||||
|
|
@ -78,6 +80,8 @@ def _loadConfig() -> Dict[str, Any]:
|
||||||
# Map config keys
|
# Map config keys
|
||||||
if key == "PRIVATE_LLM_API_KEY":
|
if key == "PRIVATE_LLM_API_KEY":
|
||||||
config["apiKey"] = value
|
config["apiKey"] = value
|
||||||
|
elif key == "CURSOR_API_KEY":
|
||||||
|
config["cursorApiKey"] = value
|
||||||
elif key == "OLLAMA_URL":
|
elif key == "OLLAMA_URL":
|
||||||
config["ollamaUrl"] = value
|
config["ollamaUrl"] = value
|
||||||
elif key == "AUTH_USERNAME":
|
elif key == "AUTH_USERNAME":
|
||||||
|
|
@ -95,6 +99,7 @@ def _loadConfig() -> Dict[str, Any]:
|
||||||
|
|
||||||
# Override with environment variables if set
|
# Override with environment variables if set
|
||||||
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
|
config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
|
||||||
|
config["cursorApiKey"] = os.environ.get("CURSOR_API_KEY", config["cursorApiKey"])
|
||||||
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
|
config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
|
||||||
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
|
config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
|
||||||
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
|
config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
|
||||||
|
|
@ -243,6 +248,59 @@ class OllamaStatusResponse(BaseModel):
|
||||||
totalModels: Optional[int] = None
|
totalModels: Optional[int] = None
|
||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiModelInfo(BaseModel):
    """One model entry as returned by the OpenAI-style model listing."""

    # Model identifier reported to the client.
    id: str
    # Object type tag; defaults to "model".
    object: str = "model"
    # Integer timestamp supplied by whoever builds the entry.
    created: int
    # Exposed under the alias "owned_by"; defaults to "poweron".
    ownedBy: str = Field(default="poweron", alias="owned_by")
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiModelsResponse(BaseModel):
    """Envelope for the OpenAI-style model listing."""

    # Object type tag; defaults to "list".
    object: str = "list"
    # The model entries contained in the listing.
    data: List[OpenAiModelInfo]
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiChatMessage(BaseModel):
    """A single chat message in OpenAI wire format."""

    # Speaker of the message (free-form string, not validated here).
    role: str
    # Message payload; typed as Any, so no particular shape is enforced
    # by this model.
    content: Any
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiChatCompletionRequest(BaseModel):
    """Request body accepted by the OpenAI-style chat completion endpoint."""

    # Name of the model the client asks for.
    model: str
    # Conversation to complete.
    messages: List[OpenAiChatMessage]
    # Whether the client requests streaming; defaults to False.
    stream: Optional[bool] = False
    # Read from the "max_tokens" alias; None when not supplied.
    maxTokens: Optional[int] = Field(default=None, alias="max_tokens")
    # Sampling temperature; None when not supplied.
    temperature: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiChatCompletionChoice(BaseModel):
    """One completion alternative inside a chat completion response."""

    # Position of this choice in the response's choice list.
    index: int
    # The generated message.
    message: OpenAiChatMessage
    # Exposed under the alias "finish_reason"; defaults to "stop".
    finishReason: str = Field(default="stop", alias="finish_reason")
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiChatCompletionUsage(BaseModel):
    """Token accounting attached to a chat completion response."""

    # NOTE(review): every field is declared with an alias and this class sets
    # no populate-by-name option, so keyword construction presumably has to use
    # the alias names (prompt_tokens, ...) -- confirm against the pydantic
    # version in use.

    # Exposed under the alias "prompt_tokens"; defaults to 0.
    promptTokens: int = Field(default=0, alias="prompt_tokens")
    # Exposed under the alias "completion_tokens"; defaults to 0.
    completionTokens: int = Field(default=0, alias="completion_tokens")
    # Exposed under the alias "total_tokens"; defaults to 0.
    totalTokens: int = Field(default=0, alias="total_tokens")
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAiChatCompletionResponse(BaseModel):
    """Response body produced by the OpenAI-style chat completion endpoint."""

    # Identifier of this completion.
    id: str
    # Object type tag; defaults to "chat.completion".
    object: str = "chat.completion"
    # Integer timestamp supplied by whoever builds the response.
    created: int
    # Model name echoed back to the client.
    model: str
    # Generated alternatives.
    choices: List[OpenAiChatCompletionChoice]
    # Token accounting for the request.
    usage: OpenAiChatCompletionUsage
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# PDF Helper Functions
|
# PDF Helper Functions
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -322,6 +380,46 @@ def _getExternalModelName(internalName: str) -> str:
|
||||||
"""Get the external model name from internal Ollama name."""
|
"""Get the external model name from internal Ollama name."""
|
||||||
return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
|
return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
|
||||||
|
|
||||||
|
|
||||||
|
def _contentToText(content: Any) -> str:
|
||||||
|
"""Normalize OpenAI message content into plain text."""
|
||||||
|
if content is None:
|
||||||
|
return ""
|
||||||
|
if isinstance(content, str):
|
||||||
|
return content
|
||||||
|
if isinstance(content, list):
|
||||||
|
textParts = []
|
||||||
|
for part in content:
|
||||||
|
if isinstance(part, str):
|
||||||
|
textParts.append(part)
|
||||||
|
continue
|
||||||
|
if isinstance(part, dict):
|
||||||
|
partText = part.get("text")
|
||||||
|
if isinstance(partText, str):
|
||||||
|
textParts.append(partText)
|
||||||
|
return "\n".join([part for part in textParts if part.strip()])
|
||||||
|
if isinstance(content, dict):
|
||||||
|
contentText = content.get("text")
|
||||||
|
if isinstance(contentText, str):
|
||||||
|
return contentText
|
||||||
|
return str(content)
|
||||||
|
|
||||||
|
|
||||||
|
def _messagesToPrompt(messages: List[OpenAiChatMessage]) -> str:
    """Render chat messages as one prompt string for Ollama generate.

    Each message with non-blank text becomes a ``"<role>: <text>"`` turn.
    Turns are separated by blank lines and followed by a trailing
    ``"assistant:"`` cue. Returns an empty string when no message
    contributes any text.
    """
    turns = []
    for entry in messages:
        body = _contentToText(entry.content).strip()
        if body:
            turns.append(f"{entry.role}: {body}")

    if turns:
        return "\n\n".join(turns + ["assistant:"])
    return ""
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Authentication & Rate Limiting
|
# Authentication & Rate Limiting
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
@ -342,6 +440,28 @@ async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")
|
||||||
return xApiKey
|
return xApiKey
|
||||||
|
|
||||||
|
|
||||||
|
async def _verifyCursorApiKey(authorization: Optional[str] = Header(None)) -> str:
    """Verify the Bearer token for the Cursor OpenAI-compatible endpoints.

    Args:
        authorization: Raw ``Authorization`` request header, if present.

    Returns:
        The supplied token, once it has matched the configured key.

    Raises:
        HTTPException: 503 when no ``cursorApiKey`` is configured; 401 when
            the header is missing, is not a Bearer credential, or carries
            a token that does not match.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import section.
    import hmac

    expectedApiKey = CONFIG.get("cursorApiKey")
    if not expectedApiKey:
        raise HTTPException(
            status_code=503,
            detail="Cursor API key not configured on server"
        )

    if not authorization:
        raise HTTPException(status_code=401, detail="Authorization header required")

    # HTTP auth scheme names are case-insensitive, so accept "bearer" etc.
    scheme, separator, credentials = authorization.partition(" ")
    if not separator or scheme.lower() != "bearer":
        raise HTTPException(status_code=401, detail="Bearer token required")

    providedApiKey = credentials.strip()

    # Constant-time comparison avoids leaking key prefixes through timing.
    # Bytes are compared because compare_digest rejects non-ASCII str.
    keysMatch = hmac.compare_digest(
        providedApiKey.encode("utf-8"),
        expectedApiKey.encode("utf-8"),
    )
    if not keysMatch:
        raise HTTPException(status_code=401, detail="Invalid API key")

    return providedApiKey
|
||||||
|
|
||||||
|
|
||||||
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
|
async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
|
||||||
"""Check rate limit for the authenticated API key."""
|
"""Check rate limit for the authenticated API key."""
|
||||||
allowed, info = rateLimiter.isAllowed(apiKey)
|
allowed, info = rateLimiter.isAllowed(apiKey)
|
||||||
|
|
@ -474,6 +594,128 @@ async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
|
||||||
|
|
||||||
return models
|
return models
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/models", response_model=OpenAiModelsResponse, tags=["OpenAI Compatible"])
async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
    """List every externally visible model name in OpenAI format for Cursor.

    All entries share one ``created`` timestamp taken when the request is
    handled.
    """
    now = int(time.time())
    entries = [
        OpenAiModelInfo(id=modelName, created=now)
        for modelName in MODEL_MAPPING
    ]
    return OpenAiModelsResponse(data=entries)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
    "/v1/chat/completions",
    response_model=OpenAiChatCompletionResponse,
    tags=["OpenAI Compatible"]
)
async def _openAiChatCompletions(
    request: OpenAiChatCompletionRequest,
    cursorApiKey: str = Depends(_verifyCursorApiKey)
):
    """OpenAI-compatible chat completions endpoint for Cursor.

    Flattens the chat messages into a single prompt, sends it to Ollama's
    ``/api/generate`` endpoint without streaming, and wraps the answer in
    an OpenAI-style completion response.

    Args:
        request: Parsed OpenAI-style chat completion request.
        cursorApiKey: Bearer token already validated by the dependency.

    Returns:
        An ``OpenAiChatCompletionResponse`` with one choice and token usage.

    Raises:
        HTTPException: 400 for streaming requests, empty prompts or vision
            models; 429 when the rate limit is exceeded; 404 when Ollama
            does not know the model; the upstream status code for other
            non-200 answers; 502 when the upstream body is not a JSON
            object; 504 on upstream timeout; 503 when Ollama is unreachable.
    """
    if request.stream:
        raise HTTPException(
            status_code=400,
            detail="Streaming is not supported by this endpoint"
        )

    # Rate-limit bucket is keyed separately from the X-API-Key clients.
    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "Rate limit exceeded",
                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
                "retryAfter": info["retryAfter"],
                "limit": info["limit"],
                "remaining": info["remaining"]
            },
            headers={
                "Retry-After": str(int(info["retryAfter"])),
                "X-RateLimit-Limit": str(info["limit"]),
                "X-RateLimit-Remaining": str(info["remaining"]),
                "X-RateLimit-Reset": str(info["resetSeconds"])
            }
        )

    promptText = _messagesToPrompt(request.messages).strip()
    if not promptText:
        raise HTTPException(status_code=400, detail="messages must contain text content")

    internalModelName = _getInternalModelName(request.model)
    if _isVisionModel(internalModelName):
        raise HTTPException(
            status_code=400,
            detail="Vision models are not supported on /v1/chat/completions"
        )

    requestOptions = {
        "num_ctx": 8192
    }
    if request.temperature is not None:
        requestOptions["temperature"] = request.temperature
    if request.maxTokens is not None:
        requestOptions["num_predict"] = request.maxTokens

    requestBody = {
        "model": internalModelName,
        "prompt": promptText,
        "stream": False,
        "options": requestOptions
    }

    # Only the network call sits inside the try, so the transport-error
    # mapping below cannot mask errors raised while building the response.
    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)")
    except httpx.ConnectError:
        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream")

    if response.status_code == 404:
        raise HTTPException(
            status_code=404,
            detail=f'Model "{request.model}" not found'
        )
    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
        )

    # A 200 with a non-JSON or non-object body is an upstream fault, not a
    # server crash: report it as 502 instead of an unhandled exception.
    try:
        responseData = response.json()
    except ValueError:
        responseData = None
    if not isinstance(responseData, dict):
        raise HTTPException(status_code=502, detail="Invalid response from Ollama upstream")

    # `or` also covers explicit nulls, which .get(key, default) lets through.
    responseText = (responseData.get("response") or "").strip()
    promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
    evalCount = int(responseData.get("eval_count") or 0)

    # The usage model declares aliases and no populate-by-name option, so the
    # counts are passed by alias; field-name keywords would be ignored and
    # every count would fall back to its default of 0.
    usage = OpenAiChatCompletionUsage(
        prompt_tokens=promptEvalCount,
        completion_tokens=evalCount,
        total_tokens=promptEvalCount + evalCount
    )

    return OpenAiChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex}",
        created=int(time.time()),
        model=request.model,
        choices=[
            OpenAiChatCompletionChoice(
                index=0,
                message=OpenAiChatMessage(role="assistant", content=responseText)
            )
        ],
        usage=usage
    )
|
||||||
|
|
||||||
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
|
@app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
|
||||||
async def _ollamaStatus():
|
async def _ollamaStatus():
|
||||||
"""Check Ollama connection status and list available models."""
|
"""Check Ollama connection status and list available models."""
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,10 @@
|
||||||
# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
# Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||||
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
|
PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
|
||||||
|
|
||||||
|
# Separater API Key für Cursor (OpenAI-kompatible /v1 Endpoints)
|
||||||
|
# Cursor sendet diesen Key als Authorization: Bearer <key>
|
||||||
|
CURSOR_API_KEY = tQGh9-nsjBg9Dv2sm2Y97u7rcjYyuR5Kkwc6VliPeGc
|
||||||
|
|
||||||
# Ollama Server URL
|
# Ollama Server URL
|
||||||
OLLAMA_URL = http://localhost:11434
|
OLLAMA_URL = http://localhost:11434
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue