Move all API routes, OpenAI-compatible routes, web UI routes, shared config, models, rate limiter, and auth logic into dedicated files (config.py, routeApi.py, routeOpenAi.py, routeWeb.py). app.py now serves as a clean entry point. Made-with: Cursor
145 lines
4.8 KiB
Python
145 lines
4.8 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""OpenAI-compatible routes for Cursor integration (/v1/models, /v1/chat/completions)."""
|
|
|
|
import time
|
|
import uuid
|
|
import logging
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, HTTPException, Depends
|
|
|
|
from config import (
|
|
CONFIG, MODEL_MAPPING,
|
|
rateLimiter,
|
|
_isVisionModel, _getInternalModelName, _messagesToPrompt,
|
|
_verifyCursorApiKey,
|
|
OpenAiChatCompletionRequest, OpenAiChatCompletionResponse,
|
|
OpenAiChatCompletionChoice, OpenAiChatCompletionUsage,
|
|
OpenAiChatMessage, OpenAiModelInfo, OpenAiModelsResponse,
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Router on which the /v1/* endpoints below are registered; tagged "OpenAI Compatible".
router = APIRouter(tags=["OpenAI Compatible"])
|
|
|
|
|
|
@router.get("/v1/models", response_model=OpenAiModelsResponse)
async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
    """List the configured models in OpenAI's ``/v1/models`` format.

    One entry is produced per external model name in ``MODEL_MAPPING``.
    All entries share a single ``created`` timestamp taken when the
    request is handled. The request passes through the
    ``_verifyCursorApiKey`` dependency before reaching this handler.
    """
    timestamp = int(time.time())
    return OpenAiModelsResponse(
        data=[
            OpenAiModelInfo(id=modelName, created=timestamp)
            for modelName in MODEL_MAPPING.keys()
        ]
    )
|
|
|
|
|
|
@router.post(
    "/v1/chat/completions",
    response_model=OpenAiChatCompletionResponse,
)
async def _openAiChatCompletions(
    request: OpenAiChatCompletionRequest,
    cursorApiKey: str = Depends(_verifyCursorApiKey)
):
    """OpenAI-compatible chat completions endpoint for Cursor.

    Converts the chat messages into a single prompt, sends it to the
    Ollama ``/api/generate`` endpoint as a non-streaming request and wraps
    the reply in an OpenAI-style chat completion response.

    Args:
        request: Parsed chat completion request body.
        cursorApiKey: Value produced by the ``_verifyCursorApiKey``
            dependency; also used as the rate-limit bucket key.

    Returns:
        An ``OpenAiChatCompletionResponse`` with one assistant choice and
        usage counts read from ``prompt_eval_count`` / ``eval_count``.

    Raises:
        HTTPException: 400 for streaming requests, prompts without text or
            vision models; 429 when the rate limiter rejects the call;
            404 when the upstream answers 404; the upstream status code
            for any other non-200 answer; 502 for other transport errors
            or an unparsable upstream body; 503 when the upstream cannot
            be reached; 504 on upstream timeout.
    """
    if request.stream:
        raise HTTPException(
            status_code=400,
            detail="Streaming is not supported by this endpoint"
        )

    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
    if not allowed:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "Rate limit exceeded",
                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
                "retryAfter": info["retryAfter"],
                "limit": info["limit"],
                "remaining": info["remaining"]
            },
            headers={
                "Retry-After": str(int(info["retryAfter"])),
                "X-RateLimit-Limit": str(info["limit"]),
                "X-RateLimit-Remaining": str(info["remaining"]),
                "X-RateLimit-Reset": str(info["resetSeconds"])
            }
        )

    promptText = _messagesToPrompt(request.messages).strip()
    if not promptText:
        raise HTTPException(status_code=400, detail="messages must contain text content")

    internalModelName = _getInternalModelName(request.model)
    if _isVisionModel(internalModelName):
        raise HTTPException(
            status_code=400,
            detail="Vision models are not supported on /v1/chat/completions"
        )

    requestOptions = {
        "num_ctx": 8192
    }
    if request.temperature is not None:
        requestOptions["temperature"] = request.temperature
    if request.maxTokens is not None:
        requestOptions["num_predict"] = request.maxTokens

    requestBody = {
        "model": internalModelName,
        "prompt": promptText,
        "stream": False,
        "options": requestOptions
    }

    # Only the network call lives inside the try block, so the handlers
    # below deal exclusively with transport-level failures.
    try:
        async with httpx.AsyncClient(timeout=3600.0) as client:
            response = await client.post(
                f"{CONFIG['ollamaUrl']}/api/generate",
                json=requestBody
            )
    except httpx.TimeoutException as err:
        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)") from err
    except httpx.ConnectError as err:
        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream") from err
    except httpx.RequestError as err:
        # Remaining transport failures (read/write/protocol errors) used to
        # escape as an unhandled 500; report them as a bad gateway instead.
        logger.warning("Ollama request failed: %s", err)
        raise HTTPException(status_code=502, detail="Ollama upstream request failed") from err

    if response.status_code == 404:
        raise HTTPException(
            status_code=404,
            detail=f'Model "{request.model}" not found'
        )
    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
        )

    try:
        responseData = response.json()
    except ValueError as err:
        # A 200 answer with a non-JSON body is an upstream fault, not ours.
        logger.warning("Ollama returned a non-JSON body: %s", err)
        raise HTTPException(status_code=502, detail="Invalid response from Ollama upstream") from err
    if not isinstance(responseData, dict):
        raise HTTPException(status_code=502, detail="Invalid response from Ollama upstream")

    # ``or`` fallbacks tolerate both missing keys and explicit JSON nulls.
    responseText = (responseData.get("response") or "").strip()
    promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
    evalCount = int(responseData.get("eval_count") or 0)

    return OpenAiChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex}",
        created=int(time.time()),
        model=request.model,
        choices=[
            OpenAiChatCompletionChoice(
                index=0,
                message=OpenAiChatMessage(role="assistant", content=responseText)
            )
        ],
        usage=OpenAiChatCompletionUsage(
            promptTokens=promptEvalCount,
            completionTokens=evalCount,
            totalTokens=promptEvalCount + evalCount
        )
    )
|