diff --git a/app.py b/app.py
index 40a1a73..1e00470 100644
--- a/app.py
+++ b/app.py
@@ -17,6 +17,8 @@ import json
 import re
 import logging
 import time
+import hmac
+import uuid
 from collections import defaultdict
 from typing import Optional, List, Dict, Any
 from contextlib import asynccontextmanager
@@ -55,6 +57,7 @@ def _loadConfig() -> Dict[str, Any]:
     configPath = os.path.join(os.path.dirname(__file__), "config.ini")
     config = {
         "apiKey": None,
+        "cursorApiKey": None,
         "ollamaUrl": "http://localhost:11434",
         "authUsername": "poweron",
         "authPassword": "poweron",
@@ -78,6 +81,8 @@ def _loadConfig() -> Dict[str, Any]:
                     # Map config keys
                     if key == "PRIVATE_LLM_API_KEY":
                         config["apiKey"] = value
+                    elif key == "CURSOR_API_KEY":
+                        config["cursorApiKey"] = value
                     elif key == "OLLAMA_URL":
                         config["ollamaUrl"] = value
                     elif key == "AUTH_USERNAME":
@@ -95,6 +100,7 @@ def _loadConfig() -> Dict[str, Any]:
 
     # Override with environment variables if set
     config["apiKey"] = os.environ.get("PRIVATE_LLM_API_KEY", config["apiKey"])
+    config["cursorApiKey"] = os.environ.get("CURSOR_API_KEY", config["cursorApiKey"])
     config["ollamaUrl"] = os.environ.get("OLLAMA_URL", config["ollamaUrl"])
     config["authUsername"] = os.environ.get("AUTH_USERNAME", config["authUsername"])
     config["authPassword"] = os.environ.get("AUTH_PASSWORD", config["authPassword"])
@@ -243,6 +249,64 @@ class OllamaStatusResponse(BaseModel):
     totalModels: Optional[int] = None
     error: Optional[str] = None
 
+
+class OpenAiModelInfo(BaseModel):
+    """OpenAI-compatible model object."""
+    id: str
+    object: str = "model"
+    created: int
+    ownedBy: str = Field(default="poweron", alias="owned_by")
+
+
+class OpenAiModelsResponse(BaseModel):
+    """OpenAI-compatible models list response."""
+    object: str = "list"
+    data: List[OpenAiModelInfo]
+
+
+class OpenAiChatMessage(BaseModel):
+    """OpenAI-compatible chat message."""
+    role: str
+    content: Any
+
+
+class OpenAiChatCompletionRequest(BaseModel):
+    """OpenAI-compatible chat completion request."""
+    model: str
+    messages: List[OpenAiChatMessage]
+    stream: Optional[bool] = False
+    maxTokens: Optional[int] = Field(default=None, alias="max_tokens")
+    temperature: Optional[float] = None
+
+
+class OpenAiChatCompletionChoice(BaseModel):
+    """OpenAI-compatible completion choice."""
+    index: int
+    message: OpenAiChatMessage
+    finishReason: str = Field(default="stop", alias="finish_reason")
+
+
+class OpenAiChatCompletionUsage(BaseModel):
+    """OpenAI-compatible token usage.
+
+    The fields expose snake_case aliases and no populate-by-name option is
+    set here, so build instances with the alias keywords
+    (``prompt_tokens=...``); camelCase keywords are expected to be ignored.
+    """
+    promptTokens: int = Field(default=0, alias="prompt_tokens")
+    completionTokens: int = Field(default=0, alias="completion_tokens")
+    totalTokens: int = Field(default=0, alias="total_tokens")
+
+
+class OpenAiChatCompletionResponse(BaseModel):
+    """OpenAI-compatible chat completion response."""
+    id: str
+    object: str = "chat.completion"
+    created: int
+    model: str
+    choices: List[OpenAiChatCompletionChoice]
+    usage: OpenAiChatCompletionUsage
+
 # ============================================================================
 # PDF Helper Functions
 # ============================================================================
@@ -322,6 +386,46 @@ def _getExternalModelName(internalName: str) -> str:
     """Get the external model name from internal Ollama name."""
     return INTERNAL_TO_EXTERNAL.get(internalName, internalName)
 
+
+def _contentToText(content: Any) -> str:
+    """Normalize OpenAI message content into plain text."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        textParts = []
+        for part in content:
+            if isinstance(part, str):
+                textParts.append(part)
+                continue
+            if isinstance(part, dict):
+                partText = part.get("text")
+                if isinstance(partText, str):
+                    textParts.append(partText)
+        return "\n".join([part for part in textParts if part.strip()])
+    if isinstance(content, dict):
+        contentText = content.get("text")
+        if isinstance(contentText, str):
+            return contentText
+    return str(content)
+
+
+def _messagesToPrompt(messages: List[OpenAiChatMessage]) -> str:
+    """Convert OpenAI chat messages to a single prompt for Ollama generate."""
+    promptLines = []
+    for message in messages:
+        normalizedText = _contentToText(message.content).strip()
+        if not normalizedText:
+            continue
+        promptLines.append(f"{message.role}: {normalizedText}")
+
+    if not promptLines:
+        return ""
+
+    promptLines.append("assistant:")
+    return "\n\n".join(promptLines)
+
 # ============================================================================
 # Authentication & Rate Limiting
 # ============================================================================
@@ -342,6 +446,45 @@ async def _verifyApiKey(xApiKey: Optional[str] = Header(None, alias="X-API-Key")
     return xApiKey
 
 
+async def _verifyCursorApiKey(authorization: Optional[str] = Header(None)) -> str:
+    """Verify the Bearer token for the Cursor OpenAI-compatible endpoints.
+
+    Args:
+        authorization: Raw ``Authorization`` request header, if present.
+
+    Returns:
+        The API key extracted from the header once it has been accepted.
+
+    Raises:
+        HTTPException: 503 when no Cursor API key is configured; 401 when the
+            header is missing, is not a Bearer token, or the key is wrong.
+    """
+    expectedApiKey = CONFIG.get("cursorApiKey")
+    if not expectedApiKey:
+        raise HTTPException(
+            status_code=503,
+            detail="Cursor API key not configured on server"
+        )
+
+    if not authorization:
+        raise HTTPException(status_code=401, detail="Authorization header required")
+
+    if not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Bearer token required")
+
+    providedApiKey = authorization[len("Bearer "):].strip()
+    # Compare in constant time so response timing does not leak how much of
+    # the key matched; encode first because compare_digest rejects non-ASCII str.
+    keysMatch = hmac.compare_digest(
+        providedApiKey.encode("utf-8"),
+        expectedApiKey.encode("utf-8")
+    )
+    if not keysMatch:
+        raise HTTPException(status_code=401, detail="Invalid API key")
+
+    return providedApiKey
+
+
 async def _checkRateLimit(apiKey: str = Depends(_verifyApiKey)) -> str:
     """Check rate limit for the authenticated API key."""
     allowed, info = rateLimiter.isAllowed(apiKey)
@@ -474,6 +617,144 @@ async def _listModels(authenticated: bool = Depends(_verifyApiKey)):
 
     return models
 
+
+@app.get("/v1/models", response_model=OpenAiModelsResponse, tags=["OpenAI Compatible"])
+async def _listOpenAiModels(cursorApiKey: str = Depends(_verifyCursorApiKey)):
+    """OpenAI-compatible models endpoint for Cursor."""
+    createdAt = int(time.time())
+    modelData = []
+    for externalName in MODEL_MAPPING.keys():
+        modelData.append(
+            OpenAiModelInfo(
+                id=externalName,
+                created=createdAt
+            )
+        )
+    return OpenAiModelsResponse(data=modelData)
+
+
+@app.post(
+    "/v1/chat/completions",
+    response_model=OpenAiChatCompletionResponse,
+    tags=["OpenAI Compatible"]
+)
+async def _openAiChatCompletions(
+    request: OpenAiChatCompletionRequest,
+    cursorApiKey: str = Depends(_verifyCursorApiKey)
+):
+    """OpenAI-compatible chat completions endpoint for Cursor.
+
+    Flattens the chat messages into a single prompt, forwards it to the
+    Ollama ``/api/generate`` endpoint (non-streaming), and wraps the reply in
+    an OpenAI-style ``chat.completion`` response.
+
+    Raises:
+        HTTPException: 400 for streaming requests, empty prompts, or vision
+            models; 429 when rate limited; 404 when Ollama does not know the
+            model; 503/504 when Ollama is unreachable or times out; otherwise
+            the non-200 upstream status code is passed through.
+    """
+    if request.stream:
+        raise HTTPException(
+            status_code=400,
+            detail="Streaming is not supported by this endpoint"
+        )
+
+    allowed, info = rateLimiter.isAllowed(f"cursor:{cursorApiKey}")
+    if not allowed:
+        raise HTTPException(
+            status_code=429,
+            detail={
+                "error": "Rate limit exceeded",
+                "message": f"Too many requests. Please retry after {info['retryAfter']} seconds.",
+                "retryAfter": info["retryAfter"],
+                "limit": info["limit"],
+                "remaining": info["remaining"]
+            },
+            headers={
+                "Retry-After": str(int(info["retryAfter"])),
+                "X-RateLimit-Limit": str(info["limit"]),
+                "X-RateLimit-Remaining": str(info["remaining"]),
+                "X-RateLimit-Reset": str(info["resetSeconds"])
+            }
+        )
+
+    promptText = _messagesToPrompt(request.messages).strip()
+    if not promptText:
+        raise HTTPException(status_code=400, detail="messages must contain text content")
+
+    internalModelName = _getInternalModelName(request.model)
+    if _isVisionModel(internalModelName):
+        raise HTTPException(
+            status_code=400,
+            detail="Vision models are not supported on /v1/chat/completions"
+        )
+
+    requestOptions = {
+        "num_ctx": 8192
+    }
+    if request.temperature is not None:
+        requestOptions["temperature"] = request.temperature
+    if request.maxTokens is not None:
+        requestOptions["num_predict"] = request.maxTokens
+
+    requestBody = {
+        "model": internalModelName,
+        "prompt": promptText,
+        "stream": False,
+        "options": requestOptions
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=3600.0) as client:
+            response = await client.post(
+                f"{CONFIG['ollamaUrl']}/api/generate",
+                json=requestBody
+            )
+
+        if response.status_code == 404:
+            raise HTTPException(
+                status_code=404,
+                detail=f'Model "{request.model}" not found'
+            )
+        if response.status_code != 200:
+            raise HTTPException(
+                status_code=response.status_code,
+                detail=f"Ollama API error: {response.status_code} - {response.text[:200]}"
+            )
+
+        responseData = response.json()
+        # Tolerate missing *or* null values: a plain .get() default would
+        # still hand None to int() / .strip() when the key is present as null.
+        responseText = (responseData.get("response") or "").strip()
+        promptEvalCount = int(responseData.get("prompt_eval_count") or 0)
+        evalCount = int(responseData.get("eval_count") or 0)
+
+        return OpenAiChatCompletionResponse(
+            id=f"chatcmpl-{uuid.uuid4().hex}",
+            created=int(time.time()),
+            model=request.model,
+            choices=[
+                OpenAiChatCompletionChoice(
+                    index=0,
+                    message=OpenAiChatMessage(role="assistant", content=responseText)
+                )
+            ],
+            # Build by alias: without a populate-by-name option Pydantic
+            # ignores the camelCase field names as keywords, which would
+            # leave every count at its default of 0.
+            usage=OpenAiChatCompletionUsage(
+                prompt_tokens=promptEvalCount,
+                completion_tokens=evalCount,
+                total_tokens=promptEvalCount + evalCount
+            )
+        )
+
+    except httpx.TimeoutException:
+        raise HTTPException(status_code=504, detail="Upstream timeout (Ollama)")
+    except httpx.ConnectError:
+        raise HTTPException(status_code=503, detail="Cannot connect to Ollama upstream")
+
 @app.get("/api/ollama/status", response_model=OllamaStatusResponse, tags=["System"])
 async def _ollamaStatus():
     """Check Ollama connection status and list available models."""
diff --git a/config.ini b/config.ini
index cfe9374..baf9e7c 100644
--- a/config.ini
+++ b/config.ini
@@ -6,6 +6,12 @@
 # Key generieren: python -c "import secrets; print(secrets.token_urlsafe(32))"
 PRIVATE_LLM_API_KEY = jL4vyNfh_tv4rxoRaHKW88sVWNHbj32GsxuKE2A8bf0
 
+# Separate API key for Cursor (OpenAI-compatible /v1 endpoints)
+# Cursor sends this key as Authorization: Bearer
+# NOTE(review): this looks like a real secret committed to the repository.
+# Rotate it and prefer the CURSOR_API_KEY environment variable, which overrides this value.
+CURSOR_API_KEY = tQGh9-nsjBg9Dv2sm2Y97u7rcjYyuR5Kkwc6VliPeGc
+
 # Ollama Server URL
 OLLAMA_URL = http://localhost:11434
 