Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- Updated provider streaming logic for HTTPX 0.28 and closed TestClient sessions in tests
- CLI command `list-models` to display registry entries
- Module-level docstrings across router and local agent modules
- Optional shared-secret auth middleware (`ROUTER_SHARED_SECRET`)



Expand Down
5 changes: 5 additions & 0 deletions docs/router_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This API currently supports the following:
- Local agent forwarding (only vllm, Docker-based workers)
- Proxying to OpenAI
- SQLite-backed model registry
- Optional shared-secret auth via the `Authorization` header


**Note:** Features such as caching, rate limiting, smart routing,
Expand Down Expand Up @@ -104,6 +105,7 @@ HF_CACHE_DIR=data/hf_models
HF_DEVICE=cpu
HUGGING_FACE_HUB_TOKEN=
RATE_LIMIT_REQUESTS=60
ROUTER_SHARED_SECRET=
```


Expand All @@ -114,6 +116,9 @@ RATE_LIMIT_REQUESTS=60
For OpenRouter, both `OPENROUTER_BASE_URL` and `EXTERNAL_OPENROUTER_KEY` must be
set before the router can forward requests to the service.

If `ROUTER_SHARED_SECRET` is set, the router requires
`Authorization: Bearer <secret>` on every request; requests without it are
rejected with `401 Unauthorized`.

The code defines a few tuning variables reserved for future smart routing.
They can be set as environment variables or placed under `[tool.router]` in
`pyproject.toml`:
Expand Down
6 changes: 6 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,10 @@ curl -X POST http://localhost:8000/v1/chat/completions \
-d '{"model":"local_mistral","messages":[{"role":"user","content":"hello"}]}'
```

If `ROUTER_SHARED_SECRET` is configured, include an Authorization header:

```bash
curl -H 'Authorization: Bearer mysecret' ...
```

Requests for models prefixed with `local` are forwarded to the Local Agent.
19 changes: 19 additions & 0 deletions router/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations

from fastapi import Request
from fastapi.responses import JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware

from .settings import settings


class AuthMiddleware(BaseHTTPMiddleware):
    """Middleware enforcing a shared-secret ``Authorization`` header.

    When ``settings.shared_secret`` is set, every request must carry
    ``Authorization: Bearer <secret>``; any other request is answered with a
    401 JSON response. When no secret is configured, requests pass through
    unchanged.
    """

    async def dispatch(self, request: Request, call_next):
        """Reject the request with 401 unless it presents the shared secret.

        Args:
            request: The incoming HTTP request.
            call_next: Callable that forwards the request down the stack.

        Returns:
            A 401 ``JSONResponse`` when a secret is configured and the
            ``Authorization`` header does not match it exactly; otherwise the
            response produced by ``call_next``.
        """
        # Imported locally so the block does not depend on a file-level import.
        import hmac

        secret = settings.shared_secret
        if secret:
            # A missing header is compared as an empty string so it can never
            # match the non-empty expected value.
            supplied = request.headers.get("Authorization") or ""
            expected = f"Bearer {secret}"
            # Compare in constant time so response timing does not leak how
            # much of the secret a guess got right. Bytes are used because
            # compare_digest rejects non-ASCII str arguments.
            if not hmac.compare_digest(
                supplied.encode("utf-8"), expected.encode("utf-8")
            ):
                return JSONResponse(
                    {"detail": "Unauthorized"},
                    status_code=401,
                    # A 401 response is expected to carry a challenge header.
                    headers={"WWW-Authenticate": "Bearer"},
                )
        return await call_next(request)
5 changes: 5 additions & 0 deletions router/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@

from starlette.middleware.base import BaseHTTPMiddleware

from .auth import AuthMiddleware
from .settings import settings

from prometheus_client import (
Counter,
Histogram,
Expand Down Expand Up @@ -201,6 +204,8 @@ async def dispatch(self, request: Request, call_next):

# Build the application object and register its middleware.
app = FastAPI(title="Intelligent Inference Router")
app.add_middleware(RateLimitMiddleware)
# The auth middleware is registered only when a shared secret is configured.
# This check runs once, when the module is imported.
# NOTE(review): Starlette appears to run the most recently added middleware
# first, so auth would execute before rate limiting and rejected requests
# would not count against the rate limit -- confirm this is intended.
if settings.shared_secret:
    app.add_middleware(AuthMiddleware)

MODEL_REGISTRY: dict[str, ModelEntry] = {}

Expand Down
14 changes: 14 additions & 0 deletions router/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

import os
from dataclasses import dataclass, field


@dataclass
class Settings:
"""Application configuration loaded from environment variables."""

shared_secret: str | None = os.getenv("ROUTER_SHARED_SECRET")


settings = Settings()
67 changes: 67 additions & 0 deletions tests/router/test_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import importlib
import os
import sys

from fastapi.testclient import TestClient

import router.registry as registry
from sqlalchemy import create_engine


def reload_router(secret: str | None):
    """Re-import the router with ``ROUTER_SHARED_SECRET`` set to *secret*.

    Passing ``None`` removes the variable from the environment. If
    ``router.main`` is already loaded, its Prometheus collectors are
    unregistered first. Then whichever of ``router.settings``,
    ``router.auth`` and ``router.main`` are already loaded get reloaded, in
    that order.

    Returns:
        The ``router.main`` module.
    """
    if secret is None:
        os.environ.pop("ROUTER_SHARED_SECRET", None)
    else:
        os.environ["ROUTER_SHARED_SECRET"] = secret

    if "router.main" in sys.modules:
        from prometheus_client import REGISTRY

        loaded_main = sys.modules["router.main"]
        metric_names = ("REQUEST_COUNTER", "CACHE_HIT_COUNTER", "REQUEST_LATENCY")
        for attr in metric_names:
            collector = getattr(loaded_main, attr, None)
            if collector is None:
                continue
            try:
                REGISTRY.unregister(collector)
            except KeyError:
                # Collector was not registered; nothing to remove.
                pass

    # Settings is reloaded first and main last, matching the import chain
    # main -> auth -> settings.
    for module_name in ("router.settings", "router.auth", "router.main"):
        if module_name in sys.modules:
            importlib.reload(sys.modules[module_name])

    return importlib.import_module("router.main")


def setup_db(monkeypatch, tmp_path, router_main):
    """Point the router and the registry at a new SQLite file in *tmp_path*.

    Patches ``router_main.SQLITE_DB_PATH``, rebinds the registry module's
    path, engine and session factory to ``models.db`` under *tmp_path*,
    creates the tables, and finally calls ``router_main.load_registry()``.
    """
    db_file = str(tmp_path / "models.db")
    monkeypatch.setattr(router_main, "SQLITE_DB_PATH", db_file)

    # The registry module is rebound directly rather than through monkeypatch.
    registry.SQLITE_DB_PATH = db_file
    engine = create_engine(f"sqlite:///{db_file}")
    registry.engine = engine
    registry.SessionLocal = registry.sessionmaker(bind=engine)

    registry.create_tables()
    router_main.load_registry()


def test_unauthorized(monkeypatch, tmp_path):
    """A request without an Authorization header gets 401 when a secret is set."""
    app_module = reload_router("s3cret")
    try:
        setup_db(monkeypatch, tmp_path, app_module)
        body = {
            "model": "dummy",
            "messages": [{"role": "user", "content": "x"}],
        }
        with TestClient(app_module.app) as client:
            response = client.post("/v1/chat/completions", json=body)
            assert response.status_code == 401
    finally:
        # Reload without the secret so later tests see an unauthenticated app.
        reload_router(None)


def test_authorized(monkeypatch, tmp_path):
    """A request carrying the matching bearer secret gets a 200 response."""
    app_module = reload_router("s3cret")
    try:
        setup_db(monkeypatch, tmp_path, app_module)
        body = {
            "model": "dummy",
            "messages": [{"role": "user", "content": "x"}],
        }
        auth_headers = {"Authorization": "Bearer s3cret"}
        with TestClient(app_module.app) as client:
            response = client.post(
                "/v1/chat/completions",
                json=body,
                headers=auth_headers,
            )
            assert response.status_code == 200
    finally:
        # Reload without the secret so later tests see an unauthenticated app.
        reload_router(None)
Loading