Skip to content

Commit 412d06e

Browse files
authored
Merge pull request #30 from eddiedunn/k15kc8-codex/finalize-venice-provider-and-tests
Add Venice provider forwarding
2 parents b564e49 + a22840c commit 412d06e

File tree

5 files changed

+167
-7
lines changed

5 files changed

+167
-7
lines changed

.env.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ OPENROUTER_BASE_URL=https://openrouter.ai
2020
EXTERNAL_OPENROUTER_KEY=
2121
GROK_BASE_URL=https://api.groq.com  # NOTE(review): api.groq.com belongs to Groq, a different service from xAI's Grok (api.x.ai) — confirm which provider "GROK" means
2222
EXTERNAL_GROK_KEY=
23+
# Base URL for Venice provider
2324
VENICE_BASE_URL=https://api.venice.ai
25+
# API key for Venice
2426
EXTERNAL_VENICE_KEY=
2527

2628
# Rate limiting settings

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
- llm-d cluster support (`make k3s-up` and router forwarding)
1111
- Redis caching layer with TTL (`REDIS_URL`, `CACHE_TTL`)
12+
- Venice provider forwarding support
1213

1314

1415
## [MVP Release]

IMPLEMENTATION_STATUS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ This section tracks features, integrations, and improvements to be implemented a
6363
- [x] OpenRouter
6464

6565
- Grok
66-
- Venice
66+
- [x] Venice
6767

6868
---
6969

router/main.py

Lines changed: 103 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from pathlib import Path
1515

1616

17-
1817
import httpx
1918
from fastapi import FastAPI, HTTPException, Request
2019
from fastapi.responses import Response, StreamingResponse, JSONResponse
@@ -191,6 +190,20 @@ async def _startup() -> None:
191190
logger.addHandler(stream_handler)
192191

193192

193+
194+
class Message(BaseModel):
    """A single chat message in an OpenAI-style conversation."""

    # Speaker role, e.g. "user", "assistant", or "system".
    role: str
    # The message text.
    content: str
197+
198+
199+
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible request body for /v1/chat/completions."""

    # Target model identifier; drives registry lookup and prefix routing.
    model: str
    # Ordered conversation history.
    messages: List[Message]
    # Generation limits; None defers to the backend provider's defaults.
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    # When True the caller requests a streamed response (streamed replies
    # are not cached — see the completion handler).
    stream: Optional[bool] = False
205+
206+
194207
class AgentRegistration(BaseModel):
195208
name: str
196209
endpoint: str
def make_cache_key(payload: "ChatCompletionRequest") -> str:
    """Derive a deterministic Redis cache key for a chat request.

    The payload is serialized with sorted keys so that logically
    identical requests always hash to the same value; the SHA-256
    digest is prefixed with "chat:" to namespace the keyspace.
    """
    body = json.dumps(payload.dict(), sort_keys=True)
    return "chat:" + hashlib.sha256(body.encode()).hexdigest()
258269

259-
260-
261270
async def forward_to_local_agent(payload: ChatCompletionRequest) -> dict:
262271
async with httpx.AsyncClient(base_url=LOCAL_AGENT_URL) as client:
263272
resp = await client.post("/infer", json=payload.dict())
async def forward_to_venice(payload: ChatCompletionRequest):
    """Forward a chat completion request to the Venice provider.

    Delegates to the shared venice.forward helper with the configured
    base URL and API key.
    """
    return await venice.forward(payload, VENICE_BASE_URL, EXTERNAL_VENICE_KEY)
366375

367-
368376
@app.post("/register")
369377
async def register_agent(payload: AgentRegistration) -> dict:
370378
"""Register a local agent and update the model registry."""
@@ -454,3 +462,92 @@ async def metrics() -> Response:
454462
"""Expose Prometheus metrics."""
455463

456464
return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
465+
466+
467+
@app.post("/v1/chat/completions")
async def chat_completions(payload: ChatCompletionRequest):
    """Route an OpenAI-style chat completion to the appropriate backend.

    NOTE(review): this handler's decorator and def line were not captured
    in this view — the diff shows ~90 orphaned body lines appended after
    the /metrics endpoint. The route and signature are reconstructed from
    the test suite, which POSTs ChatCompletionRequest payloads to
    /v1/chat/completions; confirm against the full file.

    Resolution order:
      1. select_backend() override ("local" / "openai") — bypasses cache.
      2. Redis cache lookup (non-streaming requests only).
      3. Model-registry entry, dispatched by provider type.
      4. Model-name prefix heuristics ("local", "gpt-", "llmd-").
      5. A canned dummy completion as the final fallback.
    """
    backend = select_backend(payload)

    if backend == "local":
        return await forward_to_local_agent(payload)

    # NOTE(review): this early return skips the Redis cache used by the
    # registry path below — confirm the bypass is intentional.
    if backend == "openai":
        return await forward_to_openai(payload)

    cache_key = make_cache_key(payload)
    if not payload.stream:
        cached = await redis_client.get(cache_key)
        if cached:
            return json.loads(cached)

    entry = MODEL_REGISTRY.get(payload.model)

    if entry is not None:
        if entry.type == "local":
            data = await forward_to_local_agent(payload)
            if not payload.stream:
                await redis_client.setex(cache_key, CACHE_TTL, json.dumps(data))
            return data
        if entry.type == "openai":
            return await forward_to_openai(payload)
        if entry.type == "llm-d":
            return await forward_to_llmd(payload)
        if entry.type == "anthropic":
            return await anthropic.forward(
                payload, ANTHROPIC_BASE_URL, EXTERNAL_ANTHROPIC_KEY
            )
        if entry.type == "google":
            return await google.forward(payload, GOOGLE_BASE_URL, EXTERNAL_GOOGLE_KEY)
        if entry.type == "openrouter":
            return await openrouter.forward(
                payload, OPENROUTER_BASE_URL, EXTERNAL_OPENROUTER_KEY
            )
        if entry.type == "grok":
            return await grok.forward(payload, GROK_BASE_URL, EXTERNAL_GROK_KEY)
        if entry.type == "venice":
            return await venice.forward(payload, VENICE_BASE_URL, EXTERNAL_VENICE_KEY)

        # Registered model with an unrecognized provider type: default to
        # OpenAI, caching the non-streamed reply.
        data = await forward_to_openai(payload)
        if not payload.stream:
            await redis_client.setex(cache_key, CACHE_TTL, json.dumps(data))
        return data

    if payload.model.startswith("local"):
        data = await forward_to_local_agent(payload)
        if not payload.stream:
            await redis_client.setex(cache_key, CACHE_TTL, json.dumps(data))
        return data

    if payload.model.startswith("gpt-"):
        data = await forward_to_openai(payload)
        if not payload.stream:
            await redis_client.setex(cache_key, CACHE_TTL, json.dumps(data))
        return data

    if payload.model.startswith("llmd-"):
        return await forward_to_llmd(payload)

    # Final fallback: a fixed, well-formed completion so unknown models
    # still receive a valid OpenAI-shaped response.
    dummy_text = "Hello world"
    response = {
        "id": f"cmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": payload.model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": dummy_text},
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
    if not payload.stream:
        await redis_client.setex(cache_key, CACHE_TTL, json.dumps(response))
    return response
(fifth changed file — filename not captured in this view; it is the new Venice forwarding test module)

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import httpx
2+
from fastapi import FastAPI
3+
from fastapi.testclient import TestClient
4+
5+
import router.main as router_main
6+
import router.registry as registry
7+
from sqlalchemy import create_engine
8+
9+
# In-process stand-in for the Venice API: echoes the last user message
# back with a "Venice: " prefix so the router test can verify forwarding.
venice_app = FastAPI()


@venice_app.post("/v1/chat/completions")
async def _completions(payload: router_main.ChatCompletionRequest):
    """Stub Venice chat-completions endpoint."""
    last = payload.messages[-1].content if payload.messages else ""
    reply = {"role": "assistant", "content": f"Venice: {last}"}
    return {
        "id": "ven-1",
        "object": "chat.completion",
        "created": 0,
        "model": payload.model,
        "choices": [{"index": 0, "message": reply, "finish_reason": "stop"}],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
30+
31+
32+
def test_forward_to_venice(monkeypatch, tmp_path) -> None:
    """End-to-end: a registered "venice" model is routed to the Venice stub."""
    monkeypatch.setattr(router_main, "VENICE_BASE_URL", "http://testserver")
    monkeypatch.setattr(router_main, "EXTERNAL_VENICE_KEY", "dummy")

    # Point the model registry at a throwaway SQLite DB and register a
    # model whose provider type is "venice".
    db_path = tmp_path / "models.db"
    monkeypatch.setattr(router_main, "SQLITE_DB_PATH", str(db_path))
    registry.SQLITE_DB_PATH = str(db_path)
    registry.engine = create_engine(f"sqlite:///{db_path}")
    registry.SessionLocal = registry.sessionmaker(bind=registry.engine)
    registry.create_tables()
    with registry.get_session() as session:
        registry.upsert_model(session, "venus-1", "venice", "unused")

    # Redirect every outbound AsyncClient the router creates at the
    # in-process stub app via an ASGI transport (no network).
    orig_async_client = httpx.AsyncClient
    transport = httpx.ASGITransport(app=venice_app)

    def _patched_client(*_args, **_kwargs):
        return orig_async_client(transport=transport, base_url="http://testserver")

    monkeypatch.setattr(router_main.httpx, "AsyncClient", _patched_client)

    client = TestClient(router_main.app)
    body = {
        "model": "venus-1",
        "messages": [{"role": "user", "content": "hi"}],
    }
    resp = client.post("/v1/chat/completions", json=body)
    assert resp.status_code == 200
    assert resp.json()["choices"][0]["message"]["content"] == "Venice: hi"

0 commit comments

Comments (0)