AIMOverse
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 1 deletion b/‎.gitignore‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 94 additions & 2 deletions b/‎README.md‎
Lines changed: 94 additions & 2 deletions
diff --git a/‎app/api/routes/chat.py‎
Lines changed: 117 additions & 2 deletions b/‎app/api/routes/chat.py‎
Lines changed: 117 additions & 2 deletions
diff --git a/‎static/.keep‎ ‎app/clients/__init__.py‎static/.keep renamed to app/clients/__init__.py b/‎static/.keep‎ ‎app/clients/__init__.py‎static/.keep renamed to app/clients/__init__.py
@@ -325,4 +325,6 @@ data/prompts/*
 .db
 
 # VS Code
-.vscode/
+.vscode/
+
+.env.litellm
@@ -84,6 +84,67 @@ The following environment variables are used by AIMO:
 | `SECRET_KEY`      | Secret Key for JWT Tokens                        | Yes      | During running applications |
 | `ADMIN_API_KEY`   | Admin Key for manage invitation codes            | Yes      | During running applications |
 
+## LiteLLM Proxy Service
+
+AIMO integrates with LiteLLM Proxy to provide multi-provider LLM support through a unified interface. This enables routing to different LLM providers (OpenAI, Anthropic, OpenRouter, local models) with automatic fallback capabilities.
+
+### Setup LiteLLM Service
+
+1. **Configure Environment Variables**
+
+   Add the following to your `.env` file:
+
+   ```bash
+   # LiteLLM Proxy Configuration
+   LLM_BASE_URL=http://localhost:4000
+   LLM_API_KEY=sk-litellm-proxy-key
+   LLM_MODEL_DEFAULT=prod_default
+   LLM_TIMEOUT=60
+   
+   # LiteLLM Master Key
+   LITELLM_MASTER_KEY=sk-litellm-proxy-key
+   
+   # Provider API Keys (add as needed)
+   OPENROUTER_API_KEY=your_openrouter_key_here
+   ```
+
+2. **Start LiteLLM Proxy Service**
+
+   ```bash
+   # Navigate to the LiteLLM directory
+   cd infra/litellm
+   
+   # Start the LiteLLM Proxy using Docker Compose
+   docker-compose -f docker-compose.litellm.yml up -d
+   ```
+
+3. **Verify LiteLLM Service**
+
+   ```bash
+   # Check if LiteLLM Proxy is running
+   curl http://localhost:4000/health
+   
+   # Test the new AIMO endpoints (requires the AIMO server to be running on port 8000)
+   curl -X GET http://localhost:8000/api/v1.0.0/chat/health
+   curl -X GET http://localhost:8000/api/v1.0.0/chat/models
+   ```
+
+4. **Stop LiteLLM Service**
+
+   ```bash
+   # Stop the LiteLLM Proxy
+   cd infra/litellm
+   docker-compose -f docker-compose.litellm.yml down
+   ```
+
+### LiteLLM Benefits
+
+- **Multi-Provider Support**: Route to different LLM providers through one interface
+- **Automatic Fallbacks**: Fallback to alternative models if primary fails
+- **Cost Optimization**: Route to cheaper models when appropriate
+- **Local Model Integration**: Support for local models via Ollama
+- **Centralized Configuration**: Manage all LLM configurations in one place
+
 ## Usage
 
 Start the AIMO server using the following command:
@@ -117,9 +178,9 @@ coverage html --title "${@-coverage}"
 ### Version: `1.0.0`
 
 The AIMO backend provides a RESTful API for interaction. The version 1.0.0 of the server has a base url of /api/v1.0.0.
-Below is an example of the main endpoint:
+Below are the main endpoints:
 
-### Endpoint: `/api/v1.0.0/chat/`
+### Original AIMO Endpoint: `/api/v1.0.0/chat/`
 
 #### Method: `POST`
 
@@ -135,6 +196,37 @@ Below is an example of the main endpoint:
 }
 ```
 
+### LiteLLM Proxy Endpoints
+
+#### Chat Completion: `/api/v1.0.0/chat/completions_proxy`
+
+**Method:** `POST`
+
+**Request Body:**
+```json
+{
+    "model": "prod_default",
+    "messages": [
+        {"role": "user", "content": "Hello!"}
+    ],
+    "temperature": 0.7,
+    "max_tokens": 100,
+    "stream": false
+}
+```
+
+#### Available Models: `/api/v1.0.0/chat/models`
+
+**Method:** `GET`
+
+Returns the list of available LLM models configured in LiteLLM.
+
+#### Health Check: `/api/v1.0.0/chat/health`
+
+**Method:** `GET`
+
+Returns the LiteLLM Proxy connection status.
+
 #### Response:
 ```json
 {
 
@@ -1,9 +1,10 @@
 import logging
-from typing import Union
-from fastapi import APIRouter
+from typing import Union, List, Dict, Any
+from fastapi import APIRouter, HTTPException
 from sse_starlette.sse import EventSourceResponse
 
 from app.ai.aimo import AIMO
+from app.clients.llm_client import llm_client
 
 logger = logging.getLogger(__name__)
 from app.models.openai import (
@@ -12,6 +13,7 @@
     ChatChoice,
     Message
 )
+from app.models.llm import LLMChatRequest, LLMChatResponse, LLMChoice, LLMMessage, LLMUsage
 
 """
 Author: Jack Pan, Wesley Xu
@@ -52,3 +54,116 @@ async def create_chat_completion(request: ChatCompletionRequest) -> Union[ChatCo
             max_new_tokens=request.max_tokens
         )
     )
+
+
+@router.post("/completions_proxy", response_model=LLMChatResponse)
+async def create_chat_completion_proxy(request: LLMChatRequest) -> Union[LLMChatResponse, EventSourceResponse]:
+    """
+    LiteLLM Proxy chat completion endpoint.
+
+    Forwards the request to the shared ``llm_client``. When ``request.stream``
+    is falsy the upstream result is converted into an ``LLMChatResponse``;
+    otherwise an ``EventSourceResponse`` is returned that emits one SSE event
+    per non-empty content delta, followed by a final ``[DONE]`` event.
+
+    Args:
+        request: Chat request (model, messages, sampling options, tools).
+
+    Returns:
+        ``LLMChatResponse`` for non-streaming requests, ``EventSourceResponse``
+        for streaming requests.
+
+    Raises:
+        HTTPException: status 500 when the non-streaming call or the response
+            conversion fails. Errors raised while a stream is already being
+            sent cannot change the HTTP status any more; they are logged and
+            re-raised, which ends the stream without the ``[DONE]`` event.
+    """
+    # Arguments shared by the streaming and non-streaming upstream calls.
+    call_kwargs = {
+        # Convert pydantic models to dict format expected by openai client
+        "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
+        "model": request.model,
+        "temperature": request.temperature,
+        "max_tokens": request.max_tokens,
+        "tools": request.tools,
+        "tool_choice": request.tool_choice,
+        "presence_penalty": request.presence_penalty,
+        "frequency_penalty": request.frequency_penalty,
+        "top_p": request.top_p,
+        "user": request.user,
+    }
+
+    if request.stream:
+        # Streaming response
+        async def stream_generator():
+            try:
+                stream = await llm_client.chat(stream=True, **call_kwargs)
+                async for chunk in stream:
+                    if not chunk.choices:
+                        continue
+                    delta = chunk.choices[0].delta
+                    # Accept both mapping-style and attribute-style deltas;
+                    # the non-streaming branch reads the client's objects by
+                    # attribute, so a dict-only lookup is not safe here.
+                    if isinstance(delta, dict):
+                        content = delta.get("content")
+                    else:
+                        content = getattr(delta, "content", None)
+                    if content:
+                        # EventSourceResponse adds the "data: " framing itself;
+                        # yielding a pre-framed string would double the prefix.
+                        yield {"data": content}
+            except Exception:
+                # The outer handler cannot see errors raised after the
+                # response has started, so log them here before propagating.
+                logger.exception("Error while streaming chat_proxy response")
+                raise
+            yield {"data": "[DONE]"}
+
+        return EventSourceResponse(stream_generator())
+
+    try:
+        # Non-streaming response
+        response = await llm_client.chat(**call_kwargs)
+
+        # Convert OpenAI response to our LLM response format
+        # NOTE(review): only role/content are copied; any tool-call data on
+        # the upstream message is not forwarded - confirm this is intended.
+        choices = [
+            LLMChoice(
+                index=choice.index,
+                message=LLMMessage(
+                    role=choice.message.role,
+                    content=choice.message.content or ""
+                ),
+                finish_reason=choice.finish_reason
+            )
+            for choice in response.choices
+        ]
+
+        usage = None
+        if response.usage:
+            usage = LLMUsage(
+                prompt_tokens=response.usage.prompt_tokens,
+                completion_tokens=response.usage.completion_tokens,
+                total_tokens=response.usage.total_tokens
+            )
+
+        return LLMChatResponse(
+            id=response.id,
+            model=response.model,
+            choices=choices,
+            usage=usage
+        )
+    except Exception as e:
+        # logger.exception keeps the traceback that logger.error would drop.
+        logger.exception("Error in chat_proxy: %s", e)
+        raise HTTPException(status_code=500, detail=f"LLM service error: {str(e)}") from e
+
+
+@router.get("/models")
+async def list_available_models() -> Dict[str, List[str]]:
+    """
+    Get available models from LiteLLM Proxy.
+
+    Returns:
+        A dict with a single ``"models"`` key holding whatever
+        ``llm_client.get_available_models()`` returned.
+
+    Raises:
+        HTTPException: status 500 when the client call fails; the original
+            exception is attached as the cause.
+    """
+    try:
+        models = await llm_client.get_available_models()
+    except Exception as e:
+        # logger.exception records the traceback, which logger.error dropped.
+        logger.exception("Error getting models: %s", e)
+        raise HTTPException(status_code=500, detail=f"Error fetching models: {str(e)}") from e
+    return {"models": models}
+
+
+@router.get("/health")
+async def health_check() -> Dict[str, Any]:
+    """
+    Report whether the LiteLLM Proxy can be reached.
+
+    Never raises: if the client check itself fails, the error is logged and
+    returned in the payload under ``"error"`` with an ``"unhealthy"`` status.
+    """
+    try:
+        connected = await llm_client.health_check()
+        payload = {
+            "status": "healthy" if connected else "unhealthy",
+            "llm_proxy_connected": connected,
+        }
+    except Exception as exc:
+        logger.error(f"Health check error: {exc}")
+        payload = {
+            "status": "unhealthy",
+            "llm_proxy_connected": False,
+            "error": str(exc),
+        }
+    return payload