11 changes: 10 additions & 1 deletion Makefile
@@ -26,14 +26,21 @@ DOCKER_BUILD_ARGS := \
BUILD_DMR ?= 1

# Main targets
.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
# Default target
.DEFAULT_GOAL := build

# Build the Go application
build:
CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .

# Build dmrlet binary
build-dmrlet:
@echo "Building dmrlet..."
cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w" -o dmrlet .
mv cmd/dmrlet/dmrlet .
@echo "Built: dmrlet"

# Run the application locally
run: build
@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +53,7 @@ run: build
# Clean build artifacts
clean:
rm -f $(APP_NAME)
rm -f dmrlet
rm -f model-runner.sock
rm -rf $(MODELS_PATH)

@@ -148,6 +156,7 @@ docker-run-impl:
help:
@echo "Available targets:"
@echo " build - Build the Go application"
@echo " build-dmrlet - Build dmrlet binary (lightweight node agent)"
@echo " run - Run the application locally"
@echo " clean - Clean build artifacts"
@echo " test - Run tests"
62 changes: 62 additions & 0 deletions cmd/dmrlet/commands/list.go
@@ -0,0 +1,62 @@
package commands

import (
"fmt"
"os"

"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
)

func newListCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "list",
Aliases: []string{"ls"},
Short: "List running models",
Long: `List all running inference models managed by dmrlet.

Examples:
dmrlet list
dmrlet ls`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
return runList(cmd)
},
}

return cmd
}

func runList(cmd *cobra.Command) error {
ctx := cmd.Context()

if err := initManager(ctx); err != nil {
return fmt.Errorf("initializing manager: %w", err)
}

running, err := manager.List(ctx)
if err != nil {
return fmt.Errorf("listing models: %w", err)
}

if len(running) == 0 {
cmd.Println("No running models")
return nil
}

table := tablewriter.NewTable(os.Stdout,
tablewriter.WithHeader([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"}),
)

for _, m := range running {
table.Append([]string{
m.ModelRef,
string(m.Backend),
fmt.Sprintf("%d", m.Port),
m.Endpoint,
})
}

table.Render()
return nil
}
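
For reference, the fields this table reads imply a running-model record along the following lines. This is a hypothetical mirror inferred from usage here and in serve.go; the real type lives in pkg/dmrlet/inference and may differ.

```go
// Hypothetical shape of the entries returned by manager.List, inferred from
// the fields read above (ModelRef, Backend, Port, Endpoint); the actual
// definition in pkg/dmrlet/inference may differ.
package inference

// Backend names the inference engine serving a model, e.g. "llama-server"
// or "vllm" (a named string type, since list.go converts it with string()).
type Backend string

// RunningModel describes one model currently being served.
type RunningModel struct {
	ModelRef string  // model reference, e.g. "ai/smollm2"
	Backend  Backend // backend engine serving the model
	Port     int     // host port the API is exposed on
	Endpoint string  // base URL, e.g. "http://localhost:30000/v1"
}
```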
44 changes: 44 additions & 0 deletions cmd/dmrlet/commands/pull.go
@@ -0,0 +1,44 @@
package commands

import (
"fmt"
"os"

"github.com/spf13/cobra"
)

func newPullCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "pull MODEL",
Short: "Pull a model without serving",
Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
This is useful for pre-downloading models.

Examples:
dmrlet pull ai/smollm2
dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return runPull(cmd, args[0])
},
}

return cmd
}

func runPull(cmd *cobra.Command, modelRef string) error {
ctx := cmd.Context()

if err := initStore(); err != nil {
return fmt.Errorf("initializing store: %w", err)
}

cmd.Printf("Pulling model: %s\n", modelRef)

if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
return fmt.Errorf("pulling model: %w", err)
}

cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
return nil
}
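
From this single call site, the store capability pull.go depends on can be distilled to roughly the interface below. This is an inference from usage, not the actual definition in pkg/dmrlet/models.

```go
// Hypothetical distillation of the one store method pull.go relies on.
// The concrete implementation is models.Store in pkg/dmrlet/models and
// may expose a different signature.
package models

import (
	"context"
	"io"
)

// ModelPuller ensures the model identified by modelRef is present in the
// local store, pulling it from a registry if needed and streaming progress
// output to w.
type ModelPuller interface {
	EnsureModel(ctx context.Context, modelRef string, w io.Writer) error
}
```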
140 changes: 140 additions & 0 deletions cmd/dmrlet/commands/root.go
@@ -0,0 +1,140 @@
// Package commands implements the dmrlet CLI commands.
package commands

import (
"context"
"os"
"os/signal"
"syscall"

"github.com/docker/model-runner/pkg/dmrlet/inference"
"github.com/docker/model-runner/pkg/dmrlet/models"
"github.com/docker/model-runner/pkg/dmrlet/runtime"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)

var (
// Global flags
verbose bool
logJSON bool

// Shared state
log *logrus.Entry
store *models.Store
rt *runtime.Runtime
manager *inference.Manager
)

// rootCmd is the root command for dmrlet.
var rootCmd = &cobra.Command{
Use: "dmrlet",
Short: "Lightweight node agent for Docker Model Runner",
Long: `dmrlet is a lightweight node agent for Docker Model Runner - a "Kubelet for AI"
that runs inference containers directly with zero YAML overhead.

Example:
dmrlet serve ai/smollm2
# Pulls model, starts inference container, exposes OpenAI API at http://localhost:30000/v1`,
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
// Skip initialization for help and version commands
if cmd.Name() == "help" || cmd.Name() == "version" {
return nil
}

// Setup logging
logger := logrus.New()
if verbose {
logger.SetLevel(logrus.DebugLevel)
} else {
logger.SetLevel(logrus.InfoLevel)
}
if logJSON {
logger.SetFormatter(&logrus.JSONFormatter{})
}

// Check DMRLET_LOG_LEVEL environment variable
if level := os.Getenv("DMRLET_LOG_LEVEL"); level != "" {
if lvl, err := logrus.ParseLevel(level); err == nil {
logger.SetLevel(lvl)
}
}

log = logger.WithField("component", "dmrlet")

return nil
},
SilenceUsage: true,
SilenceErrors: true,
}

// Execute runs the root command.
func Execute() error {
// Setup context with signal handling
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer cancel()

return rootCmd.ExecuteContext(ctx)
}

func init() {
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")
rootCmd.PersistentFlags().BoolVar(&logJSON, "log-json", false, "Output logs in JSON format")

rootCmd.AddCommand(
newServeCmd(),
newStopCmd(),
newListCmd(),
newPullCmd(),
newVersionCmd(),
)
}

// initStore initializes the model store.
func initStore() error {
if store != nil {
return nil
}

var err error
store, err = models.NewStore(
models.WithLogger(log),
)
if err != nil {
return err
}
return nil
}

// initRuntime initializes the containerd runtime.
func initRuntime(ctx context.Context) error {
if rt != nil {
return nil
}

var err error
rt, err = runtime.NewRuntime(ctx,
runtime.WithRuntimeLogger(log),
)
if err != nil {
return err
}
return nil
}

// initManager initializes the inference manager.
func initManager(ctx context.Context) error {
if err := initStore(); err != nil {
return err
}
if err := initRuntime(ctx); err != nil {
return err
}

if manager == nil {
manager = inference.NewManager(store, rt,
inference.WithManagerLogger(log),
)
}
return nil
}
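
The binary's entry point is not part of this diff. Because rootCmd sets SilenceErrors, cobra does not print the returned error itself, so main must report it; a minimal main.go would look roughly like this (a sketch assuming the path cmd/dmrlet/main.go):

```go
// cmd/dmrlet/main.go (illustrative sketch, not shown in this diff).
// rootCmd sets SilenceErrors, so main is responsible for printing
// any error returned by Execute.
package main

import (
	"fmt"
	"os"

	"github.com/docker/model-runner/cmd/dmrlet/commands"
)

func main() {
	if err := commands.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		os.Exit(1)
	}
}
```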
99 changes: 99 additions & 0 deletions cmd/dmrlet/commands/serve.go
@@ -0,0 +1,99 @@
package commands

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"syscall"

"github.com/docker/model-runner/pkg/dmrlet/inference"
"github.com/spf13/cobra"
)

type serveFlags struct {
port int
backend string
gpu bool
detach bool
}

func newServeCmd() *cobra.Command {
flags := &serveFlags{}

cmd := &cobra.Command{
Use: "serve MODEL",
Short: "Serve a model (pull if needed, start container, wait for ready)",
Long: `Serve a model by pulling it if needed, starting an inference container,
and waiting for it to become ready. The model is exposed via an OpenAI-compatible API.

Examples:
dmrlet serve ai/smollm2
dmrlet serve ai/smollm2 --port 8080
dmrlet serve ai/smollm2 --gpu
dmrlet serve ai/smollm2 --backend vllm --gpu
dmrlet serve ai/smollm2 -d # detached mode`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return runServe(cmd, args[0], flags)
},
}

cmd.Flags().IntVarP(&flags.port, "port", "p", 0, "Port to expose the API on (auto-allocated if not specified)")
cmd.Flags().StringVarP(&flags.backend, "backend", "b", "", "Inference backend (llama-server, vllm)")
cmd.Flags().BoolVar(&flags.gpu, "gpu", false, "Enable GPU support")
cmd.Flags().BoolVarP(&flags.detach, "detach", "d", false, "Run in detached mode (return immediately)")

return cmd
}

func runServe(cmd *cobra.Command, modelRef string, flags *serveFlags) error {
ctx := cmd.Context()

if err := initManager(ctx); err != nil {
return fmt.Errorf("initializing manager: %w", err)
}

opts := inference.ServeOptions{
Port: flags.port,
Backend: flags.backend,
GPU: flags.gpu,
Detach: flags.detach,
Progress: os.Stdout,
}

running, err := manager.Serve(ctx, modelRef, opts)
if err != nil {
return fmt.Errorf("serving model: %w", err)
}

cmd.Printf("\nModel %s is ready!\n", modelRef)
cmd.Printf("Endpoint: %s\n", running.Endpoint)
cmd.Printf("Backend: %s\n", running.Backend)
cmd.Printf("Port: %d\n", running.Port)
cmd.Println()
cmd.Printf("Example usage:\n")
cmd.Printf(" curl %s/chat/completions -H 'Content-Type: application/json' \\\n", running.Endpoint)
cmd.Printf(" -d '{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"Hello!\"}]}'\n", modelRef)

if flags.detach {
return nil
}

// Wait for interrupt signal
cmd.Println()
cmd.Println("Press Ctrl+C to stop the model...")

sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
<-sigCh

cmd.Println()
cmd.Println("Stopping model...")

	// ctx comes from signal.NotifyContext and is already canceled once the
	// signal fires, so use an uncanceled context for the shutdown call.
	if err := manager.Stop(context.WithoutCancel(ctx), modelRef); err != nil {
return fmt.Errorf("stopping model: %w", err)
}

cmd.Println("Model stopped.")
return nil
}
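
For a non-curl client, the printed endpoint can be exercised with a few lines of Go. A sketch only: the port is auto-allocated unless --port is set, so substitute the endpoint that serve actually prints; http://localhost:30000/v1 is the example from the root help text.

```go
// Minimal chat-completions call against a served model. Illustrative only:
// substitute the endpoint printed by `dmrlet serve` for the URL below.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	payload := []byte(`{"model":"ai/smollm2","messages":[{"role":"user","content":"Hello!"}]}`)
	resp, err := http.Post(
		"http://localhost:30000/v1/chat/completions", // endpoint printed by serve
		"application/json",
		bytes.NewReader(payload),
	)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```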