Commit 644f3d2

add dmrlet - lightweight node agent for Docker Model Runner

dmrlet is a "Kubelet for AI" that runs inference containers directly with
zero YAML overhead. It provides a simple CLI to serve models:

  dmrlet serve ai/smollm2   # Pulls model, starts inference container, exposes OpenAI API

Key features:
- Reuses existing pkg/distribution for model management
- containerd integration for container lifecycle
- GPU detection and passthrough (NVIDIA/AMD)
- Auto port allocation (30000-30999 range)
- Health checking with configurable timeout
- Backend auto-detection (llama-server for GGUF, vLLM for safetensors)

Commands: serve, stop, list, pull, version

Signed-off-by: Eric Curtin <eric.curtin@docker.com>
1 parent 232f06b commit 644f3d2
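The backend auto-detection and port allocation described in the commit message live in pkg/dmrlet/inference, which is not among the files excerpted below. As a rough sketch only — detectBackend and allocatePort are hypothetical names, not the committed implementation — the rules the commit message states could look like this in Go:

// Hypothetical sketch: NOT the committed pkg/dmrlet/inference code (not part
// of this excerpt). Illustrates the rules from the commit message:
// llama-server for GGUF, vLLM for safetensors, ports from 30000-30999.
package main

import (
	"fmt"
	"net"
	"strings"
)

// detectBackend (hypothetical name) maps a model weight format to a backend.
func detectBackend(format string) (string, error) {
	switch strings.ToLower(format) {
	case "gguf":
		return "llama-server", nil
	case "safetensors":
		return "vllm", nil
	default:
		return "", fmt.Errorf("no default backend for format %q", format)
	}
}

// allocatePort (hypothetical name) returns the first bindable port in the
// 30000-30999 range the commit message reserves for dmrlet.
func allocatePort() (int, error) {
	for port := 30000; port <= 30999; port++ {
		l, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port))
		if err != nil {
			continue // already in use; try the next one
		}
		l.Close()
		return port, nil
	}
	return 0, fmt.Errorf("no free port in 30000-30999")
}

func main() {
	backend, _ := detectBackend("gguf")
	port, _ := allocatePort()
	fmt.Printf("backend=%s port=%d\n", backend, port)
}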

File tree

20 files changed: +3893 additions, -275 deletions


Makefile

Lines changed: 10 additions & 1 deletion
@@ -26,14 +26,21 @@ DOCKER_BUILD_ARGS := \
 BUILD_DMR ?= 1
 
 # Main targets
-.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
+.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
 
 # Default target
 .DEFAULT_GOAL := build
 
 # Build the Go application
 build:
 	CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .
 
+# Build dmrlet binary
+build-dmrlet:
+	@echo "Building dmrlet..."
+	cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w" -o dmrlet .
+	mv cmd/dmrlet/dmrlet .
+	@echo "Built: dmrlet"
+
 # Run the application locally
 run: build
 	@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +53,7 @@ run: build
 # Clean build artifacts
 clean:
 	rm -f $(APP_NAME)
+	rm -f dmrlet
 	rm -f model-runner.sock
 	rm -rf $(MODELS_PATH)
 
@@ -148,6 +156,7 @@ docker-run-impl:
 help:
 	@echo "Available targets:"
 	@echo "  build              - Build the Go application"
+	@echo "  build-dmrlet       - Build dmrlet binary (lightweight node agent)"
 	@echo "  run                - Run the application locally"
 	@echo "  clean              - Clean build artifacts"
 	@echo "  test               - Run tests"
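Note the contrast with the main build target: dmrlet compiles with CGO_ENABLED=0, so the resulting binary is pure Go and statically linked, and can be copied onto a node without the shared-library dependencies that the CGO_ENABLED=1 model-runner build carries.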

cmd/dmrlet/commands/list.go

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
package commands

import (
	"fmt"
	"os"

	"github.com/olekukonko/tablewriter"
	"github.com/spf13/cobra"
)

func newListCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:     "list",
		Aliases: []string{"ls"},
		Short:   "List running models",
		Long: `List all running inference models managed by dmrlet.

Examples:
  dmrlet list
  dmrlet ls`,
		Args: cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			return runList(cmd)
		},
	}

	return cmd
}

func runList(cmd *cobra.Command) error {
	ctx := cmd.Context()

	if err := initManager(ctx); err != nil {
		return fmt.Errorf("initializing manager: %w", err)
	}

	running, err := manager.List(ctx)
	if err != nil {
		return fmt.Errorf("listing models: %w", err)
	}

	if len(running) == 0 {
		cmd.Println("No running models")
		return nil
	}

	table := tablewriter.NewTable(os.Stdout,
		tablewriter.WithHeader([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"}),
	)

	for _, m := range running {
		table.Append([]string{
			m.ModelRef,
			string(m.Backend),
			fmt.Sprintf("%d", m.Port),
			m.Endpoint,
		})
	}

	table.Render()
	return nil
}
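For reference, runList renders a four-column table. A hypothetical run with one model up (values are illustrative only, assembled from the backend name, port range, and endpoint format used elsewhere in this commit) would look roughly like:

  MODEL        BACKEND       PORT   ENDPOINT
  ai/smollm2   llama-server  30000  http://localhost:30000/v1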

cmd/dmrlet/commands/pull.go

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
package commands

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func newPullCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "pull MODEL",
		Short: "Pull a model without serving",
		Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
This is useful for pre-downloading models.

Examples:
  dmrlet pull ai/smollm2
  dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
		Args: cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			return runPull(cmd, args[0])
		},
	}

	return cmd
}

func runPull(cmd *cobra.Command, modelRef string) error {
	ctx := cmd.Context()

	if err := initStore(); err != nil {
		return fmt.Errorf("initializing store: %w", err)
	}

	cmd.Printf("Pulling model: %s\n", modelRef)

	if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
		return fmt.Errorf("pulling model: %w", err)
	}

	cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
	return nil
}

cmd/dmrlet/commands/root.go

Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@
// Package commands implements the dmrlet CLI commands.
package commands

import (
	"context"
	"os"
	"os/signal"
	"syscall"

	"github.com/docker/model-runner/pkg/dmrlet/inference"
	"github.com/docker/model-runner/pkg/dmrlet/models"
	"github.com/docker/model-runner/pkg/dmrlet/runtime"
	"github.com/sirupsen/logrus"
	"github.com/spf13/cobra"
)

var (
	// Global flags
	verbose bool
	logJSON bool

	// Shared state
	log     *logrus.Entry
	store   *models.Store
	rt      *runtime.Runtime
	manager *inference.Manager
)

// rootCmd is the root command for dmrlet.
var rootCmd = &cobra.Command{
	Use:   "dmrlet",
	Short: "Lightweight node agent for Docker Model Runner",
	Long: `dmrlet is a lightweight node agent for Docker Model Runner - a "Kubelet for AI"
that runs inference containers directly with zero YAML overhead.

Example:
  dmrlet serve ai/smollm2
  # Pulls model, starts inference container, exposes OpenAI API at http://localhost:30000/v1`,
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		// Skip initialization for help and version commands
		if cmd.Name() == "help" || cmd.Name() == "version" {
			return nil
		}

		// Setup logging
		logger := logrus.New()
		if verbose {
			logger.SetLevel(logrus.DebugLevel)
		} else {
			logger.SetLevel(logrus.InfoLevel)
		}
		if logJSON {
			logger.SetFormatter(&logrus.JSONFormatter{})
		}

		// Check DMRLET_LOG_LEVEL environment variable
		if level := os.Getenv("DMRLET_LOG_LEVEL"); level != "" {
			if lvl, err := logrus.ParseLevel(level); err == nil {
				logger.SetLevel(lvl)
			}
		}

		log = logger.WithField("component", "dmrlet")

		return nil
	},
	SilenceUsage:  true,
	SilenceErrors: true,
}

// Execute runs the root command.
func Execute() error {
	// Setup context with signal handling
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	return rootCmd.ExecuteContext(ctx)
}

func init() {
	rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")
	rootCmd.PersistentFlags().BoolVar(&logJSON, "log-json", false, "Output logs in JSON format")

	rootCmd.AddCommand(
		newServeCmd(),
		newStopCmd(),
		newListCmd(),
		newPullCmd(),
		newVersionCmd(),
	)
}

// initStore initializes the model store.
func initStore() error {
	if store != nil {
		return nil
	}

	var err error
	store, err = models.NewStore(
		models.WithLogger(log),
	)
	if err != nil {
		return err
	}
	return nil
}

// initRuntime initializes the containerd runtime.
func initRuntime(ctx context.Context) error {
	if rt != nil {
		return nil
	}

	var err error
	rt, err = runtime.NewRuntime(ctx,
		runtime.WithRuntimeLogger(log),
	)
	if err != nil {
		return err
	}
	return nil
}

// initManager initializes the inference manager.
func initManager(ctx context.Context) error {
	if err := initStore(); err != nil {
		return err
	}
	if err := initRuntime(ctx); err != nil {
		return err
	}

	if manager == nil {
		manager = inference.NewManager(store, rt,
			inference.WithManagerLogger(log),
		)
	}
	return nil
}
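One consequence of the ordering inside PersistentPreRunE: the DMRLET_LOG_LEVEL check runs after the --verbose flag is applied, so the environment variable takes precedence when both are set. Any level name that logrus.ParseLevel accepts works, e.g.:

  DMRLET_LOG_LEVEL=debug dmrlet list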

cmd/dmrlet/commands/serve.go

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
package commands

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"

	"github.com/docker/model-runner/pkg/dmrlet/inference"
	"github.com/spf13/cobra"
)

type serveFlags struct {
	port    int
	backend string
	gpu     bool
	detach  bool
}

func newServeCmd() *cobra.Command {
	flags := &serveFlags{}

	cmd := &cobra.Command{
		Use:   "serve MODEL",
		Short: "Serve a model (pull if needed, start container, wait for ready)",
		Long: `Serve a model by pulling it if needed, starting an inference container,
and waiting for it to be ready. The model will be exposed on an OpenAI-compatible API.

Examples:
  dmrlet serve ai/smollm2
  dmrlet serve ai/smollm2 --port 8080
  dmrlet serve ai/smollm2 --gpu
  dmrlet serve ai/smollm2 --backend vllm --gpu
  dmrlet serve ai/smollm2 -d   # detached mode`,
		Args: cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			return runServe(cmd, args[0], flags)
		},
	}

	cmd.Flags().IntVarP(&flags.port, "port", "p", 0, "Port to expose the API on (auto-allocated if not specified)")
	cmd.Flags().StringVarP(&flags.backend, "backend", "b", "", "Inference backend (llama-server, vllm)")
	cmd.Flags().BoolVar(&flags.gpu, "gpu", false, "Enable GPU support")
	cmd.Flags().BoolVarP(&flags.detach, "detach", "d", false, "Run in detached mode (return immediately)")

	return cmd
}

func runServe(cmd *cobra.Command, modelRef string, flags *serveFlags) error {
	ctx := cmd.Context()

	if err := initManager(ctx); err != nil {
		return fmt.Errorf("initializing manager: %w", err)
	}

	opts := inference.ServeOptions{
		Port:     flags.port,
		Backend:  flags.backend,
		GPU:      flags.gpu,
		Detach:   flags.detach,
		Progress: os.Stdout,
	}

	running, err := manager.Serve(ctx, modelRef, opts)
	if err != nil {
		return fmt.Errorf("serving model: %w", err)
	}

	cmd.Printf("\nModel %s is ready!\n", modelRef)
	cmd.Printf("Endpoint: %s\n", running.Endpoint)
	cmd.Printf("Backend: %s\n", running.Backend)
	cmd.Printf("Port: %d\n", running.Port)
	cmd.Println()
	cmd.Printf("Example usage:\n")
	cmd.Printf("  curl %s/chat/completions -H 'Content-Type: application/json' \\\n", running.Endpoint)
	cmd.Printf("    -d '{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"Hello!\"}]}'\n", modelRef)

	if flags.detach {
		return nil
	}

	// Wait for interrupt signal
	cmd.Println()
	cmd.Println("Press Ctrl+C to stop the model...")

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	<-sigCh

	cmd.Println()
	cmd.Println("Stopping model...")

	if err := manager.Stop(ctx, modelRef); err != nil {
		return fmt.Errorf("stopping model: %w", err)
	}

	cmd.Println("Model stopped.")
	return nil
}
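Since the endpoint is OpenAI-compatible, the curl example that serve prints translates directly into any HTTP client. A minimal self-contained Go sketch, assuming a model is already serving at http://localhost:30000/v1 (the port shown in the root help text; substitute whatever port dmrlet actually allocated):

// Minimal client sketch for the OpenAI-compatible endpoint dmrlet exposes.
// The port is assumed for illustration; use the one printed by `dmrlet serve`.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	endpoint := "http://localhost:30000/v1/chat/completions"

	// Same JSON body as the curl example emitted by runServe.
	body, err := json.Marshal(map[string]any{
		"model": "ai/smollm2",
		"messages": []map[string]string{
			{"role": "user", "content": "Hello!"},
		},
	})
	if err != nil {
		panic(err)
	}

	resp, err := http.Post(endpoint, "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}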
