Skip to content

Commit 9e8337b

Browse files
committed
add dmrlet - lightweight node agent for Docker Model Runner
dmrlet is a "Kubelet for AI" that runs inference containers directly with zero YAML overhead. It provides a simple CLI to serve models: dmrlet serve ai/smollm2 # Pulls model, starts inference container, exposes OpenAI API Key features: - Reuses existing pkg/distribution for model management - containerd integration for container lifecycle - GPU detection and passthrough (NVIDIA/AMD) - Auto port allocation (30000-30999 range) - Health checking with configurable timeout - Backend auto-detection (llama-server for GGUF, vLLM for safetensors) Commands: serve, stop, list, pull, version Signed-off-by: Eric Curtin <eric.curtin@docker.com>
1 parent 3245480 commit 9e8337b

File tree

22 files changed

+4041
-275
lines changed

22 files changed

+4041
-275
lines changed

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,24 @@ DOCKER_BUILD_ARGS := \
2626
BUILD_DMR ?= 1
2727

2828
# Main targets
29-
.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
29+
.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers
3030
# Default target
3131
.DEFAULT_GOAL := build
3232

3333
# Build the Go application
3434
build:
3535
CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .
3636

37+
# Build the dmrlet binary (lightweight node agent).
# Version metadata is injected via -ldflags; each $$(...) fallback keeps
# the build working outside a git checkout or without git installed.
# Building with -o ../../dmrlet writes the binary straight to the repo
# root, removing the previous intermediate cmd/dmrlet/dmrlet file and
# the separate mv step.
build-dmrlet:
	@echo "Building dmrlet..."
	@VERSION=$$(git describe --tags --always --dirty 2>/dev/null || echo "dev"); \
	GIT_COMMIT=$$(git rev-parse HEAD 2>/dev/null || echo "unknown"); \
	BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || echo "unknown"); \
	cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w -X 'main.Version=$${VERSION}' -X 'main.GitCommit=$${GIT_COMMIT}' -X 'main.BuildDate=$${BUILD_DATE}'" -o ../../dmrlet .
	@echo "Built: dmrlet"
46+
3747
# Run the application locally
3848
run: build
3949
@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +56,7 @@ run: build
4656
# Clean build artifacts
4757
clean:
4858
rm -f $(APP_NAME)
59+
rm -f dmrlet
4960
rm -f model-runner.sock
5061
rm -rf $(MODELS_PATH)
5162

@@ -148,6 +159,7 @@ docker-run-impl:
148159
help:
149160
@echo "Available targets:"
150161
@echo " build - Build the Go application"
162+
@echo " build-dmrlet - Build dmrlet binary (lightweight node agent)"
151163
@echo " run - Run the application locally"
152164
@echo " clean - Clean build artifacts"
153165
@echo " test - Run tests"

cmd/dmrlet/commands/list.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
"os"
6+
7+
"github.com/olekukonko/tablewriter"
8+
"github.com/spf13/cobra"
9+
)
10+
11+
func newListCmd() *cobra.Command {
12+
cmd := &cobra.Command{
13+
Use: "list",
14+
Aliases: []string{"ls"},
15+
Short: "List running models",
16+
Long: `List all running inference models managed by dmrlet.
17+
18+
Examples:
19+
dmrlet list
20+
dmrlet ls`,
21+
Args: cobra.NoArgs,
22+
RunE: func(cmd *cobra.Command, args []string) error {
23+
return runList(cmd)
24+
},
25+
}
26+
27+
return cmd
28+
}
29+
30+
func runList(cmd *cobra.Command) error {
31+
ctx := cmd.Context()
32+
33+
if err := initManager(ctx); err != nil {
34+
return fmt.Errorf("initializing manager: %w", err)
35+
}
36+
37+
running, err := manager.List(ctx)
38+
if err != nil {
39+
return fmt.Errorf("listing models: %w", err)
40+
}
41+
42+
if len(running) == 0 {
43+
cmd.Println("No running models")
44+
return nil
45+
}
46+
47+
table := tablewriter.NewWriter(os.Stdout)
48+
table.SetHeader([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"})
49+
50+
for _, m := range running {
51+
table.Append([]string{
52+
m.ModelRef,
53+
string(m.Backend),
54+
fmt.Sprintf("%d", m.Port),
55+
m.Endpoint,
56+
})
57+
}
58+
59+
table.Render()
60+
return nil
61+
}

cmd/dmrlet/commands/pull.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
"os"
6+
7+
"github.com/spf13/cobra"
8+
)
9+
10+
func newPullCmd() *cobra.Command {
11+
cmd := &cobra.Command{
12+
Use: "pull MODEL",
13+
Short: "Pull a model without serving",
14+
Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
15+
This is useful for pre-downloading models.
16+
17+
Examples:
18+
dmrlet pull ai/smollm2
19+
dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
20+
Args: cobra.ExactArgs(1),
21+
RunE: func(cmd *cobra.Command, args []string) error {
22+
return runPull(cmd, args[0])
23+
},
24+
}
25+
26+
return cmd
27+
}
28+
29+
func runPull(cmd *cobra.Command, modelRef string) error {
30+
ctx := cmd.Context()
31+
32+
if err := initStore(); err != nil {
33+
return fmt.Errorf("initializing store: %w", err)
34+
}
35+
36+
cmd.Printf("Pulling model: %s\n", modelRef)
37+
38+
if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
39+
return fmt.Errorf("pulling model: %w", err)
40+
}
41+
42+
cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
43+
return nil
44+
}

cmd/dmrlet/commands/root.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Package commands implements the dmrlet CLI commands.
2+
package commands
3+
4+
import (
5+
"context"
6+
"os"
7+
"os/signal"
8+
"syscall"
9+
10+
"github.com/docker/model-runner/pkg/dmrlet/inference"
11+
"github.com/docker/model-runner/pkg/dmrlet/models"
12+
"github.com/docker/model-runner/pkg/dmrlet/runtime"
13+
"github.com/sirupsen/logrus"
14+
"github.com/spf13/cobra"
15+
)
16+
17+
var (
18+
// Global flags
19+
verbose bool
20+
logJSON bool
21+
22+
// Shared state
23+
log *logrus.Entry
24+
store *models.Store
25+
rt *runtime.Runtime
26+
manager *inference.Manager
27+
)
28+
29+
// rootCmd is the root command for dmrlet.
30+
var rootCmd = &cobra.Command{
31+
Use: "dmrlet",
32+
Short: "Lightweight node agent for Docker Model Runner",
33+
Long: `dmrlet is a lightweight node agent for Docker Model Runner - a "Kubelet for AI"
34+
that runs inference containers directly with zero YAML overhead.
35+
36+
Example:
37+
dmrlet serve ai/smollm2
38+
# Pulls model, starts inference container, exposes OpenAI API at http://localhost:30000/v1`,
39+
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
40+
// Skip initialization for help and version commands
41+
if cmd.Name() == "help" || cmd.Name() == "version" {
42+
return nil
43+
}
44+
45+
// Setup logging
46+
logger := logrus.New()
47+
if verbose {
48+
logger.SetLevel(logrus.DebugLevel)
49+
} else {
50+
logger.SetLevel(logrus.InfoLevel)
51+
}
52+
if logJSON {
53+
logger.SetFormatter(&logrus.JSONFormatter{})
54+
}
55+
56+
// Check DMRLET_LOG_LEVEL environment variable
57+
if level := os.Getenv("DMRLET_LOG_LEVEL"); level != "" {
58+
if lvl, err := logrus.ParseLevel(level); err == nil {
59+
logger.SetLevel(lvl)
60+
}
61+
}
62+
63+
log = logger.WithField("component", "dmrlet")
64+
65+
return nil
66+
},
67+
SilenceUsage: true,
68+
SilenceErrors: true,
69+
}
70+
71+
// Execute runs the root command.
72+
func Execute() error {
73+
// Setup context with signal handling
74+
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
75+
defer cancel()
76+
77+
return rootCmd.ExecuteContext(ctx)
78+
}
79+
80+
func init() {
81+
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")
82+
rootCmd.PersistentFlags().BoolVar(&logJSON, "log-json", false, "Output logs in JSON format")
83+
84+
rootCmd.AddCommand(
85+
newServeCmd(),
86+
newStopCmd(),
87+
newListCmd(),
88+
newPullCmd(),
89+
newVersionCmd(),
90+
)
91+
}
92+
93+
// initStore initializes the model store.
94+
func initStore() error {
95+
if store != nil {
96+
return nil
97+
}
98+
99+
var err error
100+
store, err = models.NewStore(
101+
models.WithLogger(log),
102+
)
103+
if err != nil {
104+
return err
105+
}
106+
return nil
107+
}
108+
109+
// initRuntime initializes the containerd runtime.
110+
func initRuntime(ctx context.Context) error {
111+
if rt != nil {
112+
return nil
113+
}
114+
115+
var err error
116+
rt, err = runtime.NewRuntime(ctx,
117+
runtime.WithRuntimeLogger(log),
118+
)
119+
if err != nil {
120+
return err
121+
}
122+
return nil
123+
}
124+
125+
// initManager initializes the inference manager.
126+
func initManager(ctx context.Context) error {
127+
if err := initStore(); err != nil {
128+
return err
129+
}
130+
if err := initRuntime(ctx); err != nil {
131+
return err
132+
}
133+
134+
if manager == nil {
135+
manager = inference.NewManager(store, rt,
136+
inference.WithManagerLogger(log),
137+
)
138+
}
139+
return nil
140+
}

cmd/dmrlet/commands/serve.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package commands
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"os/signal"
8+
"syscall"
9+
10+
"github.com/docker/model-runner/pkg/dmrlet/inference"
11+
"github.com/spf13/cobra"
12+
)
13+
14+
// serveFlags holds the command-line options for the "serve" command.
type serveFlags struct {
	port    int    // host port for the API; 0 means auto-allocate
	backend string // inference backend name (llama-server, vllm); empty means auto-detect
	gpu     bool   // enable GPU support for the inference container
	detach  bool   // return immediately instead of blocking until interrupted
}
20+
21+
func newServeCmd() *cobra.Command {
22+
flags := &serveFlags{}
23+
24+
cmd := &cobra.Command{
25+
Use: "serve MODEL",
26+
Short: "Serve a model (pull if needed, start container, wait for ready)",
27+
Long: `Serve a model by pulling it if needed, starting an inference container,
28+
and waiting for it to be ready. The model will be exposed on an OpenAI-compatible API.
29+
30+
Examples:
31+
dmrlet serve ai/smollm2
32+
dmrlet serve ai/smollm2 --port 8080
33+
dmrlet serve ai/smollm2 --gpu
34+
dmrlet serve ai/smollm2 --backend vllm --gpu
35+
dmrlet serve ai/smollm2 -d # detached mode`,
36+
Args: cobra.ExactArgs(1),
37+
RunE: func(cmd *cobra.Command, args []string) error {
38+
return runServe(cmd, args[0], flags)
39+
},
40+
}
41+
42+
cmd.Flags().IntVarP(&flags.port, "port", "p", 0, "Port to expose the API on (auto-allocated if not specified)")
43+
cmd.Flags().StringVarP(&flags.backend, "backend", "b", "", "Inference backend (llama-server, vllm)")
44+
cmd.Flags().BoolVar(&flags.gpu, "gpu", false, "Enable GPU support")
45+
cmd.Flags().BoolVarP(&flags.detach, "detach", "d", false, "Run in detached mode (return immediately)")
46+
47+
return cmd
48+
}
49+
50+
func runServe(cmd *cobra.Command, modelRef string, flags *serveFlags) error {
51+
ctx := cmd.Context()
52+
53+
if err := initManager(ctx); err != nil {
54+
return fmt.Errorf("initializing manager: %w", err)
55+
}
56+
57+
opts := inference.ServeOptions{
58+
Port: flags.port,
59+
Backend: flags.backend,
60+
GPU: flags.gpu,
61+
Detach: flags.detach,
62+
Progress: os.Stdout,
63+
}
64+
65+
running, err := manager.Serve(ctx, modelRef, opts)
66+
if err != nil {
67+
return fmt.Errorf("serving model: %w", err)
68+
}
69+
70+
cmd.Printf("\nModel %s is ready!\n", modelRef)
71+
cmd.Printf("Endpoint: %s\n", running.Endpoint)
72+
cmd.Printf("Backend: %s\n", running.Backend)
73+
cmd.Printf("Port: %d\n", running.Port)
74+
cmd.Println()
75+
cmd.Printf("Example usage:\n")
76+
cmd.Printf(" curl %s/chat/completions -H 'Content-Type: application/json' \\\n", running.Endpoint)
77+
cmd.Printf(" -d '{\"model\":\"%s\",\"messages\":[{\"role\":\"user\",\"content\":\"Hello!\"}]}'\n", modelRef)
78+
79+
if flags.detach {
80+
return nil
81+
}
82+
83+
// Wait for interrupt signal
84+
cmd.Println()
85+
cmd.Println("Press Ctrl+C to stop the model...")
86+
87+
sigCh := make(chan os.Signal, 1)
88+
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
89+
<-sigCh
90+
91+
cmd.Println()
92+
cmd.Println("Stopping model...")
93+
94+
if err := manager.Stop(context.Background(), modelRef); err != nil {
95+
return fmt.Errorf("stopping model: %w", err)
96+
}
97+
98+
cmd.Println("Model stopped.")
99+
return nil
100+
}

0 commit comments

Comments
 (0)