Skip to content

Commit 7fda470

Browse files
committed
Update checkpoint and restore to latest docker/master.
- C/R is now an EXPERIMENTAL level feature.
- Requires CRIU 1.6 (and builds it from source in the Dockerfile).
- Introduces checkpoint and restore as top-level CLI methods (will likely change).

Signed-off-by: Ross Boucher <rboucher@gmail.com>
1 parent 4b2a7b4 commit 7fda470

30 files changed

+816
-358
lines changed

Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ RUN echo deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty main > /etc/apt/s
3737
# Packaged dependencies
3838
RUN apt-get update && apt-get install -y \
3939
apparmor \
40+
asciidoc \
4041
aufs-tools \
4142
automake \
4243
bash-completion \
44+
bsdmainutils \
4345
btrfs-tools \
4446
build-essential \
4547
clang-3.8 \
@@ -50,21 +52,29 @@ RUN apt-get update && apt-get install -y \
5052
git \
5153
iptables \
5254
jq \
55+
libaio-dev \
5356
libapparmor-dev \
5457
libcap-dev \
5558
libltdl-dev \
59+
libprotobuf-c0-dev \
60+
libprotobuf-dev \
5661
libsqlite3-dev \
5762
libsystemd-journal-dev \
5863
mercurial \
5964
parallel \
6065
pkg-config \
66+
protobuf-compiler \
67+
protobuf-c-compiler \
6168
python-dev \
69+
python-minimal \
6270
python-mock \
6371
python-pip \
72+
python-protobuf \
6473
python-websocket \
6574
s3cmd=1.1.0* \
6675
ubuntu-zfs \
6776
xfsprogs \
77+
xmlto \
6878
libzfs-dev \
6979
tar \
7080
--no-install-recommends \
@@ -82,6 +92,14 @@ RUN cd /usr/local/lvm2 \
8292
&& make install_device-mapper
8393
# see https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL
8494

95+
# Install Criu
96+
ENV CRIU_VERSION 1.6
97+
RUN mkdir -p /usr/src/criu \
98+
&& curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
99+
&& cd /usr/src/criu \
100+
&& make \
101+
&& make install
102+
85103
# Install Go
86104
ENV GO_VERSION 1.5.2
87105
RUN curl -sSL "https://storage.googleapis.com/golang/go${GO_VERSION}.linux-amd64.tar.gz" | tar -v -C /usr/local -xz

api/client/checkpoint.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// +build experimental
2+
3+
package client
4+
5+
import (
6+
"fmt"
7+
8+
Cli "github.com/docker/docker/cli"
9+
flag "github.com/docker/docker/pkg/mflag"
10+
"github.com/docker/docker/runconfig"
11+
)
12+
13+
// CmdCheckpoint checkpoints the process running in a container
14+
//
15+
// Usage: docker checkpoint CONTAINER
16+
func (cli *DockerCli) CmdCheckpoint(args ...string) error {
17+
cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true)
18+
cmd.Require(flag.Min, 1)
19+
20+
var (
21+
flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files")
22+
flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file")
23+
flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint")
24+
)
25+
26+
if err := cmd.ParseFlags(args, true); err != nil {
27+
return err
28+
}
29+
30+
if cmd.NArg() < 1 {
31+
cmd.Usage()
32+
return nil
33+
}
34+
35+
criuOpts := &runconfig.CriuConfig{
36+
ImagesDirectory: *flImgDir,
37+
WorkDirectory: *flWorkDir,
38+
LeaveRunning: *flLeaveRunning,
39+
TCPEstablished: true,
40+
ExternalUnixConnections: true,
41+
FileLocks: true,
42+
}
43+
44+
var encounteredError error
45+
for _, name := range cmd.Args() {
46+
_, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil))
47+
if err != nil {
48+
fmt.Fprintf(cli.err, "%s\n", err)
49+
encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers")
50+
} else {
51+
fmt.Fprintf(cli.out, "%s\n", name)
52+
}
53+
}
54+
return encounteredError
55+
}

api/client/restore.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// +build experimental
2+
3+
package client
4+
5+
import (
6+
"fmt"
7+
8+
Cli "github.com/docker/docker/cli"
9+
flag "github.com/docker/docker/pkg/mflag"
10+
"github.com/docker/docker/runconfig"
11+
)
12+
13+
// CmdRestore restores the process in a checkpointed container
14+
//
15+
// Usage: docker restore CONTAINER
16+
func (cli *DockerCli) CmdRestore(args ...string) error {
17+
cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true)
18+
cmd.Require(flag.Min, 1)
19+
20+
var (
21+
flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from")
22+
flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log")
23+
flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state")
24+
)
25+
26+
if err := cmd.ParseFlags(args, true); err != nil {
27+
return err
28+
}
29+
30+
if cmd.NArg() < 1 {
31+
cmd.Usage()
32+
return nil
33+
}
34+
35+
restoreOpts := &runconfig.RestoreConfig{
36+
CriuOpts: runconfig.CriuConfig{
37+
ImagesDirectory: *flImgDir,
38+
WorkDirectory: *flWorkDir,
39+
TCPEstablished: true,
40+
ExternalUnixConnections: true,
41+
FileLocks: true,
42+
},
43+
ForceRestore: *flForce,
44+
}
45+
46+
var encounteredError error
47+
for _, name := range cmd.Args() {
48+
_, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil))
49+
if err != nil {
50+
fmt.Fprintf(cli.err, "%s\n", err)
51+
encounteredError = fmt.Errorf("Error: failed to restore one or more containers")
52+
} else {
53+
fmt.Fprintf(cli.out, "%s\n", name)
54+
}
55+
}
56+
return encounteredError
57+
}

api/server/router/local/local.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,6 @@ func (r *router) initRoutes() {
105105
// DELETE
106106
NewDeleteRoute("/images/{name:.*}", r.deleteImages),
107107
}
108+
109+
addExperimentalRoutes(r)
108110
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// +build experimental
2+
3+
package local
4+
5+
import (
6+
"encoding/json"
7+
"fmt"
8+
"net/http"
9+
10+
"github.com/docker/docker/api/server/httputils"
11+
dkrouter "github.com/docker/docker/api/server/router"
12+
"github.com/docker/docker/runconfig"
13+
"golang.org/x/net/context"
14+
)
15+
16+
func addExperimentalRoutes(r *router) {
17+
newRoutes := []dkrouter.Route{
18+
NewPostRoute("/containers/{name:.*}/checkpoint", r.postContainersCheckpoint),
19+
NewPostRoute("/containers/{name:.*}/restore", r.postContainersRestore),
20+
}
21+
22+
r.routes = append(r.routes, newRoutes...)
23+
}
24+
25+
func (s *router) postContainersCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
26+
if vars == nil {
27+
return fmt.Errorf("Missing parameter")
28+
}
29+
if err := httputils.CheckForJSON(r); err != nil {
30+
return err
31+
}
32+
33+
criuOpts := &runconfig.CriuConfig{}
34+
if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil {
35+
return err
36+
}
37+
38+
if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil {
39+
return err
40+
}
41+
42+
w.WriteHeader(http.StatusNoContent)
43+
return nil
44+
}
45+
46+
func (s *router) postContainersRestore(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
47+
if vars == nil {
48+
return fmt.Errorf("Missing parameter")
49+
}
50+
if err := httputils.CheckForJSON(r); err != nil {
51+
return err
52+
}
53+
54+
restoreOpts := runconfig.RestoreConfig{}
55+
if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil {
56+
return err
57+
}
58+
59+
if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil {
60+
return err
61+
}
62+
63+
w.WriteHeader(http.StatusNoContent)
64+
return nil
65+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// +build !experimental
2+
3+
package local
4+
5+
func addExperimentalRoutes(r *router) {
6+
}

api/server/server.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -120,36 +120,6 @@ func (s *HTTPServer) Close() error {
120120
return s.l.Close()
121121
}
122122

123-
func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
124-
if vars == nil {
125-
return fmt.Errorf("Missing parameter")
126-
}
127-
if err := parseForm(r); err != nil {
128-
return err
129-
}
130-
job := eng.Job("checkpoint", vars["name"])
131-
if err := job.Run(); err != nil {
132-
return err
133-
}
134-
w.WriteHeader(http.StatusNoContent)
135-
return nil
136-
}
137-
138-
func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
139-
if vars == nil {
140-
return fmt.Errorf("Missing parameter")
141-
}
142-
if err := parseForm(r); err != nil {
143-
return err
144-
}
145-
job := eng.Job("restore", vars["name"])
146-
if err := job.Run(); err != nil {
147-
return err
148-
}
149-
w.WriteHeader(http.StatusNoContent)
150-
return nil
151-
}
152-
153123
func writeCorsHeaders(w http.ResponseWriter, r *http.Request, corsHeaders string) {
154124
logrus.Debugf("CORS header is enabled and set to: %s", corsHeaders)
155125
w.Header().Add("Access-Control-Allow-Origin", corsHeaders)

api/types/types.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -249,17 +249,19 @@ type ExecStartCheck struct {
249249
// ContainerState stores container's running state
250250
// it's part of ContainerJSONBase and will return by "inspect" command
251251
type ContainerState struct {
252-
Status string
253-
Running bool
254-
Paused bool
255-
Restarting bool
256-
OOMKilled bool
257-
Dead bool
258-
Pid int
259-
ExitCode int
260-
Error string
261-
StartedAt string
262-
FinishedAt string
252+
Status string
253+
Running bool
254+
Paused bool
255+
Checkpointed bool
256+
Restarting bool
257+
OOMKilled bool
258+
Dead bool
259+
Pid int
260+
ExitCode int
261+
Error string
262+
StartedAt string
263+
FinishedAt string
264+
CheckpointedAt string `json:"-"`
263265
}
264266

265267
// ContainerJSONBase contains response of Remote API:

container/container.go

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -254,41 +254,6 @@ func validateID(id string) error {
254254
return nil
255255
}
256256

257-
func (container *Container) Checkpoint() error {
258-
return container.daemon.Checkpoint(container)
259-
}
260-
261-
func (container *Container) Restore() error {
262-
var err error
263-
264-
container.Lock()
265-
defer container.Unlock()
266-
267-
defer func() {
268-
if err != nil {
269-
container.cleanup()
270-
}
271-
}()
272-
273-
if err = container.initializeNetworking(); err != nil {
274-
return err
275-
}
276-
277-
linkedEnv, err := container.setupLinkedContainers()
278-
if err != nil {
279-
return err
280-
}
281-
if err = container.setupWorkingDirectory(); err != nil {
282-
return err
283-
}
284-
env := container.createDaemonEnvironment(linkedEnv)
285-
if err = populateCommandRestore(container, env); err != nil {
286-
return err
287-
}
288-
289-
return container.waitForRestore()
290-
}
291-
292257
// Returns true if the container exposes a certain port
293258
func (container *Container) exposes(p nat.Port) bool {
294259
_, exists := container.Config.ExposedPorts[p]
@@ -337,29 +302,6 @@ func (container *Container) StartLogger(cfg runconfig.LogConfig) (logger.Logger,
337302
return c(ctx)
338303
}
339304

340-
// Like waitForStart() but for restoring a container.
341-
//
342-
// XXX Does RestartPolicy apply here?
343-
func (container *Container) waitForRestore() error {
344-
container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy)
345-
346-
// After calling promise.Go() we'll have two goroutines:
347-
// - The current goroutine that will block in the select
348-
// below until restore is done.
349-
// - A new goroutine that will restore the container and
350-
// wait for it to exit.
351-
select {
352-
case <-container.monitor.restoreSignal:
353-
if container.ExitCode != 0 {
354-
return fmt.Errorf("restore process failed")
355-
}
356-
case err := <-promise.Go(container.monitor.Restore):
357-
return err
358-
}
359-
360-
return nil
361-
}
362-
363305
// GetProcessLabel returns the process label for the container.
364306
func (container *Container) GetProcessLabel() string {
365307
// even if we have a process label return "" if we are running

0 commit comments

Comments
 (0)