Skip to content

Commit 243c8d0

Browse files
Saied Kazemi authored and boucher committed
Checkpoint/Restore Support: add exec driver methods
Methods for checkpointing and restoring containers were added to the native driver. The LXC driver returns an error stating that these methods are not implemented yet.

Signed-off-by: Saied Kazemi <saied@google.com>

Conflicts:
daemon/execdriver/native/create.go
daemon/execdriver/native/driver.go
daemon/execdriver/native/init.go
1 parent 8b568d7 commit 243c8d0

File tree

4 files changed

+180
-0
lines changed

4 files changed

+180
-0
lines changed

daemon/execdriver/driver.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ var (
2424
)
2525

2626
// StartCallback is a callback that receives a process configuration and an
// int. NOTE(review): presumably invoked once a container process has started,
// with the int being its pid — confirm against the drivers' Run methods.
type StartCallback func(*ProcessConfig, int)
27+
// RestoreCallback is the callback a driver's Restore method invokes with the
// container's process configuration and the pid of the restored process. The
// native driver passes 0 as the pid when the restore reports a pid of 0.
type RestoreCallback func(*ProcessConfig, int)
2728

2829
// Driver specific information based on
2930
// processes registered with the driver
@@ -59,6 +60,8 @@ type Driver interface {
5960
Kill(c *Command, sig int) error
6061
Pause(c *Command) error
6162
Unpause(c *Command) error
63+
Checkpoint(c *Command) error
64+
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error)
6265
Name() string // Driver name
6366
Info(id string) Info // "temporary" hack (until we move state from core to plugins)
6467
GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.

daemon/execdriver/lxc/driver.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,14 @@ func (d *driver) Unpause(c *execdriver.Command) error {
547547
return err
548548
}
549549

550+
func (d *driver) Checkpoint(c *execdriver.Command) error {
551+
return fmt.Errorf("Checkpointing lxc containers not supported yet\n")
552+
}
553+
554+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
555+
return 0, fmt.Errorf("Restoring lxc containers not supported yet\n")
556+
}
557+
550558
// Terminate hands the container ID and signal number 9 to KillLxc and
// returns whatever error KillLxc reports.
func (d *driver) Terminate(c *execdriver.Command) error {
	const killSignal = 9
	return KillLxc(c.ID, killSignal)
}

daemon/execdriver/native/create.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package native
44

55
import (
66
"errors"
7+
"encoding/json"
78
"fmt"
89
"net"
910
"strings"
@@ -88,6 +89,24 @@ func generateIfaceName() (string, error) {
8889
return "", errors.New("Failed to find name for new interface")
8990
}
9091

92+
// Re-create the container type from the image that was saved during checkpoint.
93+
func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) {
94+
// Read the container.json.
95+
f1, err := os.Open(filepath.Join(imageDir, "container.json"))
96+
if err != nil {
97+
return nil, err
98+
}
99+
defer f1.Close()
100+
101+
var container *libcontainer.Config
102+
err = json.NewDecoder(f1).Decode(&container)
103+
if err != nil {
104+
return nil, err
105+
}
106+
107+
return container, nil
108+
}
109+
91110
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
92111
if c.Network.ContainerID != "" {
93112
d.Lock()

daemon/execdriver/native/driver.go

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/docker/docker/pkg/reexec"
2020
sysinfo "github.com/docker/docker/pkg/system"
2121
"github.com/docker/docker/pkg/term"
22+
"github.com/docker/docker/utils"
2223
"github.com/docker/libcontainer"
2324
"github.com/docker/libcontainer/apparmor"
2425
"github.com/docker/libcontainer/cgroups/systemd"
@@ -274,6 +275,155 @@ func (d *driver) Unpause(c *execdriver.Command) error {
274275
return active.Resume()
275276
}
276277

278+
// XXX Where is the right place for the following
// const and getCheckpointImageDir() function?
const (
	// containersDir is the hard-coded directory under which each container
	// has a subdirectory named after its ID.
	containersDir = "/var/lib/docker/containers"
	// criuImgDir is the name of the per-container subdirectory that holds
	// the checkpoint image files.
	criuImgDir = "criu_img"
)

// getCheckpointImageDir returns the checkpoint image directory for the given
// container: <containersDir>/<containerID>/<criuImgDir>.
func getCheckpointImageDir(containerID string) string {
	return filepath.Join(containersDir, containerID, criuImgDir)
}
288+
289+
func (d *driver) Checkpoint(c *execdriver.Command) error {
290+
active := d.activeContainers[c.ID]
291+
if active == nil {
292+
return fmt.Errorf("active container for %s does not exist", c.ID)
293+
}
294+
container := active.container
295+
296+
// Create an image directory for this container (which
297+
// may already exist from a previous checkpoint).
298+
imageDir := getCheckpointImageDir(c.ID)
299+
err := os.MkdirAll(imageDir, 0700)
300+
if err != nil && !os.IsExist(err) {
301+
return err
302+
}
303+
304+
// Copy container.json and state.json files to the CRIU
305+
// image directory for later use during restore. Do this
306+
// before checkpointing because after checkpoint the container
307+
// will exit and these files will be removed.
308+
log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir)
309+
srcFiles := []string{"container.json", "state.json"}
310+
for _, f := range srcFiles {
311+
srcFile := filepath.Join(d.root, c.ID, f)
312+
dstFile := filepath.Join(imageDir, f)
313+
if _, err := utils.CopyFile(srcFile, dstFile); err != nil {
314+
return err
315+
}
316+
}
317+
318+
d.Lock()
319+
defer d.Unlock()
320+
err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid)
321+
if err != nil {
322+
return err
323+
}
324+
325+
return nil
326+
}
327+
328+
// restoreOutput carries the result of the namespaces.Restore call from the
// goroutine that runs it back to the driver's Restore method.
type restoreOutput struct {
	exitCode int   // exit code reported by the restore call
	err      error // error reported by the restore call, nil on success
}
332+
333+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
334+
imageDir := getCheckpointImageDir(c.ID)
335+
container, err := d.createRestoreContainer(c, imageDir)
336+
if err != nil {
337+
return 1, err
338+
}
339+
340+
var term execdriver.Terminal
341+
342+
if c.ProcessConfig.Tty {
343+
term, err = NewTtyConsole(&c.ProcessConfig, pipes)
344+
} else {
345+
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
346+
}
347+
if err != nil {
348+
return -1, err
349+
}
350+
c.ProcessConfig.Terminal = term
351+
352+
d.Lock()
353+
d.activeContainers[c.ID] = &activeContainer{
354+
container: container,
355+
cmd: &c.ProcessConfig.Cmd,
356+
}
357+
d.Unlock()
358+
defer d.cleanContainer(c.ID)
359+
360+
// Since the CRIU binary exits after restoring the container, we
361+
// need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
362+
// so that it'll be owned by this process (Docker daemon) after restore.
363+
//
364+
// XXX This really belongs to where the Docker daemon starts.
365+
if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 {
366+
return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr)
367+
}
368+
369+
restoreOutputChan := make(chan restoreOutput, 1)
370+
waitForRestore := make(chan struct{})
371+
372+
go func() {
373+
exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir,
374+
func(child *os.File, args []string) *exec.Cmd {
375+
cmd := new(exec.Cmd)
376+
cmd.Path = d.initPath
377+
cmd.Args = append([]string{
378+
DriverName,
379+
"-restore",
380+
"-pipe", "3",
381+
"--",
382+
}, args...)
383+
cmd.ExtraFiles = []*os.File{child}
384+
return cmd
385+
},
386+
func(restorePid int) error {
387+
log.CRDbg("restorePid=%d", restorePid)
388+
if restorePid == 0 {
389+
restoreCallback(&c.ProcessConfig, 0)
390+
return nil
391+
}
392+
393+
// The container.json file should be written *after* the container
394+
// has started because its StdFds cannot be initialized before.
395+
//
396+
// XXX How do we handle error here?
397+
d.writeContainerFile(container, c.ID)
398+
close(waitForRestore)
399+
if restoreCallback != nil {
400+
c.ProcessConfig.Process, err = os.FindProcess(restorePid)
401+
if err != nil {
402+
log.Debugf("cannot find restored process %d", restorePid)
403+
return err
404+
}
405+
c.ContainerPid = c.ProcessConfig.Process.Pid
406+
restoreCallback(&c.ProcessConfig, c.ContainerPid)
407+
}
408+
return nil
409+
})
410+
restoreOutputChan <- restoreOutput{exitCode, err}
411+
}()
412+
413+
select {
414+
case restoreOutput := <-restoreOutputChan:
415+
// there was an error
416+
return restoreOutput.exitCode, restoreOutput.err
417+
case <-waitForRestore:
418+
// container restored
419+
break
420+
}
421+
422+
// Wait for the container to exit.
423+
restoreOutput := <-restoreOutputChan
424+
return restoreOutput.exitCode, restoreOutput.err
425+
}
426+
277427
func (d *driver) Terminate(c *execdriver.Command) error {
278428
defer d.cleanContainer(c.ID)
279429
container, err := d.factory.Load(c.ID)

0 commit comments

Comments
 (0)