@@ -19,6 +19,7 @@ import (
1919 "github.com/docker/docker/pkg/reexec"
2020 sysinfo "github.com/docker/docker/pkg/system"
2121 "github.com/docker/docker/pkg/term"
22+ "github.com/docker/docker/utils"
2223 "github.com/docker/libcontainer"
2324 "github.com/docker/libcontainer/apparmor"
2425 "github.com/docker/libcontainer/cgroups/systemd"
@@ -274,6 +275,155 @@ func (d *driver) Unpause(c *execdriver.Command) error {
274275 return active .Resume ()
275276}
276277
278+ // XXX Where is the right place for the following
279+ // const and getCheckpointImageDir() function?
280+ const (
281+ containersDir = "/var/lib/docker/containers"
282+ criuImgDir = "criu_img"
283+ )
284+
285+ func getCheckpointImageDir (containerId string ) string {
286+ return filepath .Join (containersDir , containerId , criuImgDir )
287+ }
288+
289+ func (d * driver ) Checkpoint (c * execdriver.Command ) error {
290+ active := d .activeContainers [c .ID ]
291+ if active == nil {
292+ return fmt .Errorf ("active container for %s does not exist" , c .ID )
293+ }
294+ container := active .container
295+
296+ // Create an image directory for this container (which
297+ // may already exist from a previous checkpoint).
298+ imageDir := getCheckpointImageDir (c .ID )
299+ err := os .MkdirAll (imageDir , 0700 )
300+ if err != nil && ! os .IsExist (err ) {
301+ return err
302+ }
303+
304+ // Copy container.json and state.json files to the CRIU
305+ // image directory for later use during restore. Do this
306+ // before checkpointing because after checkpoint the container
307+ // will exit and these files will be removed.
308+ log .CRDbg ("saving container.json and state.json before calling CRIU in %s" , imageDir )
309+ srcFiles := []string {"container.json" , "state.json" }
310+ for _ , f := range srcFiles {
311+ srcFile := filepath .Join (d .root , c .ID , f )
312+ dstFile := filepath .Join (imageDir , f )
313+ if _ , err := utils .CopyFile (srcFile , dstFile ); err != nil {
314+ return err
315+ }
316+ }
317+
318+ d .Lock ()
319+ defer d .Unlock ()
320+ err = namespaces .Checkpoint (container , imageDir , c .ProcessConfig .Process .Pid )
321+ if err != nil {
322+ return err
323+ }
324+
325+ return nil
326+ }
327+
// restoreOutput pairs the exit code and the error produced by a restore
// attempt so that both can be delivered over a channel as a single value.
type restoreOutput struct {
	// exitCode is the exit code reported for the restore.
	exitCode int
	// err is the error reported for the restore, if any.
	err error
}
332+
333+ func (d * driver ) Restore (c * execdriver.Command , pipes * execdriver.Pipes , restoreCallback execdriver.RestoreCallback ) (int , error ) {
334+ imageDir := getCheckpointImageDir (c .ID )
335+ container , err := d .createRestoreContainer (c , imageDir )
336+ if err != nil {
337+ return 1 , err
338+ }
339+
340+ var term execdriver.Terminal
341+
342+ if c .ProcessConfig .Tty {
343+ term , err = NewTtyConsole (& c .ProcessConfig , pipes )
344+ } else {
345+ term , err = execdriver .NewStdConsole (& c .ProcessConfig , pipes )
346+ }
347+ if err != nil {
348+ return - 1 , err
349+ }
350+ c .ProcessConfig .Terminal = term
351+
352+ d .Lock ()
353+ d .activeContainers [c .ID ] = & activeContainer {
354+ container : container ,
355+ cmd : & c .ProcessConfig .Cmd ,
356+ }
357+ d .Unlock ()
358+ defer d .cleanContainer (c .ID )
359+
360+ // Since the CRIU binary exits after restoring the container, we
361+ // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
362+ // so that it'll be owned by this process (Docker daemon) after restore.
363+ //
364+ // XXX This really belongs to where the Docker daemon starts.
365+ if _ , _ , syserr := syscall .RawSyscall (syscall .SYS_PRCTL , 36 , 1 , 0 ); syserr != 0 {
366+ return - 1 , fmt .Errorf ("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)" , syserr )
367+ }
368+
369+ restoreOutputChan := make (chan restoreOutput , 1 )
370+ waitForRestore := make (chan struct {})
371+
372+ go func () {
373+ exitCode , err := namespaces .Restore (container , c .ProcessConfig .Stdin , c .ProcessConfig .Stdout , c .ProcessConfig .Stderr , c .ProcessConfig .Console , filepath .Join (d .root , c .ID ), imageDir ,
374+ func (child * os.File , args []string ) * exec.Cmd {
375+ cmd := new (exec.Cmd )
376+ cmd .Path = d .initPath
377+ cmd .Args = append ([]string {
378+ DriverName ,
379+ "-restore" ,
380+ "-pipe" , "3" ,
381+ "--" ,
382+ }, args ... )
383+ cmd .ExtraFiles = []* os.File {child }
384+ return cmd
385+ },
386+ func (restorePid int ) error {
387+ log .CRDbg ("restorePid=%d" , restorePid )
388+ if restorePid == 0 {
389+ restoreCallback (& c .ProcessConfig , 0 )
390+ return nil
391+ }
392+
393+ // The container.json file should be written *after* the container
394+ // has started because its StdFds cannot be initialized before.
395+ //
396+ // XXX How do we handle error here?
397+ d .writeContainerFile (container , c .ID )
398+ close (waitForRestore )
399+ if restoreCallback != nil {
400+ c .ProcessConfig .Process , err = os .FindProcess (restorePid )
401+ if err != nil {
402+ log .Debugf ("cannot find restored process %d" , restorePid )
403+ return err
404+ }
405+ c .ContainerPid = c .ProcessConfig .Process .Pid
406+ restoreCallback (& c .ProcessConfig , c .ContainerPid )
407+ }
408+ return nil
409+ })
410+ restoreOutputChan <- restoreOutput {exitCode , err }
411+ }()
412+
413+ select {
414+ case restoreOutput := <- restoreOutputChan :
415+ // there was an error
416+ return restoreOutput .exitCode , restoreOutput .err
417+ case <- waitForRestore :
418+ // container restored
419+ break
420+ }
421+
422+ // Wait for the container to exit.
423+ restoreOutput := <- restoreOutputChan
424+ return restoreOutput .exitCode , restoreOutput .err
425+ }
426+
277427func (d * driver ) Terminate (c * execdriver.Command ) error {
278428 defer d .cleanContainer (c .ID )
279429 container , err := d .factory .Load (c .ID )
0 commit comments