Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 55 additions & 17 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (
"syscall"
"time"

"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers/runtime-spec/specs-go"

"github.com/opencontainers/cgroups"
"github.com/opencontainers/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
Expand Down Expand Up @@ -287,24 +288,46 @@ func (p *setnsProcess) addIntoCgroupV1() error {
return nil
}

// initProcessCgroupPath returns container init's cgroup path,
// as read from /proc/PID/cgroup. Only works for cgroup v2.
// Returns empty string if the path can not be obtained.
//
// This is used by runc exec in these cases:
//
// 1. On cgroup v2 + nesting + domain controllers, adding to initial cgroup
// may fail with EBUSY (https://github.com/opencontainers/runc/issues/2356);
//
// 2. A container init process with no cgroupns and /sys/fs/cgroup rw access
// may move itself to any other cgroup, and the original cgroup will disappear.
func (p *setnsProcess) initProcessCgroupPath() string {
if p.initProcessPid == 0 || !cgroups.IsCgroup2UnifiedMode() {
return ""
}

cg, err := cgroups.ParseCgroupFile("/proc/" + strconv.Itoa(p.initProcessPid) + "/cgroup")
if err != nil {
return ""
}
cgroup, ok := cg[""]
if !ok {
return ""
}

return fs2.UnifiedMountpoint + cgroup
}

func (p *setnsProcess) addIntoCgroupV2() error {
sub := p.process.SubCgroupPaths[""]
err := p.manager.AddPid(sub, p.pid())
if err != nil && !p.rootlessCgroups {
// On cgroup v2 + nesting + domain controllers, adding to initial cgroup may fail with EBUSY.
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
// Try to join the cgroup of InitProcessPid, unless sub-cgroup is explicitly set.
if p.initProcessPid != 0 && sub == "" {
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
if initCgErr == nil {
if initCgPath, ok := initCg[""]; ok {
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
logrus.Debugf("adding pid %d to cgroup failed (%v), attempting to join %s",
p.pid(), err, initCgDirpath)
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
}
// Failed to join the configured cgroup, fall back to container init's cgroup
// unless sub-cgroup is explicitly requested.
if sub == "" {
if path := p.initProcessCgroupPath(); path != "" {
logrus.Debugf("adding pid %d to configured cgroup failed (%v), will join container init cgroup %q",
p.pid(), err, path)
// NOTE: path is not guaranteed to exist because we didn't pause the container.
err = cgroups.WriteCgroupProc(path, p.pid())
}
}
if err != nil {
Expand All @@ -331,6 +354,8 @@ func (p *setnsProcess) addIntoCgroup() error {
// to join cgroup early, in p.cmd.Start. Returns an *os.File which
// must be closed by the caller after p.Cmd.Start return.
func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
const openFlags = unix.O_PATH | unix.O_DIRECTORY | unix.O_CLOEXEC

if !cgroups.IsCgroup2UnifiedMode() {
return nil, nil
}
Expand All @@ -348,12 +373,25 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
return nil, fmt.Errorf("bad sub cgroup path: %s", sub)
}

fd, err := cgroups.OpenFile(base, sub, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC)
fd, err := cgroups.OpenFile(base, sub, openFlags)
if err != nil {
if p.rootlessCgroups {
return nil, nil
}
return nil, fmt.Errorf("can't open cgroup: %w", err)
// Failed to open the configured cgroup, fall back to container init's cgroup,
// unless sub-cgroup is explicitly requested.
if sub == "" {
if initCg := p.initProcessCgroupPath(); initCg != "" {
logrus.Debugf("failed to open configured cgroup %q: %v, try using container init's cgroup %q",
cgroup, err, initCg)
// We do not freeze the container so cgroup is not guaranteed to exist.
fd, err = cgroups.OpenFile(initCg, "", openFlags)
cgroup = initCg
}
}
if err != nil {
return nil, fmt.Errorf("can't open cgroup: %w", err)
}
}

logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
Expand Down
35 changes: 35 additions & 0 deletions tests/integration/exec.bats
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,41 @@ function check_exec_debug() {
[ "$status" -eq 0 ]
}

# https://github.com/opencontainers/runc/issues/5089
@test "runc exec [init changes cgroup]" {
requires root cgroups_v2

NEW_CGROUP_REL=/runc-tst-$$
NEW_CGROUP=/sys/fs/cgroup$NEW_CGROUP_REL
mkdir $NEW_CGROUP

# The container is placed into a $CGROUP_V2_PATH cgroup.
set_cgroups_path
# And upon the start it moves itself into $NEW_CGROUP.
set_cgroup_mount_writable
update_config ' .linux.namespaces -= [{"type": "cgroup"}]
| .process.args = ["sh", "-c", "echo 1 > '$NEW_CGROUP'/cgroup.procs && exec sleep 1h"]'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ $status -eq 0 ]
testcontainer test_busybox running
sleep 1
# Remove the original container cgroup. If systemd cgroup manager is used by runc,
# the cgroup might have already be deleted by systemd, so we ignore rmdir errors.
rmdir "$CGROUP_V2_PATH" || true
test -d "$CGROUP_V2_PATH" && false

# Test that runc exec is able to fallback to container's init cgroup
# even if the original cgroup is gone.
runc exec test_busybox cat /proc/self/cgroup
[ $status -eq 0 ]
[ "$output" = "0::$NEW_CGROUP_REL" ]

# Cleanup.
runc delete -f test_busybox
rmdir "$NEW_CGROUP"
}

@test "runc exec [execve error]" {
cat <<EOF >rootfs/run.sh
#!/mmnnttbb foo bar
Expand Down