Skip to content

Commit

Permalink
runc create/run: warn on rootless + shared pidns + no cgroup
Browse files Browse the repository at this point in the history
Shared pid namespace means `runc kill` (or `runc delete -f`) have to
kill all container processes, not just init. To do so, it needs a cgroup
to read the PIDs from.

If there is no cgroup, processes will be leaked, and so such
configuration is bad and should not be allowed. To keep backward
compatibility, though, let's merely warn about this for now.

Alas, the only way to know if cgroup access is available is by returning
an error from Manager.Apply. Amend fs cgroup managers to do so (systemd
doesn't need it, since v1 can't work with rootless, and cgroup v2 does
not have a special rootless case).

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Sep 18, 2024
1 parent cfb6743 commit 3ff4bd3
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 4 deletions.
5 changes: 5 additions & 0 deletions libcontainer/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ var (
// is not configured to set device rules.
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")

// ErrRootless is returned by [Manager.Apply] when there is an error
// creating cgroup directory, and cgroup.Rootless is set. In general,
// this error is to be ignored.
ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")

// DevicesSetV1 and DevicesSetV2 are functions to set devices for
// cgroup v1 and v2, respectively. Unless
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
Expand Down
5 changes: 3 additions & 2 deletions libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func isIgnorableError(rootless bool, err error) bool {
return false
}

func (m *Manager) Apply(pid int) (err error) {
func (m *Manager) Apply(pid int) (retErr error) {
m.mu.Lock()
defer m.mu.Unlock()

Expand All @@ -129,14 +129,15 @@ func (m *Manager) Apply(pid int) (err error) {
// later by Set, which fails with a friendly error (see
// if path == "" in Set).
if isIgnorableError(c.Rootless, err) && c.Path == "" {
retErr = cgroups.ErrRootless
delete(m.paths, name)
continue
}
return err
}

}
return nil
return retErr
}

func (m *Manager) Destroy() error {
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (m *Manager) Apply(pid int) error {
if m.config.Rootless {
if m.config.Path == "" {
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
return nil
return cgroups.ErrRootless
}
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
}
Expand Down
10 changes: 9 additions & 1 deletion libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,15 @@ func (p *initProcess) start() (retErr error) {
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
// ErrRootless is to be ignored except when the container doesn't have private pidns.
if errors.Is(err, cgroups.ErrRootless) && !p.config.Config.Namespaces.IsPrivate(configs.NEWPID) {
// TODO: make this an error in runc 1.3.
logrus.Warn("Creating a rootless container with no cgroup and no private pid namespace. " +
"Such configuration is strongly discouraged (as it is impossible to properly kill all container's processes) " +
"and will result in an error in a future runc version.")
} else {
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
}
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
Expand Down
28 changes: 28 additions & 0 deletions tests/integration/create.bats
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,31 @@ function teardown() {

testcontainer test_busybox running
}

# https://github.com/opencontainers/runc/issues/4394#issuecomment-2334926257
@test "runc create [shared pidns + rootless]" {
# Remove pidns so it's shared with the host.
update_config ' .linux.namespaces -= [{"type": "pid"}]'
if [ $EUID -ne 0 ]; then
if rootless_cgroup; then
# Rootless containers have empty cgroup path by default.
set_cgroups_path
fi
# Can't mount real /proc when rootless + no pidns,
# so change it to a bind-mounted one from the host.
update_config ' .mounts |= map((select(.type == "proc")
| .type = "none"
| .source = "/proc"
| .options = ["rbind", "nosuid", "nodev", "noexec"]
) // .)'
fi

exp="Such configuration is strongly discouraged"
runc create --console-socket "$CONSOLE_SOCKET" test
[ "$status" -eq 0 ]
if [ $EUID -ne 0 ] && ! rootless_cgroup; then
[[ "$output" = *"$exp"* ]]
else
[[ "$output" != *"$exp"* ]]
fi
}

0 comments on commit 3ff4bd3

Please sign in to comment.