From d07e46090186d04365229e85a31cb959260e7f76 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Mon, 10 Jul 2023 13:59:33 +0200 Subject: [PATCH] snap-{seccomp,confine}: rework seccomp denylist When a denylist was introduced in PR#12849 we reached the limits of the API of libseccomp and some things are now hard to reason about [1]. This is mostly because what we try to do does not match the libseccomp API very well and a slightly different approach is probably more aligned with it's design (see also this libseccomp issue [2] that is related to our issue). So this commit changes the approach and instead of trying to use a single filter the work is split into two filters: 1. explicit allow list 2. explicit deny list and then load both filters into the kernel. The way the kernel works is that it selects the most restrictive action. So in the case of PR#12849: ``` ~ioctl - TIOCSTI ~ioctl - TIOCLINUX ioctl ``` For `ioctl(TIOCLINUX)` the first allow filter would pass `ioctl` but the second deny filter would correctly deny the TIOCLINUX. [1] https://github.com/snapcore/snapd/pull/12849#discussion_r1206855700 [2] https://github.com/seccomp/libseccomp/issues/44 --- cmd/snap-confine/seccomp-support.c | 42 ++++++++++--- cmd/snap-confine/seccomp-support.h | 4 ++ cmd/snap-seccomp/main.go | 94 +++++++++++++++++++++--------- cmd/snap-seccomp/main_test.go | 67 ++++++++++++++++----- interfaces/seccomp/template.go | 9 +-- tests/main/snap-seccomp/task.yaml | 44 +++++++------- 6 files changed, 179 insertions(+), 81 deletions(-) diff --git a/cmd/snap-confine/seccomp-support.c b/cmd/snap-confine/seccomp-support.c index 7432d79cb15..9c12c142f33 100644 --- a/cmd/snap-confine/seccomp-support.c +++ b/cmd/snap-confine/seccomp-support.c @@ -109,13 +109,38 @@ static void validate_bpfpath_is_safe(const char *path) } } -bool sc_apply_seccomp_profile_for_security_tag(const char *security_tag) -{ +bool sc_apply_seccomp_profile_for_security_tag(const char *security_tag) { debug("loading bpf program for security tag %s", security_tag); char profile_path[PATH_MAX] = { 0 }; - sc_must_snprintf(profile_path, sizeof(profile_path), "%s/%s.bin", + + struct sock_fprog prog_allow = { 0 }; + sc_must_snprintf(profile_path, sizeof(profile_path), "%s/%s.bin.allow", + filter_profile_dir, security_tag); + if (!sc_load_seccomp_profile_path(profile_path, &prog_allow)) { + return false; + } + + struct sock_fprog prog_deny = { 0 }; + sc_must_snprintf(profile_path, sizeof(profile_path), "%s/%s.bin.deny", filter_profile_dir, security_tag); + if (!sc_load_seccomp_profile_path(profile_path, &prog_deny)) { + return false; + } + + sc_apply_seccomp_filter(&prog_deny); + sc_apply_seccomp_filter(&prog_allow); + + // XXX: this is not ideal, allocation happens in + // sc_load_seccomp_profile but free here :( + free(prog_allow.filter); + free(prog_deny.filter); + return true; +} + +bool sc_load_seccomp_profile_path(const char *profile_path, struct sock_fprog *prog) +{ + debug("loading bpf program from file %s", profile_path); // Wait some time for the security profile to show up. When // the system boots snapd will created security profiles, but @@ -165,11 +190,12 @@ bool sc_apply_seccomp_profile_for_security_tag(const char *security_tag) if (sc_streq(bpf, "@unrestricted\n")) { return false; } - struct sock_fprog prog = { - .len = num_read / sizeof(struct sock_filter), - .filter = (struct sock_filter *)bpf, - }; - sc_apply_seccomp_filter(&prog); + prog->len = num_read / sizeof(struct sock_filter); + prog->filter = (struct sock_filter *)malloc(num_read); + if (prog->filter == NULL) { + die("cannot allocate %li bytes of memory for seccomp filter ", num_read); + } + memcpy(prog->filter, bpf, num_read); return true; } diff --git a/cmd/snap-confine/seccomp-support.h b/cmd/snap-confine/seccomp-support.h index 9ed9b0aaaab..b387913e471 100644 --- a/cmd/snap-confine/seccomp-support.h +++ b/cmd/snap-confine/seccomp-support.h @@ -19,6 +19,10 @@ #include +#include + +bool sc_load_seccomp_profile_path(const char *profile_path, struct sock_fprog *prog); + /** * sc_apply_seccomp_profile_for_security_tag applies a seccomp profile to the * current process. The filter is loaded from a pre-compiled bpf bytecode diff --git a/cmd/snap-seccomp/main.go b/cmd/snap-seccomp/main.go index 4ff3799b0a1..b0df5941ddb 100644 --- a/cmd/snap-seccomp/main.go +++ b/cmd/snap-seccomp/main.go @@ -583,11 +583,12 @@ var ( errnoOnImplicitDenial int16 = C.EPERM ) -func parseLine(line string, secFilter *seccomp.ScmpFilter) error { +func parseLine(line string, secFilterAllow, secFilterDeny *seccomp.ScmpFilter) error { // ignore comments and empty lines if strings.HasPrefix(line, "#") || line == "" { return nil } + secFilter := secFilterAllow // regular line tokens := strings.Fields(line) @@ -604,6 +605,7 @@ func parseLine(line string, secFilter *seccomp.ScmpFilter) error { if strings.HasPrefix(syscallName, "~") { action = seccomp.ActErrno.SetReturnCode(errnoOnExplicitDenial) syscallName = syscallName[1:] + secFilter = secFilterDeny } secSyscall, err := seccomp.GetSyscallFromName(syscallName) @@ -789,27 +791,44 @@ func complainAction() seccomp.ScmpAction { return seccomp.ActAllow } +func newComplainFilter(complainAct seccomp.ScmpAction) (*seccomp.ScmpFilter, error) { + secFilter, err := seccomp.NewFilter(complainAct) + if err != nil { + if complainAct != seccomp.ActAllow { + // ActLog is only supported in newer versions + // of the kernel, libseccomp, and + // libseccomp-golang. Attempt to fall back to + // ActAllow before erroring out. + complainAct = seccomp.ActAllow + secFilter, err = seccomp.NewFilter(complainAct) + } + } + return secFilter, err +} + func compile(content []byte, out string) error { var err error - var secFilter *seccomp.ScmpFilter + var secFilterAllow, secFilterDeny *seccomp.ScmpFilter unrestricted, complain := preprocess(content) switch { case unrestricted: - return osutil.AtomicWrite(out, bytes.NewBufferString("@unrestricted\n"), 0644, 0) + for _, ext := range []string{".allow", ".deny"} { + if err := osutil.AtomicWrite(out+ext, bytes.NewBufferString("@unrestricted\n"), 0644, 0); err != nil { + return err + } + } + return nil case complain: var complainAct seccomp.ScmpAction = complainAction() - secFilter, err = seccomp.NewFilter(complainAct) + secFilterAllow, err = newComplainFilter(complainAct) if err != nil { - if complainAct != seccomp.ActAllow { - // ActLog is only supported in newer versions - // of the kernel, libseccomp, and - // libseccomp-golang. Attempt to fall back to - // ActAllow before erroring out. - complainAct = seccomp.ActAllow - secFilter, err = seccomp.NewFilter(complainAct) - } + return fmt.Errorf("cannot create seccomp filter: %s", err) + } + secFilterDeny, err = newComplainFilter(complainAct) + if err != nil { + return fmt.Errorf("cannot create seccomp filter: %s", err) } // Set unrestricted to 'true' to fallback to the pre-ActLog @@ -819,19 +838,26 @@ func compile(content []byte, out string) error { unrestricted = true } default: - secFilter, err = seccomp.NewFilter(seccomp.ActErrno.SetReturnCode(errnoOnImplicitDenial)) + secFilterAllow, err = seccomp.NewFilter(seccomp.ActErrno.SetReturnCode(errnoOnImplicitDenial)) + if err != nil { + return fmt.Errorf("cannot create seccomp filter: %s", err) + } + secFilterDeny, err = seccomp.NewFilter(seccomp.ActAllow) + if err != nil { + return fmt.Errorf("cannot create seccomp filter: %s", err) + } } - if err != nil { - return fmt.Errorf("cannot create seccomp filter: %s", err) + if err := addSecondaryArches(secFilterAllow); err != nil { + return err } - if err := addSecondaryArches(secFilter); err != nil { + if err := addSecondaryArches(secFilterDeny); err != nil { return err } if !unrestricted { scanner := bufio.NewScanner(bytes.NewBuffer(content)) for scanner.Scan() { - if err := parseLine(scanner.Text(), secFilter); err != nil { + if err := parseLine(scanner.Text(), secFilterAllow, secFilterDeny); err != nil { return fmt.Errorf("cannot parse line: %s", err) } } @@ -841,21 +867,33 @@ func compile(content []byte, out string) error { } if osutil.GetenvBool("SNAP_SECCOMP_DEBUG") { - secFilter.ExportPFC(os.Stdout) + secFilterAllow.ExportPFC(os.Stdout) + secFilterDeny.ExportPFC(os.Stdout) } // write atomically - fout, err := osutil.NewAtomicFile(out, 0644, 0, osutil.NoChown, osutil.NoChown) - if err != nil { - return err - } - // Cancel once Committed is a NOP - defer fout.Cancel() - - if err := secFilter.ExportBPF(fout.File); err != nil { - return err + for _, ext := range []string{".allow", ".deny"} { + fout, err := osutil.NewAtomicFile(out+ext, 0644, 0, osutil.NoChown, osutil.NoChown) + if err != nil { + return err + } + // Cancel once Committed is a NOP + defer fout.Cancel() + + switch ext { + case ".allow": + err = secFilterAllow.ExportBPF(fout.File) + case ".deny": + err = secFilterDeny.ExportBPF(fout.File) + } + if err != nil { + return err + } + if err := fout.Commit(); err != nil { + return err + } } - return fout.Commit() + return nil } // caches for uid and gid lookups diff --git a/cmd/snap-seccomp/main_test.go b/cmd/snap-seccomp/main_test.go index 38db6fa16e8..b61e7a67769 100644 --- a/cmd/snap-seccomp/main_test.go +++ b/cmd/snap-seccomp/main_test.go @@ -71,18 +71,45 @@ var seccompBpfLoaderContent = []byte(` #define MAX_BPF_SIZE 32 * 1024 -int sc_apply_seccomp_bpf(const char* profile_path) +int sc_apply_seccomp_bpf(const char* profile_path_allow, const char* profile_path_deny) { - unsigned char bpf[MAX_BPF_SIZE + 1]; // account for EOF + unsigned char bpf_allow[MAX_BPF_SIZE + 1]; // account for EOF + unsigned char bpf_deny[MAX_BPF_SIZE + 1]; // account for EOF FILE* fp; - fp = fopen(profile_path, "rb"); + + // allow + fp = fopen(profile_path_allow, "rb"); + if (fp == NULL) { + fprintf(stderr, "cannot read %s\n", profile_path_allow); + return -1; + } + + // set 'size' to 1; to get bytes transferred + size_t num_read = fread(bpf_allow, 1, sizeof(bpf_allow), fp); + + if (ferror(fp) != 0) { + perror("fread()"); + return -1; + } else if (feof(fp) == 0) { + fprintf(stderr, "file too big\n"); + return -1; + } + fclose(fp); + + struct sock_fprog prog_allow = { + .len = num_read / sizeof(struct sock_filter), + .filter = (struct sock_filter*)bpf_allow, + }; + + // deny + fp = fopen(profile_path_deny, "rb"); if (fp == NULL) { - fprintf(stderr, "cannot read %s\n", profile_path); + fprintf(stderr, "cannot read %s\n", profile_path_deny); return -1; } // set 'size' to 1; to get bytes transferred - size_t num_read = fread(bpf, 1, sizeof(bpf), fp); + num_read = fread(bpf_deny, 1, sizeof(bpf_deny), fp); if (ferror(fp) != 0) { perror("fread()"); @@ -93,9 +120,9 @@ int sc_apply_seccomp_bpf(const char* profile_path) } fclose(fp); - struct sock_fprog prog = { + struct sock_fprog prog_deny = { .len = num_read / sizeof(struct sock_filter), - .filter = (struct sock_filter*)bpf, + .filter = (struct sock_filter*)bpf_deny, }; // Set NNP to allow loading seccomp policy into the kernel without @@ -105,10 +132,15 @@ int sc_apply_seccomp_bpf(const char* profile_path) return -1; } - if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { - perror("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...) failed"); + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_deny)) { + perror("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...) deny failed"); return -1; } + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_allow)) { + perror("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...) allow failed"); + return -1; + } + return 0; } @@ -116,15 +148,16 @@ int main(int argc, char* argv[]) { int rc = 0; if (argc < 2) { - fprintf(stderr, "Usage: %s [prog ...]\n", argv[0]); + fprintf(stderr, "Usage: %s [prog ...]\n", argv[0]); return 1; } - rc = sc_apply_seccomp_bpf(argv[1]); + // allow, deny + rc = sc_apply_seccomp_bpf(argv[1], argv[2]); if (rc != 0) return -rc; - execv(argv[2], (char* const*)&argv[2]); + execv(argv[3], (char* const*)&argv[3]); perror("execv failed"); return 1; } @@ -333,7 +366,7 @@ mprotect } } - cmd := exec.Command(s.seccompBpfLoader, bpfPath, syscallRunner, syscallRunnerArgs[0], syscallRunnerArgs[1], syscallRunnerArgs[2], syscallRunnerArgs[3], syscallRunnerArgs[4], syscallRunnerArgs[5], syscallRunnerArgs[6]) + cmd := exec.Command(s.seccompBpfLoader, bpfPath+".allow", bpfPath+".deny", syscallRunner, syscallRunnerArgs[0], syscallRunnerArgs[1], syscallRunnerArgs[2], syscallRunnerArgs[3], syscallRunnerArgs[4], syscallRunnerArgs[5], syscallRunnerArgs[6]) cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr @@ -363,7 +396,8 @@ func (s *snapSeccompSuite) TestUnrestricted(c *C) { err := main.Compile([]byte(inp), outPath) c.Assert(err, IsNil) - c.Check(outPath, testutil.FileEquals, inp) + c.Check(outPath+".allow", testutil.FileEquals, inp) + c.Check(outPath+".deny", testutil.FileEquals, inp) } // TestCompile iterates over a range of textual seccomp whitelist rules and @@ -395,8 +429,9 @@ func (s *snapSeccompSuite) TestCompile(c *C) { {"read", "read", Allow}, {"read\nwrite\nexecve\n", "write", Allow}, - // trivial denial - {"read", "ioctl", Deny}, + // trivial denial (uses write in allow-list to ensure any + // errors printing is visible) + {"write", "ioctl", Deny}, // test argument filtering syntax, we currently support: // >=, <=, !, <, >, | diff --git a/interfaces/seccomp/template.go b/interfaces/seccomp/template.go index e92bafc3bde..3006e791bd4 100644 --- a/interfaces/seccomp/template.go +++ b/interfaces/seccomp/template.go @@ -205,14 +205,7 @@ inotify_rm_watch # TODO: this should be scaled back even more ~ioctl - TIOCSTI ~ioctl - TIOCLINUX -# restrict argument otherwise will match all uses of ioctl() and allow the rules -# that were disallowed above -# TODO: Fix the need to keep TIOCLINUX here - the issue is a unrestricted -# allow for "ioctl" here makes libseccomp "optimize" the deny rules -# above away and the generated bpf becomes just "allow ioctl". -# We should fix this by creating a way to make "AND" rules, so -# this becomes "ioctl - !TIOCSTI&&!TIOCLINUX" and remove the "~" again. -ioctl - !TIOCSTI +ioctl io_cancel io_destroy diff --git a/tests/main/snap-seccomp/task.yaml b/tests/main/snap-seccomp/task.yaml index b81a663bd91..a9ca07b59c4 100644 --- a/tests/main/snap-seccomp/task.yaml +++ b/tests/main/snap-seccomp/task.yaml @@ -1,8 +1,6 @@ summary: Ensure that the snap-seccomp bpf handling works -# FIXME: once $(snap debug confinment) can be used (in 2.27+) remove -# the systems line -systems: [ubuntu-16*, ubuntu-18*] +systems: [ubuntu-*] # Start early as it takes a long time. priority: 100 @@ -25,7 +23,7 @@ execute: | # from the old test_complain echo "Test that the @complain keyword works" - rm -f "${PROFILE}.bin" + rm -f "${PROFILE}.bin"* cat >"${PROFILE}.src" <"${PROFILE}.src" <"${PROFILE}.src" <"${PROFILE}.src" <&1 ); then - echo "test-snapd-sh.sh should fail with invalid seccomp profile" - exit 1 - fi + for ext in allow deny; do + dd if=/dev/urandom of="${PROFILE}.bin.$ext" count=1 bs=1024 + if output=$(test-snapd-sh.sh -c 'echo hello' 2>&1 ); then + echo "test-snapd-sh.sh should fail with invalid seccomp profile" + exit 1 + fi + done echo "$output" | MATCH "cannot apply seccomp profile: Invalid argument" echo "Add huge snapd.test-snapd-sh.bin to ensure size limit works" - dd if=/dev/zero of="${PROFILE}.bin" count=50 bs=1M - if output=$(test-snapd-sh.sh -c 'echo hello' 2>&1 ); then - echo "test-snapd-sh.sh should fail with big seccomp profile" - exit 1 - fi + for ext in allow deny; do + dd if=/dev/zero of="${PROFILE}.bin.$ext" count=50 bs=1M + if output=$(test-snapd-sh.sh -c 'echo hello' 2>&1 ); then + echo "test-snapd-sh.sh should fail with big seccomp profile" + exit 1 + fi + done echo "$output" | MATCH "cannot fit .* to memory buffer" echo "Ensure the code cannot not run with a missing .bin profile" - rm -f "${PROFILE}.bin" + rm -f "${PROFILE}.bin"* if test-snapd-sh.sh -c 'echo hello'; then echo "filtering broken: program should have failed to run" exit 1 fi echo "Ensure the code cannot not run with an empty seccomp profile" - rm -f "${PROFILE}.bin" + rm -f "${PROFILE}.bin"* echo "" > "${PROFILE}.src" $SNAP_SECCOMP compile "${PROFILE}.src" "${PROFILE}.bin" if test-snapd-sh.sh -c 'echo hello'; then @@ -121,7 +123,7 @@ execute: | fi echo "Ensure snap-confine waits for security profiles to appear" - rm -f "${PROFILE}.bin" + rm -f "${PROFILE}.bin"* cat >"${PROFILE}.src" <