linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kees Cook <keescook@chromium.org>
To: YiFei Zhu <yifeifz2@illinois.edu>
Cc: Kees Cook <keescook@chromium.org>, Jann Horn <jannh@google.com>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Tycho Andersen <tycho@tycho.pizza>,
	Andy Lutomirski <luto@amacapital.net>,
	Will Drewry <wad@chromium.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Giuseppe Scrivano <gscrivan@redhat.com>,
	Tobin Feldman-Fitzthum <tobin@ibm.com>,
	Dimitrios Skarlatos <dskarlat@cs.cmu.edu>,
	Valentin Rothberg <vrothber@redhat.com>,
	Hubertus Franke <frankeh@us.ibm.com>,
	Jack Chen <jianyan2@illinois.edu>,
	Josep Torrellas <torrella@illinois.edu>,
	Tianyin Xu <tyxu@illinois.edu>,
	bpf@vger.kernel.org, containers@lists.linux-foundation.org,
	linux-api@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 1/6] seccomp: Introduce SECCOMP_PIN_ARCHITECTURE
Date: Wed, 23 Sep 2020 16:29:18 -0700	[thread overview]
Message-ID: <20200923232923.3142503-2-keescook@chromium.org> (raw)
In-Reply-To: <20200923232923.3142503-1-keescook@chromium.org>

For systems that provide multiple syscall maps based on audit
architectures (e.g. AUDIT_ARCH_X86_64 and AUDIT_ARCH_I386 via
CONFIG_COMPAT) or via syscall masks (e.g. x86_x32), allow a fast way
to pin the process to a specific syscall table, instead of needing
to generate all filters with an architecture check as the first filter
action.

This creates the internal representation that seccomp itself can use
(which is separate from the filters, which need to stay
runtime-agnostic). It also paves the way for constant-action bitmaps.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h                       |  9 +++
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 79 ++++++++++++++++++-
 tools/testing/selftests/seccomp/seccomp_bpf.c | 33 ++++++++
 4 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..0be20bc81ea9 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -20,12 +20,18 @@
 #include <linux/atomic.h>
 #include <asm/seccomp.h>
 
+#define SECCOMP_ARCH_IS_NATIVE		1
+#define SECCOMP_ARCH_IS_COMPAT		2
+#define SECCOMP_ARCH_IS_MULTIPLEX	3
+#define SECCOMP_ARCH_IS_UNKNOWN		0xff
+
 struct seccomp_filter;
 /**
  * struct seccomp - the state of a seccomp'ed process
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
+ * @arch: seccomp's internal architecture identifier (not seccomp_data->arch)
  * @filter: must always point to a valid seccomp-filter or NULL as it is
  *          accessed without locking during system call entry.
  *
@@ -34,6 +40,9 @@ struct seccomp_filter;
  */
 struct seccomp {
 	int mode;
+#ifdef SECCOMP_ARCH
+	u8 arch;	/* pinned seccomp_get_arch() value; 0 = not pinned */
+#endif
 	atomic_t filter_count;
 	struct seccomp_filter *filter;
 };
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 6ba18b82a02e..f4d134ebfa7e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -16,6 +16,7 @@
 #define SECCOMP_SET_MODE_FILTER		1
 #define SECCOMP_GET_ACTION_AVAIL	2
 #define SECCOMP_GET_NOTIF_SIZES		3
+#define SECCOMP_PIN_ARCHITECTURE	4
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC		(1UL << 0)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ae6b40cc39f4..0a3ff8eb8aea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -298,6 +298,47 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 	return 0;
 }
 
+#ifdef SECCOMP_ARCH
+static inline u8 seccomp_get_arch(u32 syscall_arch, u32 syscall_nr)
+{
+	u8 seccomp_arch;	/* internal identifier derived from the audit arch */
+
+	switch (syscall_arch) {
+	case SECCOMP_ARCH:
+		seccomp_arch = SECCOMP_ARCH_IS_NATIVE;
+		break;
+#ifdef CONFIG_COMPAT
+	case SECCOMP_ARCH_COMPAT:
+		seccomp_arch = SECCOMP_ARCH_IS_COMPAT;
+		break;
+#endif
+	default:
+		seccomp_arch = SECCOMP_ARCH_IS_UNKNOWN;
+	}
+	/* Fold the masked, shifted syscall-number bits into the identifier. */
+#ifdef SECCOMP_MULTIPLEXED_SYSCALL_TABLE_ARCH
+	if (syscall_arch == SECCOMP_MULTIPLEXED_SYSCALL_TABLE_ARCH) {
+		seccomp_arch |= (syscall_nr & SECCOMP_MULTIPLEXED_SYSCALL_TABLE_MASK) >>
+				SECCOMP_MULTIPLEXED_SYSCALL_TABLE_SHIFT;
+	}
+#endif
+
+	return seccomp_arch;
+}
+#endif
+
+static inline bool seccomp_arch_mismatch(struct seccomp *seccomp,
+					 const struct seccomp_data *sd)
+{
+#ifdef SECCOMP_ARCH
+	/* Unpinned (0) never mismatches; otherwise arch must equal the pin. */
+	if (seccomp->arch && seccomp->arch != seccomp_get_arch(sd->arch, sd->nr))
+		return true;
+#endif
+
+	return false;
+}
+
 /**
  * seccomp_run_filters - evaluates all seccomp filters against @sd
  * @sd: optional seccomp data to be passed to filters
@@ -312,9 +353,14 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			       struct seccomp_filter **match)
 {
 	u32 ret = SECCOMP_RET_ALLOW;
+	struct seccomp_filter *f;
+	struct seccomp *seccomp = &current->seccomp;
+
+	if (seccomp_arch_mismatch(seccomp, sd))
+		return SECCOMP_RET_KILL_PROCESS;
+
 	/* Make sure cross-thread synced filter points somewhere sane. */
-	struct seccomp_filter *f =
-			READ_ONCE(current->seccomp.filter);
+	f = READ_ONCE(seccomp->filter);
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (WARN_ON(f == NULL))
@@ -522,6 +568,11 @@ static inline void seccomp_sync_threads(unsigned long flags)
 		if (task_no_new_privs(caller))
 			task_set_no_new_privs(thread);
 
+#ifdef SECCOMP_ARCH
+		/* Copy any pinned architecture. */
+		thread->seccomp.arch = caller->seccomp.arch;
+#endif
+
 		/*
 		 * Opt the other thread into seccomp if needed.
 		 * As threads are considered to be trust-realm
@@ -1652,6 +1703,23 @@ static long seccomp_get_notif_sizes(void __user *usizes)
 	return 0;
 }
 
+static long seccomp_pin_architecture(void)
+{
+#ifdef SECCOMP_ARCH
+	struct task_struct *task = current;
+
+	u8 arch = seccomp_get_arch(syscall_get_arch(task),
+				   syscall_get_nr(task, task_pt_regs(task)));
+
+	/* Refuse to re-pin a task already pinned to a different arch. */
+	if (task->seccomp.arch && task->seccomp.arch != arch)
+		return -EBUSY;
+
+	task->seccomp.arch = arch;
+#endif
+	return 0;
+}
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
 		       void __user *uargs)
@@ -1673,6 +1741,13 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 			return -EINVAL;
 
 		return seccomp_get_notif_sizes(uargs);
+	case SECCOMP_PIN_ARCHITECTURE:
+		if (flags != 0)
+			return -EINVAL;
+		if (uargs != NULL)
+			return -EINVAL;
+
+		return seccomp_pin_architecture();
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9c398768553b..d90551e0385e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -157,6 +157,10 @@ struct seccomp_data {
 #define SECCOMP_GET_NOTIF_SIZES 3
 #endif
 
+#ifndef SECCOMP_PIN_ARCHITECTURE
+#define SECCOMP_PIN_ARCHITECTURE 4
+#endif
+
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
 #endif
@@ -2221,6 +2225,35 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
+TEST(seccomp_architecture_pin)
+{
+	long ret;
+
+	ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 0, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_PIN_ARCHITECTURE!");
+	}
+
+	/* Non-zero flags and/or a non-NULL pointer must each fail with EINVAL. */
+	ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 1, NULL);
+	ASSERT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with flags!");
+	}
+
+	ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 0, &ret);
+	ASSERT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with address!");
+	}
+
+	ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 1, &ret);
+	ASSERT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with flags and address!");
+	}
+}
+
 TEST(seccomp_syscall)
 {
 	struct sock_filter filter[] = {
-- 
2.25.1


  reply	other threads:[~2020-09-23 23:32 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-23 23:29 [PATCH v1 0/6] seccomp: Implement constant action bitmaps Kees Cook
2020-09-23 23:29 ` Kees Cook [this message]
2020-09-24  0:41   ` [PATCH 1/6] seccomp: Introduce SECCOMP_PIN_ARCHITECTURE Jann Horn
2020-09-24  7:11     ` Kees Cook
2020-09-23 23:29 ` [PATCH 2/6] x86: Enable seccomp architecture tracking Kees Cook
2020-09-24  0:45   ` Jann Horn
2020-09-24  7:12     ` Kees Cook
2020-09-23 23:29 ` [PATCH 3/6] seccomp: Implement constant action bitmaps Kees Cook
2020-09-24  0:25   ` Jann Horn
2020-09-24  7:36     ` Kees Cook
2020-09-24  8:07       ` YiFei Zhu
2020-09-24  8:15         ` Kees Cook
2020-09-24  8:22           ` YiFei Zhu
2020-09-24 12:28       ` Jann Horn
2020-09-24 12:37         ` David Laight
2020-09-24 12:56           ` Jann Horn
     [not found]   ` <DM6PR11MB271492D0565E91475D949F5DEF390@DM6PR11MB2714.namprd11.prod.outlook.com>
2020-09-24  0:36     ` YiFei Zhu
2020-09-24  7:38       ` Kees Cook
2020-09-24  7:51         ` YiFei Zhu
2020-09-23 23:29 ` [PATCH 4/6] seccomp: Emulate basic filters for constant action results Kees Cook
2020-09-23 23:47   ` Jann Horn
2020-09-24  7:46     ` Kees Cook
2020-09-24 15:28       ` Paul Moore
2020-09-24 19:52         ` Kees Cook
2020-09-24 20:46           ` Paul Moore
2020-09-24 21:35             ` Kees Cook
2020-09-23 23:29 ` [PATCH 5/6] selftests/seccomp: Compare bitmap vs filter overhead Kees Cook
2020-09-23 23:29 ` [PATCH 6/6] [DEBUG] seccomp: Report bitmap coverage ranges Kees Cook
2020-09-24 13:40 ` [PATCH v1 0/6] seccomp: Implement constant action bitmaps Rasmus Villemoes
2020-09-24 13:58   ` YiFei Zhu
2020-09-25  5:56     ` Rasmus Villemoes
2020-09-25  7:07       ` YiFei Zhu
2020-09-26 18:11         ` YiFei Zhu
2020-09-28 20:04           ` Kees Cook
2020-09-28 20:16             ` YiFei Zhu
2020-09-24 14:05   ` Jann Horn
2020-09-24 18:57 ` Andrea Arcangeli
2020-09-24 19:18   ` Jann Horn
     [not found]   ` <9dbe8e3bbdad43a1872202ff38c34ca2@DM5PR11MB1692.namprd11.prod.outlook.com>
2020-09-24 19:48     ` Tianyin Xu
2020-09-24 20:00   ` Kees Cook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200923232923.3142503-2-keescook@chromium.org \
    --to=keescook@chromium.org \
    --cc=aarcange@redhat.com \
    --cc=bpf@vger.kernel.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=dskarlat@cs.cmu.edu \
    --cc=frankeh@us.ibm.com \
    --cc=gscrivan@redhat.com \
    --cc=jannh@google.com \
    --cc=jianyan2@illinois.edu \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=tobin@ibm.com \
    --cc=torrella@illinois.edu \
    --cc=tycho@tycho.pizza \
    --cc=tyxu@illinois.edu \
    --cc=vrothber@redhat.com \
    --cc=wad@chromium.org \
    --cc=yifeifz2@illinois.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).