All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
@ 2023-05-30 11:34 ` ~akihirosuda
  2023-05-31  4:20   ` kernel test robot
  2023-05-30 14:42 ` [PATCH linux 1/3] net/ipv4: split group_range logic to kernel/group_range.c ~akihirosuda
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: ~akihirosuda @ 2023-05-30 11:34 UTC (permalink / raw)
  To: linux-kernel, containers, serge, brauner, paul, ebiederm
  Cc: suda.kyoto, akihiro.suda.cz

From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

This sysctl limits groups who can create a new userns without CAP_SYS_ADMIN
in the current userns, so as to mitigate potential kernel vulnerabilities
around userns.

The sysctl value format is same as "net.ipv4.ping_group_range".

To disable creating new unprivileged userns, set the sysctl value to "1 0"
in the initial userns.

To allow everyone to create new userns, set the sysctl value to
"0 4294967294". This is the default value.

This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
Ubuntu [1] and Debian GNU/Linux.

Link: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
---
 include/linux/user_namespace.h |  5 +++++
 kernel/fork.c                  | 24 ++++++++++++++++++++++++
 kernel/sysctl.c                | 30 ++++++++++++++++++++++++++++++
 kernel/user.c                  |  9 +++++++++
 4 files changed, 68 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c4..b8b5a982f818 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -11,6 +11,10 @@
 #include <linux/sysctl.h>
 #include <linux/err.h>
 
+#ifdef CONFIG_SYSCTL
+#include <linux/group_range.h>
+#endif
+
 #define UID_GID_MAP_MAX_BASE_EXTENTS 5
 #define UID_GID_MAP_MAX_EXTENTS 340
 
@@ -98,6 +102,7 @@ struct user_namespace {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
+	struct group_range group_range;
 #endif
 	struct ucounts		*ucounts;
 	long ucount_max[UCOUNT_COUNTS];
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..1e8debdf0896 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -111,6 +111,10 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
 
+#ifdef CONFIG_USER_NS
+#include <linux/group_range.h>
+#endif
+
 /*
  * Minimum number of threads to boot the kernel
  */
@@ -2235,6 +2239,16 @@ static void rv_task_fork(struct task_struct *p)
 #define rv_task_fork(p) do {} while (0)
 #endif
 
+#ifdef CONFIG_USER_NS
+static bool userns_clone_is_allowed(void)
+{
+	if (capable(CAP_SYS_ADMIN))
+		return true;
+
+	return check_current_group_range(&current_user_ns()->group_range);
+}
+#endif
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -2266,6 +2280,11 @@ __latent_entropy struct task_struct *copy_process(
 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
+#ifdef CONFIG_USER_NS
+	if ((clone_flags & CLONE_NEWUSER) && !userns_clone_is_allowed())
+		return ERR_PTR(-EPERM);
+#endif
+
 	/*
 	 * Thread groups must share signals as well, and detached threads
 	 * can only be started up within the thread group.
@@ -3340,6 +3359,11 @@ static int check_unshare_flags(unsigned long unshare_flags)
 			return -EINVAL;
 	}
 
+#ifdef CONFIG_USER_NS
+	if ((unshare_flags & CLONE_NEWUSER) && !userns_clone_is_allowed())
+		return -EPERM;
+#endif
+
 	return 0;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bfe53e835524..ace7bf0fe9fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -80,6 +80,9 @@
 #ifdef CONFIG_RT_MUTEXES
 #include <linux/rtmutex.h>
 #endif
+#ifdef CONFIG_USER_NS
+#include <linux/group_range.h>
+#endif
 
 /* shared constants to be used in various sysctls */
 const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1615,6 +1618,24 @@ int proc_do_static_key(struct ctl_table *table, int write,
 	return ret;
 }
 
+#ifdef CONFIG_USER_NS
+static struct group_range *userns_group_range_func(struct ctl_table *table)
+{
+	struct user_namespace *user_ns =
+		container_of(table->data, struct user_namespace, group_range.range);
+
+	return &user_ns->group_range;
+}
+
+/* Validate changes from /proc interface. */
+static int userns_group_range(struct ctl_table *table, int write,
+	void *buffer, size_t *lenp, loff_t *ppos)
+{
+	return sysctl_group_range(userns_group_range_func, table,
+		write, buffer, lenp, ppos);
+}
+#endif
+
 static struct ctl_table kern_table[] = {
 	{
 		.procname	= "panic",
@@ -1623,6 +1644,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_USER_NS
+	{
+		.procname	= "userns_group_range",
+		.data = &init_user_ns.group_range.range,
+		.maxlen		= sizeof(init_user_ns.group_range.range),
+		.mode		= 0644,
+		.proc_handler	= userns_group_range,
+	},
+#endif
 #ifdef CONFIG_PROC_SYSCTL
 	{
 		.procname	= "tainted",
diff --git a/kernel/user.c b/kernel/user.c
index d667debeafd6..4704c93f62f9 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -20,6 +20,10 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
 
+#ifdef CONFIG_SYSCTL
+#include <linux/group_range.h>
+#endif
+
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
  * and 1 for... ?
@@ -67,6 +71,11 @@ struct user_namespace init_user_ns = {
 	.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
 	.keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
 #endif
+#ifdef CONFIG_SYSCTL
+	.group_range = {
+		.range = {0, ((gid_t)~0U) - 1},
+	},
+#endif
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
-- 
2.38.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH linux 1/3] net/ipv4: split group_range logic to kernel/group_range.c
  2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
  2023-05-30 11:34 ` [PATCH linux 3/3] " ~akihirosuda
@ 2023-05-30 14:42 ` ~akihirosuda
  2023-05-30 17:31 ` [PATCH linux 2/3] group_range: allow GID from 2147483648 to 4294967294 ~akihirosuda
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: ~akihirosuda @ 2023-05-30 14:42 UTC (permalink / raw)
  To: linux-kernel, containers, serge, brauner, paul, ebiederm
  Cc: suda.kyoto, akihiro.suda.cz

From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

The logic can be reused for other sysctls in future.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
---
 include/linux/group_range.h | 24 ++++++++++
 include/net/netns/ipv4.h    |  9 +---
 include/net/ping.h          |  6 ---
 kernel/Makefile             |  2 +-
 kernel/group_range.c        | 91 +++++++++++++++++++++++++++++++++++++
 net/ipv4/ping.c             | 39 ++--------------
 net/ipv4/sysctl_net_ipv4.c  | 56 ++---------------------
 7 files changed, 125 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/group_range.h
 create mode 100644 kernel/group_range.c

diff --git a/include/linux/group_range.h b/include/linux/group_range.h
new file mode 100644
index 000000000000..5bd837eced95
--- /dev/null
+++ b/include/linux/group_range.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_GROUP_RANGE_H
+#define _LINUX_GROUP_RANGE_H
+
+#include <linux/seqlock.h>
+#include <linux/uidgid.h>
+
+/*
+ * gid_t is either uint or ushort.  We want to pass it to
+ * proc_dointvec_minmax(), so it must not be larger than MAX_INT
+ */
+#define GROUP_RANGE_MAX (((gid_t)~0U) >> 1)
+
+struct group_range {
+	seqlock_t       lock;
+	kgid_t          range[2];
+};
+
+typedef struct group_range* (*sysctl_group_range_func_t)(struct ctl_table *);
+int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *table,
+				 int write, void *buffer, size_t *lenp, loff_t *ppos);
+
+bool check_current_group_range(struct group_range *gr);
+#endif /* _LINUX_GROUP_RANGE_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index db762e35aca9..75d745a7c6e1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -6,11 +6,11 @@
 #ifndef __NETNS_IPV4_H__
 #define __NETNS_IPV4_H__
 
-#include <linux/uidgid.h>
 #include <net/inet_frag.h>
 #include <linux/rcupdate.h>
 #include <linux/seqlock.h>
 #include <linux/siphash.h>
+#include <linux/group_range.h>
 
 struct ctl_table_header;
 struct ipv4_devconf;
@@ -24,11 +24,6 @@ struct local_ports {
 	bool		warned;
 };
 
-struct ping_group_range {
-	seqlock_t	lock;
-	kgid_t		range[2];
-};
-
 struct inet_hashinfo;
 
 struct inet_timewait_death_row {
@@ -204,7 +199,7 @@ struct netns_ipv4 {
 	int sysctl_igmp_max_msf;
 	int sysctl_igmp_qrv;
 
-	struct ping_group_range ping_group_range;
+	struct group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
 
diff --git a/include/net/ping.h b/include/net/ping.h
index 9233ad3de0ad..37b1d7baeb7b 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -16,12 +16,6 @@
 #define PING_HTABLE_SIZE 	64
 #define PING_HTABLE_MASK 	(PING_HTABLE_SIZE-1)
 
-/*
- * gid_t is either uint or ushort.  We want to pass it to
- * proc_dointvec_minmax(), so it must not be larger than MAX_INT
- */
-#define GID_T_MAX (((gid_t)~0U) >> 1)
-
 /* Compatibility glue so we can support IPv6 when it's compiled as a module */
 struct pingv6_ops {
 	int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len,
diff --git a/kernel/Makefile b/kernel/Makefile
index b69c95315480..fb3a812cf92e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = fork.o exec_domain.o panic.o \
 	    extable.o params.o \
 	    kthread.o sys_ni.o nsproxy.o \
 	    notifier.o ksysfs.o cred.o reboot.o \
-	    async.o range.o smpboot.o ucount.o regset.o
+	    async.o range.o smpboot.o ucount.o regset.o group_range.o
 
 obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
 obj-$(CONFIG_MULTIUSER) += groups.o
diff --git a/kernel/group_range.c b/kernel/group_range.c
new file mode 100644
index 000000000000..b5c7d35d680b
--- /dev/null
+++ b/kernel/group_range.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cred.h>
+#include <linux/group_range.h>
+#include <linux/uidgid.h>
+#include <linux/seqlock.h>
+#include <linux/sysctl.h>
+
+static void get_group_range(struct group_range *gr, kgid_t *low, kgid_t *high)
+{
+	unsigned int seq;
+
+	do {
+		seq = read_seqbegin(&gr->lock);
+
+		*low = gr->range[0];
+		*high = gr->range[1];
+	} while (read_seqretry(&gr->lock, seq));
+}
+
+static void set_group_range(struct group_range *gr, kgid_t low, kgid_t high)
+{
+	write_seqlock(&gr->lock);
+	gr->range[0] = low;
+	gr->range[1] = high;
+	write_sequnlock(&gr->lock);
+}
+
+static int group_range_min[] = { 0, 0 };
+static int group_range_max[] = { GROUP_RANGE_MAX, GROUP_RANGE_MAX };
+
+int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *table,
+				 int write, void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct group_range *gr = fn(table);
+	struct user_namespace *user_ns = current_user_ns();
+	int ret;
+	gid_t urange[2];
+	kgid_t low, high;
+	struct ctl_table tmp = {
+		.data = &urange,
+		.maxlen = sizeof(urange),
+		.mode = table->mode,
+		.extra1 = &group_range_min,
+		.extra2 = &group_range_max,
+	};
+
+	get_group_range(gr, &low, &high);
+	urange[0] = from_kgid_munged(user_ns, low);
+	urange[1] = from_kgid_munged(user_ns, high);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+	if (write && ret == 0) {
+		low = make_kgid(user_ns, urange[0]);
+		high = make_kgid(user_ns, urange[1]);
+		if (!gid_valid(low) || !gid_valid(high))
+			return -EINVAL;
+		if (urange[1] < urange[0] || gid_lt(high, low)) {
+			low = make_kgid(&init_user_ns, 1);
+			high = make_kgid(&init_user_ns, 0);
+		}
+		set_group_range(gr, low, high);
+	}
+
+	return ret;
+}
+
+bool check_current_group_range(struct group_range *gr)
+{
+	kgid_t group = current_egid();
+	struct group_info *group_info;
+	int i;
+	kgid_t low, high;
+	bool ret = true;
+
+	get_group_range(gr, &low, &high);
+	if (gid_lte(low, group) && gid_lte(group, high))
+		return true;
+
+	group_info = get_current_groups();
+	for (i = 0; i < group_info->ngroups; i++) {
+		kgid_t gid = group_info->gid[i];
+
+		if (gid_lte(low, gid) && gid_lte(gid, high))
+			goto out_release_group;
+	}
+	ret = false;
+out_release_group:
+	put_group_info(group_info);
+	return ret;
+}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 5178a3f3cb53..6e23771c5234 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -244,50 +244,17 @@ exit:
 	return sk;
 }
 
-static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
-					  kgid_t *high)
-{
-	kgid_t *data = net->ipv4.ping_group_range.range;
-	unsigned int seq;
-
-	do {
-		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
-
-		*low = data[0];
-		*high = data[1];
-	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
-}
-
-
 int ping_init_sock(struct sock *sk)
 {
 	struct net *net = sock_net(sk);
-	kgid_t group = current_egid();
-	struct group_info *group_info;
-	int i;
-	kgid_t low, high;
-	int ret = 0;
 
 	if (sk->sk_family == AF_INET6)
 		sk->sk_ipv6only = 1;
 
-	inet_get_ping_group_range_net(net, &low, &high);
-	if (gid_lte(low, group) && gid_lte(group, high))
-		return 0;
-
-	group_info = get_current_groups();
-	for (i = 0; i < group_info->ngroups; i++) {
-		kgid_t gid = group_info->gid[i];
+	if (!check_current_group_range(&net->ipv4.ping_group_range))
+		return -EACCES;
 
-		if (gid_lte(low, gid) && gid_lte(gid, high))
-			goto out_release_group;
-	}
-
-	ret = -EACCES;
-
-out_release_group:
-	put_group_info(group_info);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ping_init_sock);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 40fe70fc2015..ad355ab265db 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -34,8 +34,6 @@ static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
 static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
-static int ip_ping_group_range_min[] = { 0, 0 };
-static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
 static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
@@ -133,66 +131,20 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
 	return ret;
 }
 
-static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
+static struct group_range *ipv4_ping_group_range_func(struct ctl_table *table)
 {
-	kgid_t *data = table->data;
 	struct net *net =
 		container_of(table->data, struct net, ipv4.ping_group_range.range);
-	unsigned int seq;
-	do {
-		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
 
-		*low = data[0];
-		*high = data[1];
-	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
-}
-
-/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
-{
-	kgid_t *data = table->data;
-	struct net *net =
-		container_of(table->data, struct net, ipv4.ping_group_range.range);
-	write_seqlock(&net->ipv4.ping_group_range.lock);
-	data[0] = low;
-	data[1] = high;
-	write_sequnlock(&net->ipv4.ping_group_range.lock);
+	return &net->ipv4.ping_group_range;
 }
 
 /* Validate changes from /proc interface. */
 static int ipv4_ping_group_range(struct ctl_table *table, int write,
 				 void *buffer, size_t *lenp, loff_t *ppos)
 {
-	struct user_namespace *user_ns = current_user_ns();
-	int ret;
-	gid_t urange[2];
-	kgid_t low, high;
-	struct ctl_table tmp = {
-		.data = &urange,
-		.maxlen = sizeof(urange),
-		.mode = table->mode,
-		.extra1 = &ip_ping_group_range_min,
-		.extra2 = &ip_ping_group_range_max,
-	};
-
-	inet_get_ping_group_range_table(table, &low, &high);
-	urange[0] = from_kgid_munged(user_ns, low);
-	urange[1] = from_kgid_munged(user_ns, high);
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
-
-	if (write && ret == 0) {
-		low = make_kgid(user_ns, urange[0]);
-		high = make_kgid(user_ns, urange[1]);
-		if (!gid_valid(low) || !gid_valid(high))
-			return -EINVAL;
-		if (urange[1] < urange[0] || gid_lt(high, low)) {
-			low = make_kgid(&init_user_ns, 1);
-			high = make_kgid(&init_user_ns, 0);
-		}
-		set_ping_group_range(table, low, high);
-	}
-
-	return ret;
+	return sysctl_group_range(ipv4_ping_group_range_func, table,
+		write, buffer, lenp, ppos);
 }
 
 static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
-- 
2.38.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH linux 2/3] group_range: allow GID from 2147483648 to 4294967294
  2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
  2023-05-30 11:34 ` [PATCH linux 3/3] " ~akihirosuda
  2023-05-30 14:42 ` [PATCH linux 1/3] net/ipv4: split group_range logic to kernel/group_range.c ~akihirosuda
@ 2023-05-30 17:31 ` ~akihirosuda
  2023-05-30 21:58 ` [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" Paul Moore
  2023-06-02  0:06 ` Eric W. Biederman
  4 siblings, 0 replies; 13+ messages in thread
From: ~akihirosuda @ 2023-05-30 17:31 UTC (permalink / raw)
  To: linux-kernel, containers, serge, brauner, paul, ebiederm
  Cc: suda.kyoto, akihiro.suda.cz

From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

proc_dointvec_minmax is no longer used because it does not support GID
from 2147483648 to 4294967294.

proc_douintvec is not used either, because it does not support vectors,
despite its function name.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
---
 include/linux/group_range.h |  6 -----
 kernel/group_range.c        | 52 ++++++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/include/linux/group_range.h b/include/linux/group_range.h
index 5bd837eced95..8f71dc956693 100644
--- a/include/linux/group_range.h
+++ b/include/linux/group_range.h
@@ -5,12 +5,6 @@
 #include <linux/seqlock.h>
 #include <linux/uidgid.h>
 
-/*
- * gid_t is either uint or ushort.  We want to pass it to
- * proc_dointvec_minmax(), so it must not be larger than MAX_INT
- */
-#define GROUP_RANGE_MAX (((gid_t)~0U) >> 1)
-
 struct group_range {
 	seqlock_t       lock;
 	kgid_t          range[2];
diff --git a/kernel/group_range.c b/kernel/group_range.c
index b5c7d35d680b..13db83b77832 100644
--- a/kernel/group_range.c
+++ b/kernel/group_range.c
@@ -4,6 +4,7 @@
 #include <linux/group_range.h>
 #include <linux/uidgid.h>
 #include <linux/seqlock.h>
+#include <linux/slab.h>
 #include <linux/sysctl.h>
 
 static void get_group_range(struct group_range *gr, kgid_t *low, kgid_t *high)
@@ -26,9 +27,6 @@ static void set_group_range(struct group_range *gr, kgid_t low, kgid_t high)
 	write_sequnlock(&gr->lock);
 }
 
-static int group_range_min[] = { 0, 0 };
-static int group_range_max[] = { GROUP_RANGE_MAX, GROUP_RANGE_MAX };
-
 int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *table,
 				 int write, void *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -37,24 +35,56 @@ int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *table,
 	int ret;
 	gid_t urange[2];
 	kgid_t low, high;
+	size_t slen = 256; /* total bytes including '\0' */
+	char *s = kmalloc(slen, GFP_KERNEL); /* clobbered by strsep */
 	struct ctl_table tmp = {
-		.data = &urange,
-		.maxlen = sizeof(urange),
+		.data = s,
+		.maxlen = slen,
 		.mode = table->mode,
-		.extra1 = &group_range_min,
-		.extra2 = &group_range_max,
 	};
 
+	if (unlikely(!s))
+		return -ENOMEM;
+
+	/*
+	 * proc_dointvec_minmax is no longer used because it does not support
+	 * GID from 2147483648 to 4294967294.
+	 *
+	 * proc_douintvec is not used either, because it does not support
+	 * vectors, despite its function name.
+	 */
 	get_group_range(gr, &low, &high);
 	urange[0] = from_kgid_munged(user_ns, low);
 	urange[1] = from_kgid_munged(user_ns, high);
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	ret = snprintf(tmp.data, slen, "%u\t%u", urange[0], urange[1]);
+	if (ret < 0)
+		goto done;
+	ret = proc_dostring(&tmp, write, buffer, lenp, ppos);
+	if (*lenp >= slen - 1) /* truncated */
+		ret = -EINVAL;
 
 	if (write && ret == 0) {
+		char *tok[2];
+		int i;
+
+		s = strim(s);
+		tok[0] = strsep(&s, " \t");
+		tok[1] = s;
+		for (i = 0; i < 2; i++) {
+			if (!tok[i]) {
+				ret = -EINVAL;
+				goto done;
+			}
+			ret = kstrtouint(tok[i], 0, &urange[i]);
+			if (ret < 0)
+				goto done;
+		}
 		low = make_kgid(user_ns, urange[0]);
 		high = make_kgid(user_ns, urange[1]);
-		if (!gid_valid(low) || !gid_valid(high))
-			return -EINVAL;
+		if (!gid_valid(low) || !gid_valid(high)) {
+			ret = -EINVAL;
+			goto done;
+		}
 		if (urange[1] < urange[0] || gid_lt(high, low)) {
 			low = make_kgid(&init_user_ns, 1);
 			high = make_kgid(&init_user_ns, 0);
@@ -62,6 +92,8 @@ int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *table,
 		set_group_range(gr, low, high);
 	}
 
+done:
+	kfree(tmp.data);
 	return ret;
 }
 
-- 
2.38.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
@ 2023-05-30 18:50 ~akihirosuda
  2023-05-30 11:34 ` [PATCH linux 3/3] " ~akihirosuda
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: ~akihirosuda @ 2023-05-30 18:50 UTC (permalink / raw)
  To: linux-kernel, containers, serge, brauner, paul, ebiederm
  Cc: suda.kyoto, akihiro.suda.cz

This sysctl limits groups who can create a new userns without
CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
vulnerabilities around userns.

The sysctl value format is same as "net.ipv4.ping_group_range".

To disable creating new unprivileged userns, set the sysctl value to "1
0" in the initial userns.

To allow everyone to create new userns, set the sysctl value to "0
4294967294". This is the default value.

This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
Ubuntu [1] and Debian GNU/Linux.

Link: https://git.launchpad.net/~ubuntu-
kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

Akihiro Suda (3):
  net/ipv4: split group_range logic to kernel/group_range.c
  group_range: allow GID from 2147483648 to 4294967294
  userns: add sysctl "kernel.userns_group_range"

 include/linux/group_range.h    |  18 +++++
 include/linux/user_namespace.h |   5 ++
 include/net/netns/ipv4.h       |   9 +--
 include/net/ping.h             |   6 --
 kernel/Makefile                |   2 +-
 kernel/fork.c                  |  24 +++++++
 kernel/group_range.c           | 123 +++++++++++++++++++++++++++++++++
 kernel/sysctl.c                |  30 ++++++++
 kernel/user.c                  |   9 +++
 net/ipv4/ping.c                |  39 +----------
 net/ipv4/sysctl_net_ipv4.c     |  56 ++-------------
 11 files changed, 219 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/group_range.h
 create mode 100644 kernel/group_range.c

-- 
2.38.4

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
                   ` (2 preceding siblings ...)
  2023-05-30 17:31 ` [PATCH linux 2/3] group_range: allow GID from 2147483648 to 4294967294 ~akihirosuda
@ 2023-05-30 21:58 ` Paul Moore
  2023-05-31  7:50   ` Christian Brauner
  2023-06-02  0:14   ` Eric W. Biederman
  2023-06-02  0:06 ` Eric W. Biederman
  4 siblings, 2 replies; 13+ messages in thread
From: Paul Moore @ 2023-05-30 21:58 UTC (permalink / raw)
  To: ~akihirosuda
  Cc: linux-kernel, containers, serge, brauner, ebiederm, akihiro.suda.cz

On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <akihirosuda@git.sr.ht> wrote:
>
> This sysctl limits groups who can create a new userns without
> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> vulnerabilities around userns.
>
> The sysctl value format is same as "net.ipv4.ping_group_range".
>
> To disable creating new unprivileged userns, set the sysctl value to "1
> 0" in the initial userns.
>
> To allow everyone to create new userns, set the sysctl value to "0
> 4294967294". This is the default value.
>
> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> Ubuntu [1] and Debian GNU/Linux.
>
> Link: https://git.launchpad.net/~ubuntu-
> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Given the challenges around adding access controls to userns
operations, have you considered using the LSM support that was added
upstream last year?  The relevant LSM hook can be found in commit
7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
and although only SELinux currently provides an access control
implementation, there is no reason you couldn't add support for your
favorite LSM, or even just a simple BPF LSM to enforce the group
controls as you've described them here.

> Akihiro Suda (3):
>   net/ipv4: split group_range logic to kernel/group_range.c
>   group_range: allow GID from 2147483648 to 4294967294
>   userns: add sysctl "kernel.userns_group_range"
>
>  include/linux/group_range.h    |  18 +++++
>  include/linux/user_namespace.h |   5 ++
>  include/net/netns/ipv4.h       |   9 +--
>  include/net/ping.h             |   6 --
>  kernel/Makefile                |   2 +-
>  kernel/fork.c                  |  24 +++++++
>  kernel/group_range.c           | 123 +++++++++++++++++++++++++++++++++
>  kernel/sysctl.c                |  30 ++++++++
>  kernel/user.c                  |   9 +++
>  net/ipv4/ping.c                |  39 +----------
>  net/ipv4/sysctl_net_ipv4.c     |  56 ++-------------
>  11 files changed, 219 insertions(+), 102 deletions(-)
>  create mode 100644 include/linux/group_range.h
>  create mode 100644 kernel/group_range.c

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 11:34 ` [PATCH linux 3/3] " ~akihirosuda
@ 2023-05-31  4:20   ` kernel test robot
  0 siblings, 0 replies; 13+ messages in thread
From: kernel test robot @ 2023-05-31  4:20 UTC (permalink / raw)
  To: ~akihirosuda, linux-kernel, containers, serge, brauner, paul, ebiederm
  Cc: oe-kbuild-all, suda.kyoto, akihiro.suda.cz

Hi ~akihirosuda,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linux/master]

url:    https://github.com/intel-lab-lkp/linux/commits/akihirosuda/group_range-allow-GID-from-2147483648-to-4294967294/20230531-030041
base:   linux/master
patch link:    https://lore.kernel.org/r/168547265011.24337.4306067683997517082-3%40git.sr.ht
patch subject: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"
config: um-x86_64_defconfig (https://download.01.org/0day-ci/archive/20230531/202305311221.0GtxUteW-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/7d893a64ad74171c7ad9aa2296da7bc4214bdf37
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review akihirosuda/group_range-allow-GID-from-2147483648-to-4294967294/20230531-030041
        git checkout 7d893a64ad74171c7ad9aa2296da7bc4214bdf37
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=um SUBARCH=x86_64 olddefconfig
        make W=1 O=build_dir ARCH=um SUBARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305311221.0GtxUteW-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
      31 | struct user_namespace init_user_ns = {
         |                                      ^
   ......
      40 |                 },
         |                 }
   ......
      50 |                 },
         |                 }
   ......
      60 |                 },
         |                 }
      61 |         },
      62 |         .ns.count = REFCOUNT_INIT(3),
         |                                     }
   ......
      65 |         .ns.inum = PROC_USER_INIT_INO,
         |                                      }
   ......
      76 |                 .range = {0, ((gid_t)~0U) - 1},
         |                           {} {               }
>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
      31 | struct user_namespace init_user_ns = {
         |                                      ^
   ......
      40 |                 },
         |                 }
   ......
      50 |                 },
         |                 }
   ......
      60 |                 },
         |                 }
      61 |         },
      62 |         .ns.count = REFCOUNT_INIT(3),
         |                                     }
   ......
      65 |         .ns.inum = PROC_USER_INIT_INO,
         |                                      }
   ......
      76 |                 .range = {0, ((gid_t)~0U) - 1},
         |                           {} {               }
>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
      31 | struct user_namespace init_user_ns = {
         |                                      ^
   ......
      40 |                 },
         |                 }
   ......
      50 |                 },
         |                 }
   ......
      60 |                 },
         |                 }
      61 |         },
      62 |         .ns.count = REFCOUNT_INIT(3),
         |                                     }
   ......
      65 |         .ns.inum = PROC_USER_INIT_INO,
         |                                      }
   ......
      76 |                 .range = {0, ((gid_t)~0U) - 1},
         |                           {} {               }


vim +31 kernel/user.c

7d893a64ad7417 Akihiro Suda      2023-05-30  26  
59607db367c57f Serge E. Hallyn   2011-03-23  27  /*
59607db367c57f Serge E. Hallyn   2011-03-23  28   * userns count is 1 for root user, 1 for init_uts_ns,
59607db367c57f Serge E. Hallyn   2011-03-23  29   * and 1 for... ?
59607db367c57f Serge E. Hallyn   2011-03-23  30   */
aee16ce73c71a2 Pavel Emelyanov   2008-02-08 @31  struct user_namespace init_user_ns = {
22d917d80e8428 Eric W. Biederman 2011-11-17  32  	.uid_map = {
22d917d80e8428 Eric W. Biederman 2011-11-17  33  		.nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25  34  		{
22d917d80e8428 Eric W. Biederman 2011-11-17  35  			.extent[0] = {
22d917d80e8428 Eric W. Biederman 2011-11-17  36  				.first = 0,
22d917d80e8428 Eric W. Biederman 2011-11-17  37  				.lower_first = 0,
4b06a81f1daee6 Eric W. Biederman 2012-05-19  38  				.count = 4294967295U,
22d917d80e8428 Eric W. Biederman 2011-11-17  39  			},
22d917d80e8428 Eric W. Biederman 2011-11-17  40  		},
aa4bf44dc851c6 Christian Brauner 2017-10-25  41  	},
22d917d80e8428 Eric W. Biederman 2011-11-17  42  	.gid_map = {
22d917d80e8428 Eric W. Biederman 2011-11-17  43  		.nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25  44  		{
22d917d80e8428 Eric W. Biederman 2011-11-17  45  			.extent[0] = {
22d917d80e8428 Eric W. Biederman 2011-11-17  46  				.first = 0,
22d917d80e8428 Eric W. Biederman 2011-11-17  47  				.lower_first = 0,
4b06a81f1daee6 Eric W. Biederman 2012-05-19  48  				.count = 4294967295U,
22d917d80e8428 Eric W. Biederman 2011-11-17  49  			},
22d917d80e8428 Eric W. Biederman 2011-11-17  50  		},
aa4bf44dc851c6 Christian Brauner 2017-10-25  51  	},
f76d207a66c3a5 Eric W. Biederman 2012-08-30  52  	.projid_map = {
f76d207a66c3a5 Eric W. Biederman 2012-08-30  53  		.nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25  54  		{
f76d207a66c3a5 Eric W. Biederman 2012-08-30  55  			.extent[0] = {
f76d207a66c3a5 Eric W. Biederman 2012-08-30  56  				.first = 0,
f76d207a66c3a5 Eric W. Biederman 2012-08-30  57  				.lower_first = 0,
f76d207a66c3a5 Eric W. Biederman 2012-08-30  58  				.count = 4294967295U,
f76d207a66c3a5 Eric W. Biederman 2012-08-30  59  			},
f76d207a66c3a5 Eric W. Biederman 2012-08-30  60  		},
aa4bf44dc851c6 Christian Brauner 2017-10-25  61  	},
265cbd62e034cb Kirill Tkhai      2020-08-03  62  	.ns.count = REFCOUNT_INIT(3),
783291e6900292 Eric W. Biederman 2011-11-17  63  	.owner = GLOBAL_ROOT_UID,
783291e6900292 Eric W. Biederman 2011-11-17  64  	.group = GLOBAL_ROOT_GID,
435d5f4bb2ccba Al Viro           2014-10-31  65  	.ns.inum = PROC_USER_INIT_INO,
33c429405a2c8d Al Viro           2014-11-01  66  #ifdef CONFIG_USER_NS
33c429405a2c8d Al Viro           2014-11-01  67  	.ns.ops = &userns_operations,
33c429405a2c8d Al Viro           2014-11-01  68  #endif
9cc46516ddf497 Eric W. Biederman 2014-12-02  69  	.flags = USERNS_INIT_FLAGS,
b206f281d0ee14 David Howells     2019-06-26  70  #ifdef CONFIG_KEYS
b206f281d0ee14 David Howells     2019-06-26  71  	.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
0f44e4d976f96c David Howells     2019-06-26  72  	.keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
f36f8c75ae2e7d David Howells     2013-09-24  73  #endif
7d893a64ad7417 Akihiro Suda      2023-05-30  74  #ifdef CONFIG_SYSCTL
7d893a64ad7417 Akihiro Suda      2023-05-30  75  	.group_range = {
7d893a64ad7417 Akihiro Suda      2023-05-30  76  		.range = {0, ((gid_t)~0U) - 1},
7d893a64ad7417 Akihiro Suda      2023-05-30  77  	},
7d893a64ad7417 Akihiro Suda      2023-05-30  78  #endif
aee16ce73c71a2 Pavel Emelyanov   2008-02-08  79  };
aee16ce73c71a2 Pavel Emelyanov   2008-02-08  80  EXPORT_SYMBOL_GPL(init_user_ns);
aee16ce73c71a2 Pavel Emelyanov   2008-02-08  81  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 21:58 ` [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" Paul Moore
@ 2023-05-31  7:50   ` Christian Brauner
  2023-06-02  0:14   ` Eric W. Biederman
  1 sibling, 0 replies; 13+ messages in thread
From: Christian Brauner @ 2023-05-31  7:50 UTC (permalink / raw)
  To: Paul Moore
  Cc: ~akihirosuda, linux-kernel, containers, serge, ebiederm, akihiro.suda.cz

On Tue, May 30, 2023 at 05:58:48PM -0400, Paul Moore wrote:
> On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <akihirosuda@git.sr.ht> wrote:
> >
> > This sysctl limits groups who can create a new userns without
> > CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> > vulnerabilities around userns.
> >
> > The sysctl value format is same as "net.ipv4.ping_group_range".
> >
> > To disable creating new unprivileged userns, set the sysctl value to "1
> > 0" in the initial userns.
> >
> > To allow everyone to create new userns, set the sysctl value to "0
> > 4294967294". This is the default value.
> >
> > This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> > Ubuntu [1] and Debian GNU/Linux.
> >
> > Link: https://git.launchpad.net/~ubuntu-
> > kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
> 
> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year?  The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

Yes. Please, no more sysctls...

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
                   ` (3 preceding siblings ...)
  2023-05-30 21:58 ` [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" Paul Moore
@ 2023-06-02  0:06 ` Eric W. Biederman
  4 siblings, 0 replies; 13+ messages in thread
From: Eric W. Biederman @ 2023-06-02  0:06 UTC (permalink / raw)
  To: ~akihirosuda
  Cc: linux-kernel, containers, serge, brauner, paul, ~akihirosuda,
	akihiro.suda.cz

~akihirosuda <akihirosuda@git.sr.ht> writes:

> This sysctl limits groups who can create a new userns without
> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> vulnerabilities around userns.
>
> The sysctl value format is same as "net.ipv4.ping_group_range".
>
> To disable creating new unprivileged userns, set the sysctl value to "1
> 0" in the initial userns.
>
> To allow everyone to create new userns, set the sysctl value to "0
> 4294967294". This is the default value.
>
> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> Ubuntu [1] and Debian GNU/Linux.
>
> Link: https://git.launchpad.net/~ubuntu-
> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
>
> Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

How does this functionally differ from what already exists
user.max_user_namespaces?

Given that setns exists I don't see limiting creation of user namespaces
by group being meaningful, if your goal is to reduce the attack surface
of the kernel to mitigate potential kernel vulnerabilities.

How does this functionality interact with the use of setgroups in a user
namespace?

What is the value of a group_range inside of a newly created user
namespace?  How does that work to maintain the policy you are trying to
implement?

Eric

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-05-30 21:58 ` [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" Paul Moore
  2023-05-31  7:50   ` Christian Brauner
@ 2023-06-02  0:14   ` Eric W. Biederman
  2023-06-02  1:01     ` Paul Moore
  1 sibling, 1 reply; 13+ messages in thread
From: Eric W. Biederman @ 2023-06-02  0:14 UTC (permalink / raw)
  To: Paul Moore
  Cc: ~akihirosuda, linux-kernel, containers, serge, brauner, akihiro.suda.cz

Paul Moore <paul@paul-moore.com> writes:

> On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <akihirosuda@git.sr.ht> wrote:
>>
>> This sysctl limits groups who can create a new userns without
>> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
>> vulnerabilities around userns.
>>
>> The sysctl value format is same as "net.ipv4.ping_group_range".
>>
>> To disable creating new unprivileged userns, set the sysctl value to "1
>> 0" in the initial userns.
>>
>> To allow everyone to create new userns, set the sysctl value to "0
>> 4294967294". This is the default value.
>>
>> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
>> Ubuntu [1] and Debian GNU/Linux.
>>
>> Link: https://git.launchpad.net/~ubuntu-
>> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
>
> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year?  The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),


Paul how have you handled the real world regression I reported against
chromium?

Paul are you aware that the LSM hook can not be used to achieve the
objective of this patchset?

> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

Is there a publicly available SELinux policy that uses that LSM hook?

Eric

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-06-02  0:14   ` Eric W. Biederman
@ 2023-06-02  1:01     ` Paul Moore
  2023-06-02  1:41       ` Eric W. Biederman
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Moore @ 2023-06-02  1:01 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: ~akihirosuda, linux-kernel, containers, serge, brauner, akihiro.suda.cz

On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> Paul Moore <paul@paul-moore.com> writes:
> > On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <akihirosuda@git.sr.ht> wrote:
> >>
> >> This sysctl limits groups who can create a new userns without
> >> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> >> vulnerabilities around userns.
> >>
> >> The sysctl value format is same as "net.ipv4.ping_group_range".
> >>
> >> To disable creating new unprivileged userns, set the sysctl value to "1
> >> 0" in the initial userns.
> >>
> >> To allow everyone to create new userns, set the sysctl value to "0
> >> 4294967294". This is the default value.
> >>
> >> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> >> Ubuntu [1] and Debian GNU/Linux.
> >>
> >> Link: https://git.launchpad.net/~ubuntu-
> >> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
> >
> > Given the challenges around adding access controls to userns
> > operations, have you considered using the LSM support that was added
> > upstream last year?  The relevant LSM hook can be found in commit
> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
>
> Paul how have you handled the real world regression I reported against
> chromium?

I don't track chromium development.

> Paul are you aware that the LSM hook can not be used to achieve the
> objective of this patchset?

/me shrugs

I thought one could look into a cred struct using a BPF LSM, which
would allow one to make access control decisions based on group ID,
but I will be the first to admit I'm not a BPF LSM expert.
Regardless, one could introduce a group ID check into a LSM if they
were so inclined.

I also find it slightly amusing that you are arguing against my reply
that was discussing *not* adding another userns control point; of all
people I thought you would be supportive of this ... /me shrugs again.

> > and although only SELinux currently provides an access control
> > implementation, there is no reason you couldn't add support for your
> > favorite LSM, or even just a simple BPF LSM to enforce the group
> > controls as you've described them here.
>
> Is there a publicly available SELinux policy that uses that LSM hook?

I have no idea, I don't track all of the publicly available SELinux
policies because frankly it doesn't matter; the SELinux feature
exists, and it is my role to support and maintain it.  There are
LSM/SELinux features which are not widely exercised in general purpose
SELinux policies for various reasons, but *are* used in specialized
environments that are not often discussed on public mailing lists.

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-06-02  1:01     ` Paul Moore
@ 2023-06-02  1:41       ` Eric W. Biederman
  2023-06-02 14:50         ` Paul Moore
  0 siblings, 1 reply; 13+ messages in thread
From: Eric W. Biederman @ 2023-06-02  1:41 UTC (permalink / raw)
  To: Paul Moore
  Cc: ~akihirosuda, linux-kernel, containers, serge, brauner, akihiro.suda.cz

Paul Moore <paul@paul-moore.com> writes:

> On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>> Paul Moore <paul@paul-moore.com> writes:
>> >
>> > Given the challenges around adding access controls to userns
>> > operations, have you considered using the LSM support that was added
>> > upstream last year?  The relevant LSM hook can be found in commit
>> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
>>
>> Paul how have you handled the real world regression I reported against
>> chromium?
>
> I don't track chromium development.

You have chosen to be the maintainer and I reported it to you.

>> Paul are you aware that the LSM hook can not be used to achieve the
>> objective of this patchset?
>
> /me shrugs
>

[snip parts about performing a group id check]

The LSM hook you added does not have the technical capability to reduce
the attack surface to mitigate bugs in the kernel.  It is the
ineffectiveness of the hook not the permission check that I was
referring to.

Eric

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-06-02  1:41       ` Eric W. Biederman
@ 2023-06-02 14:50         ` Paul Moore
  2023-06-02 21:02           ` Akihiro Suda
  0 siblings, 1 reply; 13+ messages in thread
From: Paul Moore @ 2023-06-02 14:50 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: ~akihirosuda, linux-kernel, containers, serge, brauner, akihiro.suda.cz

On Thu, Jun 1, 2023 at 9:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> Paul Moore <paul@paul-moore.com> writes:
> > On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> >> Paul Moore <paul@paul-moore.com> writes:
> >> >
> >> > Given the challenges around adding access controls to userns
> >> > operations, have you considered using the LSM support that was added
> >> > upstream last year?  The relevant LSM hook can be found in commit
> >> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> >>
> >> Paul how have you handled the real world regression I reported against
> >> chromium?
> >
> > I don't track chromium development.
>
> You have chosen to be the maintainer and I reported it to you.

I just dug through all of the mail I've received from you over the
past two (?) years, as well as checking the LSM archive on lore and I
don't see any bug reports from you directed at the upstream LSM or
SELinux code ... perhaps I missed something, do you have a pointer?

Also, for the sake of clarification, I do not maintain any part of
Chromium or Chrome OS.  I do maintain the upstream LSM, SELinux,
audit, and labeled networking subsystems in the Linux Kernel as well
as a couple of userspace packages.

> >> Paul are you aware that the LSM hook can not be used to achieve the
> >> objective of this patchset?
> >
> > /me shrugs
>
> [snip parts about performing a group id check]

My comments here were only discussing the possibility of performing a
group ID based access control check; I made no claims about the
desirability of such a check, and I have no interest in rehashing our
old debates.

-- 
paul-moore.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"
  2023-06-02 14:50         ` Paul Moore
@ 2023-06-02 21:02           ` Akihiro Suda
  0 siblings, 0 replies; 13+ messages in thread
From: Akihiro Suda @ 2023-06-02 21:02 UTC (permalink / raw)
  To: Paul Moore
  Cc: Eric W. Biederman, linux-kernel, containers, serge, brauner,
	akihiro.suda.cz

Thank you all for feedbacks,

> (Paul)

> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year?

I'll consider this.

> The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

My intent is to provide an universal knob that works for both SELinux
distros and AppArmor distros.
So I guess I should try to use BPF LSM (and find out how its end-user
UX in the userspace can be simplified just like sysctl).

---
> (Christian)

> Yes. Please, no more sysctls...

I'll try to find another way, such as (BPF) LSM.

---
> (Eric)

> How does this functionally differ from what already exists
> user.max_user_namespaces?

My patch is not about the numbers of the UserNS, but about who is
permitted to create UserNS,
which can be a potential attack surface to pwn the root in the initial UserNS.

> Given that setns exists I don't see limiting creation of user namespaces
> by group being meaningful, if your goal is to reduce the attack surface
> of the kernel to mitigate potential kernel vulnerabilities.

Yes, that's my goal.
The intent is to allow creating UserNS only for (semi-trusted) human
users who need to run unprivileged (aka rootless) containers.
Creating UserNS is expected to be prohibited for system daemon
accounts and human users who do not need (or who are not trusted
enough) to run containers.

This configuration should be more secure than just allowing everybody
to run unprivileged (aka rootless) containers, and also more secure
than just disabling UserNS and running everything as the root.

> How does this functionality interact with the use of setgroups in a user
> namespace?
>
> What is the value of a group_range inside of a newly created user
> namespace?  How does that work to maintain the policy you are trying to
> implement?

In a child UserNS, the group_range file is expected to use the mapped
IDs, not the initial UserNS IDs.
(So, the range can't be just initialized with `.range = {0,
((gid_t)~0U) - 1}`. My patch v1 is wrong.)

> Paul are you aware that the LSM hook can not be used to achieve the
> objective of this patchset?

What would be an obstacle to using an LSM hook for this? (with an
addition of GID checks)

2023年6月2日(金) 23:50 Paul Moore <paul@paul-moore.com>:
>
> On Thu, Jun 1, 2023 at 9:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> > Paul Moore <paul@paul-moore.com> writes:
> > > On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> > >> Paul Moore <paul@paul-moore.com> writes:
> > >> >
> > >> > Given the challenges around adding access controls to userns
> > >> > operations, have you considered using the LSM support that was added
> > >> > upstream last year?  The relevant LSM hook can be found in commit
> > >> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> > >>
> > >> Paul how have you handled the real world regression I reported against
> > >> chromium?
> > >
> > > I don't track chromium development.
> >
> > You have chosen to be the maintainer and I reported it to you.
>
> I just dug through all of the mail I've received from you over the
> past two (?) years, as well as checking the LSM archive on lore and I
> don't see any bug reports from you directed at the upstream LSM or
> SELinux code ... perhaps I missed something, do you have a pointer?
>
> Also, for the sake of clarification, I do not maintain any part of
> Chromium or Chrome OS.  I do maintain the upstream LSM, SELinux,
> audit, and labeled networking subsystems in the Linux Kernel as well
> as a couple of userspace packages.
>
> > >> Paul are you aware that the LSM hook can not be used to achieve the
> > >> objective of this patchset?
> > >
> > > /me shrugs
> >
> > [snip parts about performing a group id check]
>
> My comments here were only discussing the possibility of performing a
> group ID based access control check; I made no claims about the
> desirability of such a check, and I have no interest in rehashing our
> old debates.
>
> --
> paul-moore.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-06-02 21:02 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-30 18:50 [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" ~akihirosuda
2023-05-30 11:34 ` [PATCH linux 3/3] " ~akihirosuda
2023-05-31  4:20   ` kernel test robot
2023-05-30 14:42 ` [PATCH linux 1/3] net/ipv4: split group_range logic to kernel/group_range.c ~akihirosuda
2023-05-30 17:31 ` [PATCH linux 2/3] group_range: allow GID from 2147483648 to 4294967294 ~akihirosuda
2023-05-30 21:58 ` [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range" Paul Moore
2023-05-31  7:50   ` Christian Brauner
2023-06-02  0:14   ` Eric W. Biederman
2023-06-02  1:01     ` Paul Moore
2023-06-02  1:41       ` Eric W. Biederman
2023-06-02 14:50         ` Paul Moore
2023-06-02 21:02           ` Akihiro Suda
2023-06-02  0:06 ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.