All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/10] userns: sysctl limits for namespaces
@ 2016-07-20  1:13 Eric W. Biederman
       [not found] ` <8737n5dscy.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  0 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:13 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov, Jann Horn


This patchset addresses two use cases:
- Implement a sane upper bound on the number of namespaces.
- Provide a way for sandboxes to limit the attack surface from
  namespaces.

The maximum sane case I can imagine is if every process is a fat
process, so I set the maximum number of namespaces to the maximum
number of threads.

I make these limits recursive and per user namespace so that a
user namespace root can reduce the limits further.  If a user namespace
root raises its limit, the limit in the parent namespace will still be honored.

I have cut this implementation to the bare minimum needed to achieve
these objectives.

Assuming nothing problematic shows up in the review I will add these to
my user namespace tree.

These patches are also available at:
    git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Eric W. Biederman (10):
      sysctl: Stop implicitly passing current into sysctl_table_root.lookup
      userns: Add per user namespace sysctls.
      userns: Add a limit on the number of user namespaces
      userns: Generalize the user namespace count into ucount
      pidns: Add a limit on the number of pid namespaces
      utsns: Add a limit on the number of uts namespaces
      ipcns: Add a  limit on the number of ipc namespaces
      cgroupns: Add a limit on the number of cgroup namespaces
      netns: Add a limit on the number of net namespaces
      mntns: Add a limit on the number of mount namespaces.

 fs/namespace.c                 |  19 ++++-
 fs/proc/proc_sysctl.c          |  14 ++--
 include/linux/sysctl.h         |   3 +-
 include/linux/user_namespace.h |  40 +++++++++
 ipc/namespace.c                |  42 +++++++---
 kernel/cgroup.c                |  15 ++++
 kernel/fork.c                  |   5 ++
 kernel/pid_namespace.c         |  22 ++++-
 kernel/user_namespace.c        | 184 ++++++++++++++++++++++++++++++++++++++---
 kernel/utsname.c               |  31 +++++--
 net/core/net_namespace.c       |  15 ++++
 net/sysctl_net.c               |   4 +-
 12 files changed, 351 insertions(+), 43 deletions(-)

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup
       [not found] ` <8737n5dscy.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-20  1:21   ` Eric W. Biederman
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  4:02   ` [PATCH 0/10] userns: sysctl limits for namespaces Kees Cook
  2016-07-21 16:39     ` Eric W. Biederman
  2 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Passing nsproxy into sysctl_table_root.lookup was a premature
optimization in an attempt to avoid depending on current.  The
directory /proc/self/sys has not appeared and if and when
it does this code will need to be reviewed closely and reworked
anyway.  So remove the premature optimization.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/proc/proc_sysctl.c  | 14 +++++++-------
 include/linux/sysctl.h |  3 +--
 net/sysctl_net.c       |  4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5e57c3e46e1d..534630687489 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
 
 static void drop_sysctl_table(struct ctl_table_header *header);
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces);
+	struct ctl_table **pentry);
 static int insert_links(struct ctl_table_header *head);
 static void put_links(struct ctl_table_header *header);
 
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
 }
 
 static struct ctl_table_set *
-lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+lookup_header_set(struct ctl_table_root *root)
 {
 	struct ctl_table_set *set = &root->default_set;
 	if (root->lookup)
-		set = root->lookup(root, namespaces);
+		set = root->lookup(root);
 	return set;
 }
 
@@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	if (S_ISLNK(p->mode)) {
-		ret = sysctl_follow_link(&h, &p, current->nsproxy);
+		ret = sysctl_follow_link(&h, &p);
 		err = ERR_PTR(ret);
 		if (ret)
 			goto out;
@@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
 
 	if (S_ISLNK(table->mode)) {
 		/* It is not an error if we can not follow the link ignore it */
-		int err = sysctl_follow_link(&head, &table, current->nsproxy);
+		int err = sysctl_follow_link(&head, &table);
 		if (err)
 			goto out;
 	}
@@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
 }
 
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces)
+	struct ctl_table **pentry)
 {
 	struct ctl_table_header *head;
 	struct ctl_table_root *root;
@@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
 	ret = 0;
 	spin_lock(&sysctl_lock);
 	root = (*pentry)->data;
-	set = lookup_header_set(root, namespaces);
+	set = lookup_header_set(root);
 	dir = xlate_dir(set, (*phead)->parent);
 	if (IS_ERR(dir))
 		ret = PTR_ERR(dir);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index fa7bc29925c9..6385b331f2b9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -154,8 +154,7 @@ struct ctl_table_set {
 
 struct ctl_table_root {
 	struct ctl_table_set default_set;
-	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
-					   struct nsproxy *namespaces);
+	struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
 	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ed98c1fc3de1..2951f229a855 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
 #endif
 
 static struct ctl_table_set *
-net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
+net_ctl_header_lookup(struct ctl_table_root *root)
 {
-	return &namespaces->net_ns->sysctls;
+	return &current->nsproxy->net_ns->sysctls;
 }
 
 static int is_seen(struct ctl_table_set *set)
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-20  1:21       ` Eric W. Biederman
       [not found]         ` <20160720012138.7894-2-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  1:21       ` [PATCH 03/10] userns: Add a limit on the number of user namespaces Eric W. Biederman
                         ` (9 subsequent siblings)
  10 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Limit per userns sysctls to only be opened for write by a holder
of CAP_SYS_RESOURCE.

Add all of the necessary boilerplate for having per user namespace
sysctls.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  4 ++
 kernel/user_namespace.c        | 92 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..7d59af1f08f1 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -39,6 +39,10 @@ struct user_namespace {
 	struct key		*persistent_keyring_register;
 	struct rw_semaphore	persistent_keyring_register_sem;
 #endif
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_set	set;
+	struct ctl_table_header *sysctls;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 68f594212759..b5b5cbb8fe92 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_set *
+set_lookup(struct ctl_table_root *root)
+{
+	return &current->cred->user_ns->set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+	return &current->cred->user_ns->set == set;
+}
+
+static int set_permissions(struct ctl_table_header *head,
+				  struct ctl_table *table)
+{
+	struct user_namespace *user_ns =
+		container_of(head->set, struct user_namespace, set);
+	int mode;
+
+	/* Allow users with CAP_SYS_RESOURCE unrestrained access */
+	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+		mode = (table->mode & S_IRWXU) >> 6;
+	else
+	/* Allow all others at most read-only access */
+		mode = table->mode & S_IROTH;
+	return (mode << 6) | (mode << 3) | mode;
+}
+
+static struct ctl_table_root set_root = {
+	.lookup = set_lookup,
+	.permissions = set_permissions,
+};
+
+static struct ctl_table userns_table[] = {
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static bool setup_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
+	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
+	if (tbl) {
+		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
+	}
+	if (!ns->sysctls) {
+		kfree(tbl);
+		retire_sysctl_set(&ns->set);
+		return false;
+	}
+#endif
+	return true;
+}
+
+static void retire_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(ns->sysctls);
+	retire_sysctl_set(&ns->set);
+#endif
+}
+
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 {
 	/* Start with the same capabilities as init but useless for doing
@@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
 	ns->flags = parent_ns->flags;
 	mutex_unlock(&userns_state_mutex);
 
-	set_cred_user_ns(new, ns);
-
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	init_rwsem(&ns->persistent_keyring_register_sem);
 #endif
+	ret = -ENOMEM;
+	if (!setup_userns_sysctls(ns))
+		goto fail_keyring;
+
+	set_cred_user_ns(new, ns);
 	return 0;
+fail_keyring:
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	key_put(ns->persistent_keyring_register);
+#endif
+	ns_free_inum(&ns->ns);
+	kmem_cache_free(user_ns_cachep, ns);
+	return ret;
 }
 
 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
 
 	do {
 		parent = ns->parent;
+		retire_userns_sysctls(ns);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
 #endif
@@ -1012,9 +1087,22 @@ const struct proc_ns_operations userns_operations = {
 	.install	= userns_install,
 };
 
+static struct ctl_table_header *userns_header;
 static __init int user_namespaces_init(void)
 {
+	static struct ctl_table empty[1];
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_SYSCTL
+	/*
+	 * Avoid limits in the sysctl limitation by registering
+	 * "/proc/sys/userns" as an empty directory not in a network
+	 * namespace.
+	 */
+	userns_header = register_sysctl("userns", empty);
+	BUG_ON(!userns_header);
+#endif
+	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 	return 0;
 }
 subsys_initcall(user_namespaces_init);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 03/10] userns: Add a limit on the number of user namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  1:21       ` [PATCH 02/10] userns: Add per user namespace sysctls Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
       [not found]         ` <20160720012138.7894-3-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  1:21       ` [PATCH 04/10] userns: Generalize the user namespace count into ucount Eric W. Biederman
                         ` (8 subsequent siblings)
  10 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Export the maximum number of user namespaces as
/proc/sys/userns/max_user_namespaces.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  2 ++
 kernel/fork.c                  |  2 ++
 kernel/user_namespace.c        | 68 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7d59af1f08f1..ba6a995178f9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -43,6 +43,8 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
+	int max_user_namespaces;
+	atomic_t user_namespaces;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..95d5498c463f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -323,6 +323,8 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	init_user_ns.max_user_namespaces = max_threads;
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b5b5cbb8fe92..a3cacf3f4747 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -63,7 +63,18 @@ static struct ctl_table_root set_root = {
 	.permissions = set_permissions,
 };
 
+static int zero = 0;
+static int max = INT_MAX - 1;
 static struct ctl_table userns_table[] = {
+	{
+		.procname	= "max_user_namespaces",
+		.data		= &init_user_ns.max_user_namespaces,
+		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &max,
+	},
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -75,6 +86,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
+		tbl[0].data = &ns->max_user_namespaces;
+
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -113,6 +126,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos, *bad;
+	for (pos = ns; pos; pos = pos->parent) {
+		int max = READ_ONCE(pos->max_user_namespaces);
+		int sum = atomic_inc_return(&pos->user_namespaces);
+		if (sum > max)
+			goto fail;
+	}
+	return true;
+fail:
+	bad = pos;
+	atomic_dec(&pos->user_namespaces);
+	for (pos = ns; pos != bad; pos = pos->parent)
+		atomic_dec(&pos->user_namespaces);
+
+	return false;
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos;
+	for (pos = ns; pos; pos = pos->parent) {
+		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		WARN_ON_ONCE(dec < 0);
+	}
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -128,8 +169,12 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	ret = -EUSERS;
 	if (parent_ns->level > 32)
-		return -EUSERS;
+		goto fail;
+
+	if (!inc_user_namespaces(parent_ns))
+		goto fail;
 
 	/*
 	 * Verify that we can not violate the policy of which files
@@ -137,26 +182,27 @@ int create_user_ns(struct cred *new)
 	 * by verifing that the root directory is at the root of the
 	 * mount namespace which allows all files to be accessed.
 	 */
+	ret = -EPERM;
 	if (current_chrooted())
-		return -EPERM;
+		goto fail_dec;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
 	 * created a user_namespace.
 	 */
+	ret = -EPERM;
 	if (!kuid_has_mapping(parent_ns, owner) ||
 	    !kgid_has_mapping(parent_ns, group))
-		return -EPERM;
+		goto fail_dec;
 
+	ret = -ENOMEM;
 	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
 	if (!ns)
-		return -ENOMEM;
+		goto fail_dec;
 
 	ret = ns_alloc_inum(&ns->ns);
-	if (ret) {
-		kmem_cache_free(user_ns_cachep, ns);
-		return ret;
-	}
+	if (ret)
+		goto fail_free;
 	ns->ns.ops = &userns_operations;
 
 	atomic_set(&ns->count, 1);
@@ -165,6 +211,7 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
+	ns->max_user_namespaces = INT_MAX - 1;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
@@ -185,7 +232,11 @@ fail_keyring:
 	key_put(ns->persistent_keyring_register);
 #endif
 	ns_free_inum(&ns->ns);
+fail_free:
 	kmem_cache_free(user_ns_cachep, ns);
+fail_dec:
+	dec_user_namespaces(parent_ns);
+fail:
 	return ret;
 }
 
@@ -221,6 +272,7 @@ void free_user_ns(struct user_namespace *ns)
 #endif
 		ns_free_inum(&ns->ns);
 		kmem_cache_free(user_ns_cachep, ns);
+		dec_user_namespaces(parent);
 		ns = parent;
 	} while (atomic_dec_and_test(&parent->count));
 }
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 04/10] userns: Generalize the user namespace count into ucount
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  1:21       ` [PATCH 02/10] userns: Add per user namespace sysctls Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 03/10] userns: Add a limit on the number of user namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 05/10] pidns: Add a limit on the number of pid namespaces Eric W. Biederman
                         ` (7 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

The same kind of recursive sane default limit and policy
control that has been implemented for the user namespace
is desirable for the other namespaces, so generalize
the user namespace reference count into a ucount.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
 kernel/fork.c                  |  5 +++-
 kernel/user_namespace.c        | 56 +++++++++++++++++++++++++++---------------
 3 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index ba6a995178f9..6fbb2cb6e55c 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
 
+enum ucounts {
+	UCOUNT_USER_NAMESPACES,
+	UCOUNT_MAX,
+};
+
+struct ucount {
+	int max;
+	atomic_t count;
+};
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -43,8 +53,7 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
-	int max_user_namespaces;
-	atomic_t user_namespaces;
+	struct ucount ucount[UCOUNT_MAX];
 };
 
 extern struct user_namespace init_user_ns;
@@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
 extern int proc_setgroups_show(struct seq_file *m, void *v);
 extern bool userns_may_setgroups(const struct user_namespace *ns);
 extern bool current_in_userns(const struct user_namespace *target_ns);
+extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
+extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
 {
 	return true;
 }
+
+static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int max = READ_ONCE(init_user_ns.ucount[type].max);
+	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
+	if (sum > max) {
+		atomic_dec(&init_user_ns.ucount[type].count);
+		return false;
+	}
+	return true;
+}
+
+static inline bool dec_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int dec = atomic_dec_if_possible(&init_user_ns.ucount[type].count);
+	WARN_ON_ONCE(dec < 0);
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 95d5498c463f..85922b990b68 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
 
 void __init fork_init(void)
 {
+	int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
@@ -324,7 +325,9 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
 
-	init_user_ns.max_user_namespaces = max_threads;
+	for (i = 0; i < UCOUNT_MAX; i++) {
+		init_user_ns.ucount[i].max = max_threads;
+	}
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a3cacf3f4747..8b5e66fba8bf 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -65,16 +65,18 @@ static struct ctl_table_root set_root = {
 
 static int zero = 0;
 static int max = INT_MAX - 1;
+#define UCOUNT_ENTRY(name) 				\
+	{						\
+		.procname	= name,			\
+		.maxlen		= sizeof(int),		\
+		.mode		= 0644,			\
+		.proc_handler	= proc_dointvec_minmax,	\
+		.extra1		= &zero,		\
+		.extra2		= &max,			\
+	}
+
 static struct ctl_table userns_table[] = {
-	{
-		.procname	= "max_user_namespaces",
-		.data		= &init_user_ns.max_user_namespaces,
-		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &max,
-	},
+	UCOUNT_ENTRY("max_user_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -86,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
-		tbl[0].data = &ns->max_user_namespaces;
-
+		int i;
+		for (i = 0; i < UCOUNT_MAX; i++) {
+			tbl[i].data = &ns->ucount[i].max;
+		}
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -126,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
-static bool inc_user_namespaces(struct user_namespace *ns)
+bool inc_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos, *bad;
 	for (pos = ns; pos; pos = pos->parent) {
-		int max = READ_ONCE(pos->max_user_namespaces);
-		int sum = atomic_inc_return(&pos->user_namespaces);
+		int max = READ_ONCE(pos->ucount[type].max);
+		int sum = atomic_inc_return(&pos->ucount[type].count);
 		if (sum > max)
 			goto fail;
 	}
 	return true;
 fail:
 	bad = pos;
-	atomic_dec(&pos->user_namespaces);
+	atomic_dec(&pos->ucount[type].count);
 	for (pos = ns; pos != bad; pos = pos->parent)
-		atomic_dec(&pos->user_namespaces);
+		atomic_dec(&pos->ucount[type].count);
 
 	return false;
 }
 
-static void dec_user_namespaces(struct user_namespace *ns)
+void dec_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos;
 	for (pos = ns; pos; pos = pos->parent) {
-		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
 		WARN_ON_ONCE(dec < 0);
 	}
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -167,7 +181,7 @@ int create_user_ns(struct cred *new)
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
-	int ret;
+	int ret, i;
 
 	ret = -EUSERS;
 	if (parent_ns->level > 32)
@@ -211,7 +225,9 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
-	ns->max_user_namespaces = INT_MAX - 1;
+	for (i = 0; i < UCOUNT_MAX; i++) {
+		ns->ucount[i].max = INT_MAX - 1;
+	}
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 05/10] pidns: Add a limit on the number of pid namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (2 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 04/10] userns: Generalize the user namespace count into ucount Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
                         ` (6 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/pid_namespace.c         | 22 ++++++++++++++++++----
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6fbb2cb6e55c..80e7e5084f7d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -24,6 +24,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
+	UCOUNT_PID_NAMESPACES,
 	UCOUNT_MAX,
 };
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..049cc14ae37a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -79,6 +79,16 @@ static void proc_cleanup_work(struct work_struct *work)
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
+static bool inc_pid_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
+static void dec_pid_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
 static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
 	struct pid_namespace *parent_pid_ns)
 {
@@ -87,15 +97,16 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	int i;
 	int err;
 
-	if (level > MAX_PID_NS_LEVEL) {
-		err = -EINVAL;
+	err = -EINVAL;
+	if (level > MAX_PID_NS_LEVEL)
+		goto out;
+	if (!inc_pid_namespaces(user_ns))
 		goto out;
-	}
 
 	err = -ENOMEM;
 	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
-		goto out;
+		goto out_dec;
 
 	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!ns->pidmap[0].page)
@@ -129,6 +140,8 @@ out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
 	kmem_cache_free(pid_ns_cachep, ns);
+out_dec:
+	dec_pid_namespaces(user_ns);
 out:
 	return ERR_PTR(err);
 }
@@ -146,6 +159,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	dec_pid_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8b5e66fba8bf..d9c3275278f5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -77,6 +77,7 @@ static int max = INT_MAX - 1;
 
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
+	UCOUNT_ENTRY("max_pid_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 06/10] utsns: Add a limit on the number of uts namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (3 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 05/10] pidns: Add a limit on the number of pid namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 07/10] ipcns: Add a limit on the number of ipc namespaces Eric W. Biederman
                         ` (5 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 kernel/utsname.c               | 31 ++++++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 80e7e5084f7d..10bb012a71d7 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
+	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_MAX,
 };
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d9c3275278f5..4e019620f297 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,6 +78,7 @@ static int max = INT_MAX - 1;
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
+	UCOUNT_ENTRY("max_uts_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 831ea7108232..4f13c0419d64 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,16 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
 
+static bool inc_uts_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
+static void dec_uts_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
 static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
@@ -38,15 +48,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	struct uts_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_uts_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = create_uts_ns();
 	if (!ns)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 
 	ns->ns.ops = &utsns_operations;
 
@@ -55,6 +68,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
+
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_uts_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 /*
@@ -85,6 +105,7 @@ void free_uts_ns(struct kref *kref)
 	struct uts_namespace *ns;
 
 	ns = container_of(kref, struct uts_namespace, kref);
+	dec_uts_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 07/10] ipcns: Add a  limit on the number of ipc namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (4 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 08/10] cgroupns: Add a limit on the number of cgroup namespaces Eric W. Biederman
                         ` (4 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 ipc/namespace.c                | 42 +++++++++++++++++++++++++++++++-----------
 kernel/user_namespace.c        |  1 +
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 10bb012a71d7..19f9ee590062 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,6 +26,7 @@ enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
+	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_MAX,
 };
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 04cb07eb81f1..3996a1e41a1d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,33 +16,42 @@
 
 #include "util.h"
 
+static bool inc_ipc_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
+static void dec_ipc_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
 static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_ipc_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
 	if (ns == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 	ns->ns.ops = &ipcns_operations;
 
 	atomic_set(&ns->count, 1);
 	ns->user_ns = get_user_ns(user_ns);
 
 	err = mq_init_ns(ns);
-	if (err) {
-		put_user_ns(ns->user_ns);
-		ns_free_inum(&ns->ns);
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_put;
 	atomic_inc(&nr_ipc_ns);
 
 	sem_init_ns(ns);
@@ -50,6 +59,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	shm_init_ns(ns);
 
 	return ns;
+
+fail_put:
+	put_user_ns(ns->user_ns);
+	ns_free_inum(&ns->ns);
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_ipc_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -98,6 +117,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	shm_exit_ns(ns);
 	atomic_dec(&nr_ipc_ns);
 
+	dec_ipc_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 4e019620f297..8c3a6e351b3e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -79,6 +79,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
+	UCOUNT_ENTRY("max_ipc_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 08/10] cgroupns: Add a limit on the number of cgroup namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (5 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 07/10] ipcns: Add a limit on the number of ipc namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 09/10] netns: Add a limit on the number of net namespaces Eric W. Biederman
                         ` (3 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/cgroup.c                | 15 +++++++++++++++
 kernel/user_namespace.c        |  1 +
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 19f9ee590062..3ad3cb505781 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_MAX,
 };
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6e8932..240eec43390b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 
 /* cgroup namespaces */
 
+static bool inc_cgroup_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
 static struct cgroup_namespace *alloc_cgroup_ns(void)
 {
 	struct cgroup_namespace *new_ns;
@@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 void free_cgroup_ns(struct cgroup_namespace *ns)
 {
 	put_css_set(ns->root_cset);
+	dec_cgroup_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
@@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
+	if (!inc_cgroup_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	mutex_lock(&cgroup_mutex);
 	spin_lock_bh(&css_set_lock);
 
@@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	new_ns = alloc_cgroup_ns();
 	if (IS_ERR(new_ns)) {
 		put_css_set(cset);
+		dec_cgroup_namespaces(user_ns);
 		return new_ns;
 	}
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8c3a6e351b3e..c150765c328e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 09/10] netns: Add a limit on the number of net namespaces
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (6 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 08/10] cgroupns: Add a limit on the number of cgroup namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:21       ` [PATCH 10/10] mntns: Add a limit on the number of mount namespaces Eric W. Biederman
                         ` (2 subsequent siblings)
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 net/core/net_namespace.c       | 15 +++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 3ad3cb505781..785a7ca2731b 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_NET_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_MAX,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c150765c328e..23a70d66a310 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_net_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..a489f192d619 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 	return peer;
 }
 
+static bool inc_net_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
@@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	int error = 0;
 	LIST_HEAD(net_exit_list);
 
+	if (!inc_net_namespaces(user_ns))
+		return -ENFILE;
+
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
@@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
 	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
+		dec_net_namespaces(user_ns);
 		put_user_ns(user_ns);
 		net_drop_ns(net);
 		return ERR_PTR(rv);
@@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
+		dec_net_namespaces(net->user_ns);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH 10/10] mntns: Add a limit on the number of mount namespaces.
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (7 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 09/10] netns: Add a limit on the number of net namespaces Eric W. Biederman
@ 2016-07-20  1:21       ` Eric W. Biederman
  2016-07-20  1:58       ` [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
  2016-07-20  5:06       ` Serge E. Hallyn
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:21 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Eric W. Biederman, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/namespace.c                 | 19 ++++++++++++++++++-
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index aabe8e397fc3..3942ae6c34f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2718,9 +2718,20 @@ dput_out:
 	return retval;
 }
 
+static bool inc_mnt_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
 	ns_free_inum(&ns->ns);
+	dec_mnt_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2739,12 +2750,18 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	struct mnt_namespace *new_ns;
 	int ret;
 
+	if (!inc_mnt_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
+	if (!new_ns) {
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(-ENOMEM);
+	}
 	ret = ns_alloc_inum(&new_ns->ns);
 	if (ret) {
 		kfree(new_ns);
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(ret);
 	}
 	new_ns->ns.ops = &mntns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 785a7ca2731b..05140d67f805 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -28,6 +28,7 @@ enum ucounts {
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_NET_NAMESPACES,
+	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_MAX,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 23a70d66a310..422058c3621d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -81,6 +81,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
 	UCOUNT_ENTRY("max_net_namespaces"),
+	UCOUNT_ENTRY("max_mnt_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* Re: [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (8 preceding siblings ...)
  2016-07-20  1:21       ` [PATCH 10/10] mntns: Add a limit on the number of mount namespaces Eric W. Biederman
@ 2016-07-20  1:58       ` Eric W. Biederman
  2016-07-20  5:06       ` Serge E. Hallyn
  10 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20  1:58 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, linux-api-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Jann Horn

Added linux-api because I forgot when I composed this email.

"Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> writes:

> Passing nsproxy into sysctl_table_root.lookup was a premature
> optimization in attempt to avoid depending on current.  The
> directory /proc/self/sys has not appeared and if and when
> it does this code will need to be reviewed closely and reworked
> anyway.  So remove the premature optimization.
>
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  fs/proc/proc_sysctl.c  | 14 +++++++-------
>  include/linux/sysctl.h |  3 +--
>  net/sysctl_net.c       |  4 ++--
>  3 files changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 5e57c3e46e1d..534630687489 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
>  
>  static void drop_sysctl_table(struct ctl_table_header *header);
>  static int sysctl_follow_link(struct ctl_table_header **phead,
> -	struct ctl_table **pentry, struct nsproxy *namespaces);
> +	struct ctl_table **pentry);
>  static int insert_links(struct ctl_table_header *head);
>  static void put_links(struct ctl_table_header *header);
>  
> @@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
>  }
>  
>  static struct ctl_table_set *
> -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
> +lookup_header_set(struct ctl_table_root *root)
>  {
>  	struct ctl_table_set *set = &root->default_set;
>  	if (root->lookup)
> -		set = root->lookup(root, namespaces);
> +		set = root->lookup(root);
>  	return set;
>  }
>  
> @@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
>  		goto out;
>  
>  	if (S_ISLNK(p->mode)) {
> -		ret = sysctl_follow_link(&h, &p, current->nsproxy);
> +		ret = sysctl_follow_link(&h, &p);
>  		err = ERR_PTR(ret);
>  		if (ret)
>  			goto out;
> @@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
>  
>  	if (S_ISLNK(table->mode)) {
>  		/* It is not an error if we can not follow the link ignore it */
> -		int err = sysctl_follow_link(&head, &table, current->nsproxy);
> +		int err = sysctl_follow_link(&head, &table);
>  		if (err)
>  			goto out;
>  	}
> @@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
>  }
>  
>  static int sysctl_follow_link(struct ctl_table_header **phead,
> -	struct ctl_table **pentry, struct nsproxy *namespaces)
> +	struct ctl_table **pentry)
>  {
>  	struct ctl_table_header *head;
>  	struct ctl_table_root *root;
> @@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
>  	ret = 0;
>  	spin_lock(&sysctl_lock);
>  	root = (*pentry)->data;
> -	set = lookup_header_set(root, namespaces);
> +	set = lookup_header_set(root);
>  	dir = xlate_dir(set, (*phead)->parent);
>  	if (IS_ERR(dir))
>  		ret = PTR_ERR(dir);
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index fa7bc29925c9..6385b331f2b9 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -154,8 +154,7 @@ struct ctl_table_set {
>  
>  struct ctl_table_root {
>  	struct ctl_table_set default_set;
> -	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
> -					   struct nsproxy *namespaces);
> +	struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
>  	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
>  };
>  
> diff --git a/net/sysctl_net.c b/net/sysctl_net.c
> index ed98c1fc3de1..2951f229a855 100644
> --- a/net/sysctl_net.c
> +++ b/net/sysctl_net.c
> @@ -27,9 +27,9 @@
>  #endif
>  
>  static struct ctl_table_set *
> -net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
> +net_ctl_header_lookup(struct ctl_table_root *root)
>  {
> -	return &namespaces->net_ns->sysctls;
> +	return &current->nsproxy->net_ns->sysctls;
>  }
>  
>  static int is_seen(struct ctl_table_set *set)

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 0/10] userns: sysctl limits for namespaces
       [not found] ` <8737n5dscy.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  2016-07-20  1:21   ` [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
@ 2016-07-20  4:02   ` Kees Cook
       [not found]     ` <CAGXu5jKWjHKxtPSGuogjR+XE+SbNWiUp2bOxugpD+hUex9QeZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2016-07-21 16:39     ` Eric W. Biederman
  2 siblings, 1 reply; 113+ messages in thread
From: Kees Cook @ 2016-07-20  4:02 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Jann Horn

On Tue, Jul 19, 2016 at 6:13 PM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.
>
> The maximum sane case I can imagine is if every process is a fat
> process, so I set the maximum number of namespaces to the maximum
> number of threads.
>
> I make these limits recursive and per user namespace so that a
> usernamespace root can reduce the limits further.  If a user namespace
> root raises the limit the limit in the parent namespace will be honored.
>
> I have cut this implementation to the bare minimum needed to achieve
> these objectives.
>
> Assuming nothing problematic shows up in the review I will add these to
> my user namespace tree.

This looks great; thank you! I think the design is effective. One
thought that pops to mind is how does an admin query the current
number of active namespaces of a given type? (It's likely this is
already exposed somewhere and I just don't know where to look...)

-Kees

>
> These patches are also available at:
>     git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing
>
> Eric W. Biederman (10):
>       sysctl: Stop implicitly passing current into sysctl_table_root.lookup
>       userns: Add per user namespace sysctls.
>       userns: Add a limit on the number of user namespaces
>       userns: Generalize the user namespace count into ucount
>       pidns: Add a limit on the number of pid namespaces
>       utsns: Add a limit on the number of uts namespaces
>       ipcns: Add a  limit on the number of ipc namespaces
>       cgroupns: Add a limit on the number of cgroup namespaces
>       netns: Add a limit on the number of net namespaces
>       mntns: Add a limit on the number of mount namespaces.
>
>  fs/namespace.c                 |  19 ++++-
>  fs/proc/proc_sysctl.c          |  14 ++--
>  include/linux/sysctl.h         |   3 +-
>  include/linux/user_namespace.h |  40 +++++++++
>  ipc/namespace.c                |  42 +++++++---
>  kernel/cgroup.c                |  15 ++++
>  kernel/fork.c                  |   5 ++
>  kernel/pid_namespace.c         |  22 ++++-
>  kernel/user_namespace.c        | 184 ++++++++++++++++++++++++++++++++++++++---
>  kernel/utsname.c               |  31 +++++--
>  net/core/net_namespace.c       |  15 ++++
>  net/sysctl_net.c               |   4 +-
>  12 files changed, 351 insertions(+), 43 deletions(-)
>
> Eric



-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup
       [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                         ` (9 preceding siblings ...)
  2016-07-20  1:58       ` [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
@ 2016-07-20  5:06       ` Serge E. Hallyn
  10 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-20  5:06 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Passing nsproxy into sysctl_table_root.lookup was a premature
> optimization in attempt to avoid depending on current.  The
> directory /proc/self/sys has not appeared and if and when
> it does this code will need to be reviewed closely and reworked
> anyway.  So remove the premature optimization.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  fs/proc/proc_sysctl.c  | 14 +++++++-------
>  include/linux/sysctl.h |  3 +--
>  net/sysctl_net.c       |  4 ++--
>  3 files changed, 10 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 5e57c3e46e1d..534630687489 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
>  
>  static void drop_sysctl_table(struct ctl_table_header *header);
>  static int sysctl_follow_link(struct ctl_table_header **phead,
> -	struct ctl_table **pentry, struct nsproxy *namespaces);
> +	struct ctl_table **pentry);
>  static int insert_links(struct ctl_table_header *head);
>  static void put_links(struct ctl_table_header *header);
>  
> @@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
>  }
>  
>  static struct ctl_table_set *
> -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
> +lookup_header_set(struct ctl_table_root *root)
>  {
>  	struct ctl_table_set *set = &root->default_set;
>  	if (root->lookup)
> -		set = root->lookup(root, namespaces);
> +		set = root->lookup(root);
>  	return set;
>  }
>  
> @@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
>  		goto out;
>  
>  	if (S_ISLNK(p->mode)) {
> -		ret = sysctl_follow_link(&h, &p, current->nsproxy);
> +		ret = sysctl_follow_link(&h, &p);
>  		err = ERR_PTR(ret);
>  		if (ret)
>  			goto out;
> @@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
>  
>  	if (S_ISLNK(table->mode)) {
>  		/* It is not an error if we can not follow the link ignore it */
> -		int err = sysctl_follow_link(&head, &table, current->nsproxy);
> +		int err = sysctl_follow_link(&head, &table);
>  		if (err)
>  			goto out;
>  	}
> @@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
>  }
>  
>  static int sysctl_follow_link(struct ctl_table_header **phead,
> -	struct ctl_table **pentry, struct nsproxy *namespaces)
> +	struct ctl_table **pentry)
>  {
>  	struct ctl_table_header *head;
>  	struct ctl_table_root *root;
> @@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
>  	ret = 0;
>  	spin_lock(&sysctl_lock);
>  	root = (*pentry)->data;
> -	set = lookup_header_set(root, namespaces);
> +	set = lookup_header_set(root);
>  	dir = xlate_dir(set, (*phead)->parent);
>  	if (IS_ERR(dir))
>  		ret = PTR_ERR(dir);
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index fa7bc29925c9..6385b331f2b9 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -154,8 +154,7 @@ struct ctl_table_set {
>  
>  struct ctl_table_root {
>  	struct ctl_table_set default_set;
> -	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
> -					   struct nsproxy *namespaces);
> +	struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
>  	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
>  };
>  
> diff --git a/net/sysctl_net.c b/net/sysctl_net.c
> index ed98c1fc3de1..2951f229a855 100644
> --- a/net/sysctl_net.c
> +++ b/net/sysctl_net.c
> @@ -27,9 +27,9 @@
>  #endif
>  
>  static struct ctl_table_set *
> -net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
> +net_ctl_header_lookup(struct ctl_table_root *root)
>  {
> -	return &namespaces->net_ns->sysctls;
> +	return &current->nsproxy->net_ns->sysctls;
>  }
>  
>  static int is_seen(struct ctl_table_set *set)
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]         ` <20160720012138.7894-2-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-20  5:12           ` Serge E. Hallyn
       [not found]             ` <20160720051226.GC16885-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
  2016-07-20  6:28           ` Nikolay Borisov
  1 sibling, 1 reply; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-20  5:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Limit per userns sysctls to only be opened for write by a holder
> of CAP_SYS_RESOURCE.
> 
> Add all of the necessary boilerplate for having per user namespace
> sysctls.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  include/linux/user_namespace.h |  4 ++
>  kernel/user_namespace.c        | 92 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 9217169c64cb..7d59af1f08f1 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -39,6 +39,10 @@ struct user_namespace {
>  	struct key		*persistent_keyring_register;
>  	struct rw_semaphore	persistent_keyring_register_sem;
>  #endif
> +#ifdef CONFIG_SYSCTL
> +	struct ctl_table_set	set;
> +	struct ctl_table_header *sysctls;
> +#endif
>  };
>  
>  extern struct user_namespace init_user_ns;
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 68f594212759..b5b5cbb8fe92 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
>  				struct user_namespace *ns, int cap_setid,
>  				struct uid_gid_map *map);
>  
> +#ifdef CONFIG_SYSCTL
> +static struct ctl_table_set *
> +set_lookup(struct ctl_table_root *root)
> +{
> +	return &current->cred->user_ns->set;
> +}
> +
> +static int set_is_seen(struct ctl_table_set *set)
> +{
> +	return &current->cred->user_ns->set == set;
> +}
> +
> +static int set_permissions(struct ctl_table_header *head,
> +				  struct ctl_table *table)
> +{
> +	struct user_namespace *user_ns =
> +		container_of(head->set, struct user_namespace, set);
> +	int mode;
> +
> +	/* Allow users with CAP_SYS_ADMIN unrestrainted access */

nit 1 - s/unrestrainted/unrestrained/
nit 2 - you say CAP_SYS_ADMIN but check for CAP_SYS_RESOURCE

> +	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
> +		mode = (table->mode & S_IRWXU) >> 6;
> +	else
> +	/* Allow all others at most read-only access */
> +		mode = table->mode & S_IROTH;
> +	return (mode << 6) | (mode << 3) | mode;
> +}
> +
> +static struct ctl_table_root set_root = {
> +	.lookup = set_lookup,
> +	.permissions = set_permissions,
> +};
> +
> +static struct ctl_table userns_table[] = {
> +	{ }
> +};
> +#endif /* CONFIG_SYSCTL */
> +
> +static bool setup_userns_sysctls(struct user_namespace *ns)
> +{
> +#ifdef CONFIG_SYSCTL
> +	struct ctl_table *tbl;
> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
> +	if (tbl) {
> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
> +	}

What happens if tbl is null due to oom?  Would it be better to just
return false in that case here?

> +	if (!ns->sysctls) {
> +		kfree(tbl);
> +		retire_sysctl_set(&ns->set);
> +		return false;
> +	}
> +#endif
> +	return true;
> +}
> +
> +static void retire_userns_sysctls(struct user_namespace *ns)
> +{
> +#ifdef CONFIG_SYSCTL
> +	unregister_sysctl_table(ns->sysctls);
> +	retire_sysctl_set(&ns->set);
> +#endif
> +}
> +
>  static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  {
>  	/* Start with the same capabilities as init but useless for doing
> @@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
>  	ns->flags = parent_ns->flags;
>  	mutex_unlock(&userns_state_mutex);
>  
> -	set_cred_user_ns(new, ns);
> -
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  	init_rwsem(&ns->persistent_keyring_register_sem);
>  #endif
> +	ret = -ENOMEM;
> +	if (!setup_userns_sysctls(ns))
> +		goto fail_keyring;
> +
> +	set_cred_user_ns(new, ns);
>  	return 0;
> +fail_keyring:
> +#ifdef CONFIG_PERSISTENT_KEYRINGS
> +	key_put(ns->persistent_keyring_register);
> +#endif
> +	ns_free_inum(&ns->ns);
> +	kmem_cache_free(user_ns_cachep, ns);
> +	return ret;
>  }
>  
>  int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
> @@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
>  
>  	do {
>  		parent = ns->parent;
> +		retire_userns_sysctls(ns);
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  		key_put(ns->persistent_keyring_register);
>  #endif
> @@ -1012,9 +1087,22 @@ const struct proc_ns_operations userns_operations = {
>  	.install	= userns_install,
>  };
>  
> +static struct ctl_table_header *userns_header;
>  static __init int user_namespaces_init(void)
>  {
> +	static struct ctl_table empty[1];
>  	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
> +
> +#ifdef CONFIG_SYSCTL
> +	/*
> +	 * Avoid limits in the sysctl limitation by registering
> +	 * "/proc/sys/userns" as an empty directory not in a network
> +	 * namespace.
> +	 */
> +	userns_header = register_sysctl("userns", empty);
> +	BUG_ON(!userns_header);
> +#endif
> +	BUG_ON(!setup_userns_sysctls(&init_user_ns));
>  	return 0;
>  }
>  subsys_initcall(user_namespaces_init);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]         ` <20160720012138.7894-2-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-20  5:12           ` Serge E. Hallyn
@ 2016-07-20  6:28           ` Nikolay Borisov
       [not found]             ` <578F1A00.6030701-6AxghH7DbtA@public.gmane.org>
  1 sibling, 1 reply; 113+ messages in thread
From: Nikolay Borisov @ 2016-07-20  6:28 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Jann Horn, Seth Forshee, Kees Cook, Andy Lutomirski

Just a couple of nits.

On 07/20/2016 04:21 AM, Eric W. Biederman wrote:
> Limit per userns sysctls to only be opened for write by a holder
> of CAP_SYS_RESOURCE.
> 
> Add all of the necessary boilerplate for having per user namespace
> sysctls.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  include/linux/user_namespace.h |  4 ++
>  kernel/user_namespace.c        | 92 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 9217169c64cb..7d59af1f08f1 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -39,6 +39,10 @@ struct user_namespace {
>  	struct key		*persistent_keyring_register;
>  	struct rw_semaphore	persistent_keyring_register_sem;
>  #endif
> +#ifdef CONFIG_SYSCTL
> +	struct ctl_table_set	set;
> +	struct ctl_table_header *sysctls;
> +#endif
>  };
>  
>  extern struct user_namespace init_user_ns;
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 68f594212759..b5b5cbb8fe92 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
>  				struct user_namespace *ns, int cap_setid,
>  				struct uid_gid_map *map);
>  
> +#ifdef CONFIG_SYSCTL
> +static struct ctl_table_set *
> +set_lookup(struct ctl_table_root *root)
> +{
> +	return &current->cred->user_ns->set;
Why not current_user_ns()->set?

> +}
> +
> +static int set_is_seen(struct ctl_table_set *set)
> +{
> +	return &current->cred->user_ns->set == set;
DITTO

> +}
> +
> +static int set_permissions(struct ctl_table_header *head,
> +				  struct ctl_table *table)
> +{
> +	struct user_namespace *user_ns =
> +		container_of(head->set, struct user_namespace, set);
> +	int mode;
> +
> +	/* Allow users with CAP_SYS_ADMIN unrestrainted access */
Just a minor nit: The comment and following code don't match.

> +	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
> +		mode = (table->mode & S_IRWXU) >> 6;
> +	else
> +	/* Allow all others at most read-only access */
> +		mode = table->mode & S_IROTH;
> +	return (mode << 6) | (mode << 3) | mode;
> +}
> +
> +static struct ctl_table_root set_root = {
> +	.lookup = set_lookup,
> +	.permissions = set_permissions,
> +};
> +
> +static struct ctl_table userns_table[] = {
> +	{ }
> +};
> +#endif /* CONFIG_SYSCTL */
> +
> +static bool setup_userns_sysctls(struct user_namespace *ns)
> +{
> +#ifdef CONFIG_SYSCTL
> +	struct ctl_table *tbl;
> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
> +	if (tbl) {
> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
> +	}
> +	if (!ns->sysctls) {
> +		kfree(tbl);
> +		retire_sysctl_set(&ns->set);
> +		return false;
> +	}
> +#endif
> +	return true;
> +}
> +
> +static void retire_userns_sysctls(struct user_namespace *ns)
> +{
> +#ifdef CONFIG_SYSCTL
> +	unregister_sysctl_table(ns->sysctls);
> +	retire_sysctl_set(&ns->set);
> +#endif
> +}
> +
>  static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  {
>  	/* Start with the same capabilities as init but useless for doing
> @@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
>  	ns->flags = parent_ns->flags;
>  	mutex_unlock(&userns_state_mutex);
>  
> -	set_cred_user_ns(new, ns);
> -
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  	init_rwsem(&ns->persistent_keyring_register_sem);
>  #endif
> +	ret = -ENOMEM;
> +	if (!setup_userns_sysctls(ns))
> +		goto fail_keyring;
> +
> +	set_cred_user_ns(new, ns);
>  	return 0;
> +fail_keyring:
> +#ifdef CONFIG_PERSISTENT_KEYRINGS
> +	key_put(ns->persistent_keyring_register);
> +#endif
> +	ns_free_inum(&ns->ns);
> +	kmem_cache_free(user_ns_cachep, ns);
> +	return ret;
>  }
>  
>  int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
> @@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
>  
>  	do {
>  		parent = ns->parent;
> +		retire_userns_sysctls(ns);
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  		key_put(ns->persistent_keyring_register);
>  #endif
> @@ -1012,9 +1087,22 @@ const struct proc_ns_operations userns_operations = {
>  	.install	= userns_install,
>  };
>  
> +static struct ctl_table_header *userns_header;
>  static __init int user_namespaces_init(void)
>  {
> +	static struct ctl_table empty[1];
>  	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
> +
> +#ifdef CONFIG_SYSCTL
> +	/*
> +	 * Avoid limits in the sysctl limitation by registering
> +	 * "/proc/sys/userns" as an empty directory not in a network
> +	 * namespace.
> +	 */
The comment is not very clear to someone who is not intimately familiar
with the sysctl infrastructure, perhaps you can make it a bit more verbose?

> +	userns_header = register_sysctl("userns", empty);
> +	BUG_ON(!userns_header);
> +#endif
> +	BUG_ON(!setup_userns_sysctls(&init_user_ns));
Do we really want the extra code the BUG_ON adds even in the
!CONFIG_SYSCTL case? Possibly the compiler could eliminate it, but I
can't help but wonder.

>  	return 0;
>  }
>  subsys_initcall(user_namespaces_init);
> 

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 03/10] userns: Add a limit on the number of user namespaces
       [not found]         ` <20160720012138.7894-3-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-20  7:03           ` Nikolay Borisov
       [not found]             ` <578F2233.2040708-6AxghH7DbtA@public.gmane.org>
  0 siblings, 1 reply; 113+ messages in thread
From: Nikolay Borisov @ 2016-07-20  7:03 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Jann Horn, Seth Forshee, Kees Cook, Andy Lutomirski



On 07/20/2016 04:21 AM, Eric W. Biederman wrote:
> Export the export the maximum number of user namespaces as
> /proc/sys/userns/max_user_namespaces.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  include/linux/user_namespace.h |  2 ++
>  kernel/fork.c                  |  2 ++
>  kernel/user_namespace.c        | 68 +++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 64 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 7d59af1f08f1..ba6a995178f9 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -43,6 +43,8 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> +	int max_user_namespaces;
> +	atomic_t user_namespaces;
>  };
>  
>  extern struct user_namespace init_user_ns;
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 5c2c355aa97f..95d5498c463f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -323,6 +323,8 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
> +
> +	init_user_ns.max_user_namespaces = max_threads;
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index b5b5cbb8fe92..a3cacf3f4747 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -63,7 +63,18 @@ static struct ctl_table_root set_root = {
>  	.permissions = set_permissions,
>  };
>  
> +static int zero = 0;
> +static int max = INT_MAX - 1;
>  static struct ctl_table userns_table[] = {
> +	{
> +		.procname	= "max_user_namespaces",
> +		.data		= &init_user_ns.max_user_namespaces,
> +		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &max,
> +	},
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -75,6 +86,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> +		tbl[0].data = &ns->max_user_namespaces;
> +
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -113,6 +126,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos, *bad;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int max = READ_ONCE(pos->max_user_namespaces);
> +		int sum = atomic_inc_return(&pos->user_namespaces);
> +		if (sum > max)
> +			goto fail;
> +	}
> +	return true;
> +fail:
> +	bad = pos;
> +	atomic_dec(&pos->user_namespaces);
> +	for (pos = ns; pos != bad; pos = pos->parent)
> +		atomic_dec(&pos->user_namespaces);
> +
> +	return false;
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		WARN_ON_ONCE(dec < 0);
> +	}
> +}

Here you are essentially duplicating the logic of
page_counter_try_charge and page_counter_uncharge. Perhaps the code of
the page_counter can be moved away from mm/ and into kernel and then
renamed to something more generic? E.g. the removed resource_counters or
maybe "hierarchical counters"? If more namespaces are added in the
future we certainly wouldn't want proliferation of pairs of such
hierarchical inc/dec functions.


> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -128,8 +169,12 @@ int create_user_ns(struct cred *new)
>  	kgid_t group = new->egid;
>  	int ret;
>  
> +	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> -		return -EUSERS;
> +		goto fail;
> +
> +	if (!inc_user_namespaces(parent_ns))
> +		goto fail;

I think it would be better if the simple "return -EUSERS" semantics is
saved and the "fail" label entirely nuked. In this case you only leave
the more complex error handling.

>  
>  	/*
>  	 * Verify that we can not violate the policy of which files
> @@ -137,26 +182,27 @@ int create_user_ns(struct cred *new)
>  	 * by verifing that the root directory is at the root of the
>  	 * mount namespace which allows all files to be accessed.
>  	 */
> +	ret = -EPERM;
>  	if (current_chrooted())
> -		return -EPERM;
> +		goto fail_dec;
>  
>  	/* The creator needs a mapping in the parent user namespace
>  	 * or else we won't be able to reasonably tell userspace who
>  	 * created a user_namespace.
>  	 */
> +	ret = -EPERM;
>  	if (!kuid_has_mapping(parent_ns, owner) ||
>  	    !kgid_has_mapping(parent_ns, group))
> -		return -EPERM;
> +		goto fail_dec;
>  
> +	ret = -ENOMEM;
>  	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
>  	if (!ns)
> -		return -ENOMEM;
> +		goto fail_dec;
>  
>  	ret = ns_alloc_inum(&ns->ns);
> -	if (ret) {
> -		kmem_cache_free(user_ns_cachep, ns);
> -		return ret;
> -	}
> +	if (ret)
> +		goto fail_free;
>  	ns->ns.ops = &userns_operations;
>  
>  	atomic_set(&ns->count, 1);
> @@ -165,6 +211,7 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> +	ns->max_user_namespaces = INT_MAX - 1;

Do we want an INT_MAX -1 default initialization or inheritance from
parent's value?

> 
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> @@ -185,7 +232,11 @@ fail_keyring:
>  	key_put(ns->persistent_keyring_register);
>  #endif
>  	ns_free_inum(&ns->ns);
> +fail_free:
>  	kmem_cache_free(user_ns_cachep, ns);
> +fail_dec:
> +	dec_user_namespaces(parent_ns);
> +fail:
>  	return ret;
>  }
>  
> @@ -221,6 +272,7 @@ void free_user_ns(struct user_namespace *ns)
>  #endif
>  		ns_free_inum(&ns->ns);
>  		kmem_cache_free(user_ns_cachep, ns);
> +		dec_user_namespaces(parent);
>  		ns = parent;
>  	} while (atomic_dec_and_test(&parent->count));
>  }
> 

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]             ` <20160720051226.GC16885-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
@ 2016-07-20 15:55               ` Eric W. Biederman
       [not found]                 ` <87shv49ud0.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  0 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-20 15:55 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

"Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:

> Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
>> +static bool setup_userns_sysctls(struct user_namespace *ns)
>> +{
>> +#ifdef CONFIG_SYSCTL
>> +	struct ctl_table *tbl;
>> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>> +	if (tbl) {
>> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>> +	}
>
> What happens if tbl is null due to oom?  Would it be better to just
> return false in that case here?

ns->sysctls is initialized to NULL and kfree(NULL) is a noop.
So I don't see any problems.

I admit it isn't a usual pattern for error handling.

>> +	if (!ns->sysctls) {
>> +		kfree(tbl);
>> +		retire_sysctl_set(&ns->set);
>> +		return false;
>> +	}
>> +#endif
>> +	return true;
>> +}

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]                 ` <87shv49ud0.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-20 16:43                   ` Serge E. Hallyn
       [not found]                     ` <20160720164347.GA24378-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
  0 siblings, 1 reply; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-20 16:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:
> 
> > Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> >> +static bool setup_userns_sysctls(struct user_namespace *ns)
> >> +{
> >> +#ifdef CONFIG_SYSCTL
> >> +	struct ctl_table *tbl;
> >> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
> >> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
> >> +	if (tbl) {
> >> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
> >> +	}
> >
> > What happens if tbl is null due to oom?  Would it be better to just
> > return false in that case here?
> 
> ns->sysctls is initialized to NULL and kfree(NULL) is a noop.
> So I don't see any problems.
> 
> I admit it isn't a usual pattern for error handling.

Right I didn't mean that - I meant, is there a way that a namespace
could end up escaping its limits as a result?  I think not, since
either it has hierarchical limits which will still be enforced, or
the admin tries to set a new limit and that step will fail visibly,
which suffices.

thanks,
-serge

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]                     ` <20160720164347.GA24378-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
@ 2016-07-21 14:41                       ` Eric W. Biederman
       [not found]                         ` <87invz9hpz.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  0 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 14:41 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

"Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:

> Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
>> "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:
>> 
>> > Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
>> >> +static bool setup_userns_sysctls(struct user_namespace *ns)
>> >> +{
>> >> +#ifdef CONFIG_SYSCTL
>> >> +	struct ctl_table *tbl;
>> >> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>> >> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>> >> +	if (tbl) {
>> >> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>> >> +	}
>> >
>> > What happens if tbl is null due to oom?  Would it be better to just
>> > return false in that case here?
>> 
>> ns->sysctls is initialized to NULL and kfree(NULL) is a noop.
>> So I don't see any problems.
>> 
>> I admit it isn't a usual pattern for error handling.
>
> Right I didn't mean that - I meant, is there a way that a namespace
> could end up escaping its limits as a result?  I think not, since
> either it has hierarchical limits which will still be enforced, or
> the admin tries to set a new limit and that step will fail visibly,
> which suffices.

And what I meant is that we do actually return false in the case you are
worrying about.  And if we return false we don't even create the user
namespace.


Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 0/10] userns: sysctl limits for namespaces
       [not found]     ` <CAGXu5jKWjHKxtPSGuogjR+XE+SbNWiUp2bOxugpD+hUex9QeZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-07-21 14:49       ` Eric W. Biederman
  2016-07-21 16:58       ` Eric W. Biederman
  1 sibling, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 14:49 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linux Containers, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Jann Horn

Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:

> On Tue, Jul 19, 2016 at 6:13 PM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>>
>> The maximum sane case I can imagine is if every process is a fat
>> process, so I set the maximum number of namespaces to the maximum
>> number of threads.
>>
>> I make these limits recursive and per user namespace so that a
>> usernamespace root can reduce the limits further.  If a user namespace
>> root raises the limit the limit in the parent namespace will be honored.
>>
>> I have cut this implementation to the bare minimum needed to achieve
>> these objections.
>>
>> Assuming nothing problematic shows up in the review I will add these to
>> my user namespace tree.
>
> This looks great; thank you! I think the design is effective. One
> thought that pops to mind is how does an admin query the current
> number of active namespaces of a given type? (It's likely this is
> already exposed somewhere and I just don't know where to look...)

The current way would be to look at /proc/<pid>/ns/... and count
unique inodes that show up.  Which isn't very awesome but it is what
exists today.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]                         ` <87invz9hpz.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-21 15:07                           ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-21 15:07 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:
> 
> > Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> >> "Serge E. Hallyn" <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> writes:
> >> 
> >> > Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> >> >> +static bool setup_userns_sysctls(struct user_namespace *ns)
> >> >> +{
> >> >> +#ifdef CONFIG_SYSCTL
> >> >> +	struct ctl_table *tbl;
> >> >> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
> >> >> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
> >> >> +	if (tbl) {
> >> >> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
> >> >> +	}
> >> >
> >> > What happens if tbl is null due to oom?  Would it be better to just
> >> > return false in that case here?
> >> 
> >> ns->sysctls is initialized to NULL and kfree(NULL) is a noop.
> >> So I don't see any problems.
> >> 
> >> I admit it isn't a usual pattern for error handling.
> >
> > Right I didn't mean that - I meant, is there a way that a namespace
> > could end up escaping its limits as a result?  I think not, since
> > either it has hierarchical limits which will still be enforced, or
> > the admin tries to set a new limit and that step will fail visibly,
> > which suffices.
> 
> And what I meant is that we do actually return false in the case you are
> worrying about.  And if we return false we don't even create the user
> namespace.

D'oh.  Right.  Thanks :)

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 03/10] userns: Add a limit on the number of user namespaces
       [not found]             ` <578F2233.2040708-6AxghH7DbtA@public.gmane.org>
@ 2016-07-21 15:23               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 15:23 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee, Jann Horn

Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org> writes:

> On 07/20/2016 04:21 AM, Eric W. Biederman wrote:
[snip]
>> +static bool inc_user_namespaces(struct user_namespace *ns)
>> +{
>> +	struct user_namespace *pos, *bad;
>> +	for (pos = ns; pos; pos = pos->parent) {
>> +		int max = READ_ONCE(pos->max_user_namespaces);
>> +		int sum = atomic_inc_return(&pos->user_namespaces);
>> +		if (sum > max)
>> +			goto fail;
>> +	}
>> +	return true;
>> +fail:
>> +	bad = pos;
>> +	atomic_dec(&pos->user_namespaces);
>> +	for (pos = ns; pos != bad; pos = pos->parent)
>> +		atomic_dec(&pos->user_namespaces);
>> +
>> +	return false;
>> +}
>> +
>> +static void dec_user_namespaces(struct user_namespace *ns)
>> +{
>> +	struct user_namespace *pos;
>> +	for (pos = ns; pos; pos = pos->parent) {
>> +		int dec = atomic_dec_if_positive(&pos->user_namespaces);
>> +		WARN_ON_ONCE(dec < 0);
>> +	}
>> +}
>
> Here you are essentially duplicating the logic of
> page_counter_try_charge and page_counter_uncharge. Perhaps the code of
> the page_counter can be moved away from mm/ and into kernel and the
> renamed to something more generic? E.g. the removed resource_counters or
> maybe "hierarchical counters"? If more namespaces are added in the
> future we certainly wouldn't want proliferation of pairs of such
> hierarchical inc/dec functions.


There are a couple of issues with using the logic of
page_counter_try_charge.  

- It is a maintenance issue.  AKA it ties the fate of two things that
  otherwise should be independent.

- Inc/dec is much easier to protect against integer underflow/overflow
  than a counter that adds/removes arbitrary charges.  While a great
  inspiration when I looked at page_counter_xxx I believe I saw
  issues with the handling of large charges that could cause problems
  in the general case.

- Where we can reuse the logic I actually do in the next patch.

- Having looked at some of the counter cases for things like this aiming
  for general reuse at this point is actually an anti-pattern.  The
  fundamental question is what is a reasonable number of instances for
  this resource and as such how do we know when an application is
  broken.  That really depends on the resource and inotify resources are
  very very different from these name space counters for example.  Which
  means there is no one size fits all design.  As such I oppose code
  that suggests that a one size fits all design is the way to go.

So I think the way this needs to go is for hierarchical resources to
have their own paired functions for readability if nothing else, and if
there is a common pattern we can abstract it out as it is observed.
Which limits the changes to those paired functions.

>> +
>>  /*
>>   * Create a new user namespace, deriving the creator from the user in the
>>   * passed credentials, and replacing that user with the new root user for the
>> @@ -128,8 +169,12 @@ int create_user_ns(struct cred *new)
>>  	kgid_t group = new->egid;
>>  	int ret;
>>  
>> +	ret = -EUSERS;
>>  	if (parent_ns->level > 32)
>> -		return -EUSERS;
>> +		goto fail;
>> +
>> +	if (!inc_user_namespaces(parent_ns))
>> +		goto fail;
>
> I think it would be better if the simple "return -EUSERS" semantics is
> saved and the "fail" label entirely nuked. In this case you only leave
> the more complex error handling.

I wrote it to my preference. And this is all bike shedding.

[snip]
>> @@ -165,6 +211,7 @@ int create_user_ns(struct cred *new)
>>  	ns->level = parent_ns->level + 1;
>>  	ns->owner = owner;
>>  	ns->group = group;
>> +	ns->max_user_namespaces = INT_MAX - 1;
>
> Do we want an INT_MAX -1 default initialization or inheritance from
> parent's value?

I like (INT_MAX -1) because that is effectively a value meaning unlimited
and as such that is the simplest possible thing to implement.  When you
can set the value to anything in your namespace a default doesn't do
anything and showing that your level of the hierarchy isn't limiting
things but another level of the hierarchy is seems meaningful to me.

I admit I have argued for other behavior in the past but when I sat down
and looked at all of the alternatives I could not see a benefit from
other behavior.

That said I'm not opposed to evolving how things work and make things
work better.  This just looked like the best baseline to start the
design from that I could see.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 02/10] userns: Add per user namespace sysctls.
       [not found]             ` <578F1A00.6030701-6AxghH7DbtA@public.gmane.org>
@ 2016-07-21 15:37               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 15:37 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Kees Cook, Linux Containers, Andy Lutomirski, Seth Forshee, Jann Horn

Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org> writes:

> Just a couple of nits.
>
> On 07/20/2016 04:21 AM, Eric W. Biederman wrote:
>> Limit per userns sysctls to only be opened for write by a holder
>> of CAP_SYS_RESOURCE.
>> 
>> Add all of the necessary boilerplate for having per user namespace
>> sysctls.
>> 
>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>> ---
>>  include/linux/user_namespace.h |  4 ++
>>  kernel/user_namespace.c        | 92 +++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 94 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
>> index 9217169c64cb..7d59af1f08f1 100644
>> --- a/include/linux/user_namespace.h
>> +++ b/include/linux/user_namespace.h
>> @@ -39,6 +39,10 @@ struct user_namespace {
>>  	struct key		*persistent_keyring_register;
>>  	struct rw_semaphore	persistent_keyring_register_sem;
>>  #endif
>> +#ifdef CONFIG_SYSCTL
>> +	struct ctl_table_set	set;
>> +	struct ctl_table_header *sysctls;
>> +#endif
>>  };
>>  
>>  extern struct user_namespace init_user_ns;
>> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>> index 68f594212759..b5b5cbb8fe92 100644
>> --- a/kernel/user_namespace.c
>> +++ b/kernel/user_namespace.c
>> @@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
>>  				struct user_namespace *ns, int cap_setid,
>>  				struct uid_gid_map *map);
>>  
>> +#ifdef CONFIG_SYSCTL
>> +static struct ctl_table_set *
>> +set_lookup(struct ctl_table_root *root)
>> +{
>> +	return &current->cred->user_ns->set;
> Why not current_user_ns()->set?
>
>> +}
>> +
>> +static int set_is_seen(struct ctl_table_set *set)
>> +{
>> +	return &current->cred->user_ns->set == set;
> DITTO
>
>> +}
>> +
>> +static int set_permissions(struct ctl_table_header *head,
>> +				  struct ctl_table *table)
>> +{
>> +	struct user_namespace *user_ns =
>> +		container_of(head->set, struct user_namespace, set);
>> +	int mode;
>> +
>> +	/* Allow users with CAP_SYS_ADMIN unrestrainted access */
> Just a minor nit: The comment and following code don't match.
>
>> +	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
>> +		mode = (table->mode & S_IRWXU) >> 6;
>> +	else
>> +	/* Allow all others at most read-only access */
>> +		mode = table->mode & S_IROTH;
>> +	return (mode << 6) | (mode << 3) | mode;
>> +}
>> +
>> +static struct ctl_table_root set_root = {
>> +	.lookup = set_lookup,
>> +	.permissions = set_permissions,
>> +};
>> +
>> +static struct ctl_table userns_table[] = {
>> +	{ }
>> +};
>> +#endif /* CONFIG_SYSCTL */
>> +
>> +static bool setup_userns_sysctls(struct user_namespace *ns)
>> +{
>> +#ifdef CONFIG_SYSCTL
>> +	struct ctl_table *tbl;
>> +	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>> +	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>> +	if (tbl) {
>> +		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>> +	}
>> +	if (!ns->sysctls) {
>> +		kfree(tbl);
>> +		retire_sysctl_set(&ns->set);
>> +		return false;
>> +	}
>> +#endif
>> +	return true;
>> +}
>> +
>> +static void retire_userns_sysctls(struct user_namespace *ns)
>> +{
>> +#ifdef CONFIG_SYSCTL
>> +	unregister_sysctl_table(ns->sysctls);
>> +	retire_sysctl_set(&ns->set);
>> +#endif
>> +}
>> +
>>  static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>>  {
>>  	/* Start with the same capabilities as init but useless for doing
>> @@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
>>  	ns->flags = parent_ns->flags;
>>  	mutex_unlock(&userns_state_mutex);
>>  
>> -	set_cred_user_ns(new, ns);
>> -
>>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>>  	init_rwsem(&ns->persistent_keyring_register_sem);
>>  #endif
>> +	ret = -ENOMEM;
>> +	if (!setup_userns_sysctls(ns))
>> +		goto fail_keyring;
>> +
>> +	set_cred_user_ns(new, ns);
>>  	return 0;
>> +fail_keyring:
>> +#ifdef CONFIG_PERSISTENT_KEYRINGS
>> +	key_put(ns->persistent_keyring_register);
>> +#endif
>> +	ns_free_inum(&ns->ns);
>> +	kmem_cache_free(user_ns_cachep, ns);
>> +	return ret;
>>  }
>>  
>>  int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
>> @@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
>>  
>>  	do {
>>  		parent = ns->parent;
>> +		retire_userns_sysctls(ns);
>>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>>  		key_put(ns->persistent_keyring_register);
>>  #endif
>> @@ -1012,9 +1087,22 @@ const struct proc_ns_operations userns_operations = {
>>  	.install	= userns_install,
>>  };
>>  
>> +static struct ctl_table_header *userns_header;
>>  static __init int user_namespaces_init(void)
>>  {
>> +	static struct ctl_table empty[1];
>>  	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
>> +
>> +#ifdef CONFIG_SYSCTL
>> +	/*
>> +	 * Avoid limits in the sysctl limitation by registering
>> +	 * "/proc/sys/userns" as an empty directory not in a network
>> +	 * namespace.
>> +	 */
> The comment is not very clear to someone who is not intimately familiar
> with the sysctl infrastructure, perhaps you can make it a bit more
> verbose?

Good point.  I don't think I can meaningfully say much more than
this is necessary, but I will see if I can improve the comment a little.

>> +	userns_header = register_sysctl("userns", empty);
>> +	BUG_ON(!userns_header);
>> +#endif
>> +	BUG_ON(!setup_userns_sysctls(&init_user_ns));
> Do we really want the extra code the BUG_ON adds even in the
> !CONFIG_SYSCTL case, possibly the compiler could eliminate it but I
> can't help it but wonder.

BUG_ON is about two instructions so I am not too concerned.  When
possible it is generally preferable to not have code in #ifdef XXX as
then weird cases come up in maintenance.  In this case I am reworking
things to do a bit better so that the code doesn't throw warnings when
!CONFIG_SYSCTL.  Unnecessary warnings are the larger problem.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-20  1:13 [PATCH 0/10] userns: sysctl limits for namespaces Eric W. Biederman
@ 2016-07-21 16:39     ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:39 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn


This patchset addresses two use cases:
- Implement a sane upper bound on the number of namespaces.
- Provide a way for sandboxes to limit the attack surface from
  namespaces.

The maximum sane case I can imagine is if every process is a fat
process, so I set the maximum number of namespaces to the maximum
number of threads.

I make these limits recursive and per user namespace so that a
usernamespace root can reduce the limits further.  If a user namespace
root raises the limit, the limit in the parent namespace will be honored.

I have cut this implementation to the bare minimum needed to achieve
these objectives.

Does anyone know if there is a proper error code to return for resource
limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
those feel a little wrong.

Assuming nothing problematic shows up in the review I will add these to
my user namespace tree.

These patches are also available at:
    git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Changes since v1:
- Compilation issues with !CONFIG_SYSCTL and !CONFIG_USER_NS have been addressed
- Comment improvements.
- A few names have been changed to be clearer.

Eric W. Biederman (10):
      sysctl: Stop implicitly passing current into sysctl_table_root.lookup
      userns: Add per user namespace sysctls.
      userns: Add a limit on the number of user namespaces
      userns: Generalize the user namespace count into ucount
      pidns: Add a limit on the number of pid namespaces
      utsns: Add a limit on the number of uts namespaces
      ipcns: Add a  limit on the number of ipc namespaces
      cgroupns: Add a limit on the number of cgroup namespaces
      netns: Add a limit on the number of net namespaces
      mntns: Add a limit on the number of mount namespaces.

 fs/namespace.c                 |  19 ++++-
 fs/proc/proc_sysctl.c          |  14 +--
 include/linux/sysctl.h         |   3 +-
 include/linux/user_namespace.h |  40 +++++++++
 ipc/namespace.c                |  42 ++++++---
 kernel/cgroup.c                |  15 ++++
 kernel/fork.c                  |   5 ++
 kernel/pid_namespace.c         |  22 ++++-
 kernel/user_namespace.c        | 188 ++++++++++++++++++++++++++++++++++++++---
 kernel/utsname.c               |  31 +++++--
 net/core/net_namespace.c       |  15 ++++
 net/sysctl_net.c               |   4 +-
 12 files changed, 355 insertions(+), 43 deletions(-)

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-21 16:39     ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:39 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api


This patchset addresses two use cases:
- Implement a sane upper bound on the number of namespaces.
- Provide a way for sandboxes to limit the attack surface from
  namespaces.

The maximum sane case I can imagine is if every process is a fat
process, so I set the maximum number of namespaces to the maximum
number of threads.

I make these limits recursive and per user namespace so that a
usernamespace root can reduce the limits further.  If a user namespace
root raises the limit, the limit in the parent namespace will be honored.

I have cut this implementation to the bare minimum needed to achieve
these objectives.

Does anyone know if there is a proper error code to return for resource
limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
those feel a little wrong.

Assuming nothing problematic shows up in the review I will add these to
my user namespace tree.

These patches are also available at:
    git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Changes since v1:
- Compilation issues with !CONFIG_SYSCTL and !CONFIG_USER_NS have been addressed
- Comment improvements.
- A few names have been changed to be clearer.

Eric W. Biederman (10):
      sysctl: Stop implicitly passing current into sysctl_table_root.lookup
      userns: Add per user namespace sysctls.
      userns: Add a limit on the number of user namespaces
      userns: Generalize the user namespace count into ucount
      pidns: Add a limit on the number of pid namespaces
      utsns: Add a limit on the number of uts namespaces
      ipcns: Add a  limit on the number of ipc namespaces
      cgroupns: Add a limit on the number of cgroup namespaces
      netns: Add a limit on the number of net namespaces
      mntns: Add a limit on the number of mount namespaces.

 fs/namespace.c                 |  19 ++++-
 fs/proc/proc_sysctl.c          |  14 +--
 include/linux/sysctl.h         |   3 +-
 include/linux/user_namespace.h |  40 +++++++++
 ipc/namespace.c                |  42 ++++++---
 kernel/cgroup.c                |  15 ++++
 kernel/fork.c                  |   5 ++
 kernel/pid_namespace.c         |  22 ++++-
 kernel/user_namespace.c        | 188 ++++++++++++++++++++++++++++++++++++++---
 kernel/utsname.c               |  31 +++++--
 net/core/net_namespace.c       |  15 ++++
 net/sysctl_net.c               |   4 +-
 12 files changed, 355 insertions(+), 43 deletions(-)

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* [PATCH v2 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup
  2016-07-21 16:39     ` Eric W. Biederman
@ 2016-07-21 16:40         ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Passing nsproxy into sysctl_table_root.lookup was a premature
optimization in an attempt to avoid depending on current.  The
directory /proc/self/sys has not appeared and if and when
it does this code will need to be reviewed closely and reworked
anyway.  So remove the premature optimization.

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/proc/proc_sysctl.c  | 14 +++++++-------
 include/linux/sysctl.h |  3 +--
 net/sysctl_net.c       |  4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5e57c3e46e1d..534630687489 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
 
 static void drop_sysctl_table(struct ctl_table_header *header);
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces);
+	struct ctl_table **pentry);
 static int insert_links(struct ctl_table_header *head);
 static void put_links(struct ctl_table_header *header);
 
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
 }
 
 static struct ctl_table_set *
-lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+lookup_header_set(struct ctl_table_root *root)
 {
 	struct ctl_table_set *set = &root->default_set;
 	if (root->lookup)
-		set = root->lookup(root, namespaces);
+		set = root->lookup(root);
 	return set;
 }
 
@@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	if (S_ISLNK(p->mode)) {
-		ret = sysctl_follow_link(&h, &p, current->nsproxy);
+		ret = sysctl_follow_link(&h, &p);
 		err = ERR_PTR(ret);
 		if (ret)
 			goto out;
@@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
 
 	if (S_ISLNK(table->mode)) {
 		/* It is not an error if we can not follow the link ignore it */
-		int err = sysctl_follow_link(&head, &table, current->nsproxy);
+		int err = sysctl_follow_link(&head, &table);
 		if (err)
 			goto out;
 	}
@@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
 }
 
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces)
+	struct ctl_table **pentry)
 {
 	struct ctl_table_header *head;
 	struct ctl_table_root *root;
@@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
 	ret = 0;
 	spin_lock(&sysctl_lock);
 	root = (*pentry)->data;
-	set = lookup_header_set(root, namespaces);
+	set = lookup_header_set(root);
 	dir = xlate_dir(set, (*phead)->parent);
 	if (IS_ERR(dir))
 		ret = PTR_ERR(dir);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index fa7bc29925c9..6385b331f2b9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -154,8 +154,7 @@ struct ctl_table_set {
 
 struct ctl_table_root {
 	struct ctl_table_set default_set;
-	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
-					   struct nsproxy *namespaces);
+	struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
 	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ed98c1fc3de1..2951f229a855 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
 #endif
 
 static struct ctl_table_set *
-net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
+net_ctl_header_lookup(struct ctl_table_root *root)
 {
-	return &namespaces->net_ns->sysctls;
+	return &current->nsproxy->net_ns->sysctls;
 }
 
 static int is_seen(struct ctl_table_set *set)
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup
@ 2016-07-21 16:40         ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Passing nsproxy into sysctl_table_root.lookup was a premature
optimization in an attempt to avoid depending on current.  The
directory /proc/self/sys has not appeared and if and when
it does this code will need to be reviewed closely and reworked
anyway.  So remove the premature optimization.

Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/proc_sysctl.c  | 14 +++++++-------
 include/linux/sysctl.h |  3 +--
 net/sysctl_net.c       |  4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5e57c3e46e1d..534630687489 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -72,7 +72,7 @@ static DEFINE_SPINLOCK(sysctl_lock);
 
 static void drop_sysctl_table(struct ctl_table_header *header);
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces);
+	struct ctl_table **pentry);
 static int insert_links(struct ctl_table_header *head);
 static void put_links(struct ctl_table_header *header);
 
@@ -319,11 +319,11 @@ static void sysctl_head_finish(struct ctl_table_header *head)
 }
 
 static struct ctl_table_set *
-lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
+lookup_header_set(struct ctl_table_root *root)
 {
 	struct ctl_table_set *set = &root->default_set;
 	if (root->lookup)
-		set = root->lookup(root, namespaces);
+		set = root->lookup(root);
 	return set;
 }
 
@@ -491,7 +491,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	if (S_ISLNK(p->mode)) {
-		ret = sysctl_follow_link(&h, &p, current->nsproxy);
+		ret = sysctl_follow_link(&h, &p);
 		err = ERR_PTR(ret);
 		if (ret)
 			goto out;
@@ -659,7 +659,7 @@ static bool proc_sys_link_fill_cache(struct file *file,
 
 	if (S_ISLNK(table->mode)) {
 		/* It is not an error if we can not follow the link ignore it */
-		int err = sysctl_follow_link(&head, &table, current->nsproxy);
+		int err = sysctl_follow_link(&head, &table);
 		if (err)
 			goto out;
 	}
@@ -976,7 +976,7 @@ static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
 }
 
 static int sysctl_follow_link(struct ctl_table_header **phead,
-	struct ctl_table **pentry, struct nsproxy *namespaces)
+	struct ctl_table **pentry)
 {
 	struct ctl_table_header *head;
 	struct ctl_table_root *root;
@@ -988,7 +988,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
 	ret = 0;
 	spin_lock(&sysctl_lock);
 	root = (*pentry)->data;
-	set = lookup_header_set(root, namespaces);
+	set = lookup_header_set(root);
 	dir = xlate_dir(set, (*phead)->parent);
 	if (IS_ERR(dir))
 		ret = PTR_ERR(dir);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index fa7bc29925c9..6385b331f2b9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -154,8 +154,7 @@ struct ctl_table_set {
 
 struct ctl_table_root {
 	struct ctl_table_set default_set;
-	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
-					   struct nsproxy *namespaces);
+	struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
 	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index ed98c1fc3de1..2951f229a855 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
 #endif
 
 static struct ctl_table_set *
-net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
+net_ctl_header_lookup(struct ctl_table_root *root)
 {
-	return &namespaces->net_ns->sysctls;
+	return &current->nsproxy->net_ns->sysctls;
 }
 
 static int is_seen(struct ctl_table_set *set)
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 02/10] userns: Add per user namespace sysctls.
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Limit per userns sysctls to only be opened for write by a holder
of CAP_SYS_RESOURCE.

Add all of the necessary boilerplate for having per user namespace
sysctls.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  4 ++
 kernel/user_namespace.c        | 96 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..7d59af1f08f1 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -39,6 +39,10 @@ struct user_namespace {
 	struct key		*persistent_keyring_register;
 	struct rw_semaphore	persistent_keyring_register_sem;
 #endif
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_set	set;
+	struct ctl_table_header *sysctls;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 68f594212759..10afbb55dfc2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_set *
+set_lookup(struct ctl_table_root *root)
+{
+	return &current_user_ns()->set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+	return &current_user_ns()->set == set;
+}
+
+static int set_permissions(struct ctl_table_header *head,
+				  struct ctl_table *table)
+{
+	struct user_namespace *user_ns =
+		container_of(head->set, struct user_namespace, set);
+	int mode;
+
+	/* Allow users with CAP_SYS_RESOURCE unrestrained access */
+	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+		mode = (table->mode & S_IRWXU) >> 6;
+	else
+	/* Allow all others at most read-only access */
+		mode = table->mode & S_IROTH;
+	return (mode << 6) | (mode << 3) | mode;
+}
+
+static struct ctl_table_root set_root = {
+	.lookup = set_lookup,
+	.permissions = set_permissions,
+};
+
+static struct ctl_table userns_table[] = {
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static bool setup_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
+	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
+	if (tbl) {
+		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
+	}
+	if (!ns->sysctls) {
+		kfree(tbl);
+		retire_sysctl_set(&ns->set);
+		return false;
+	}
+#endif
+	return true;
+}
+
+static void retire_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(ns->sysctls);
+	retire_sysctl_set(&ns->set);
+#endif
+}
+
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 {
 	/* Start with the same capabilities as init but useless for doing
@@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
 	ns->flags = parent_ns->flags;
 	mutex_unlock(&userns_state_mutex);
 
-	set_cred_user_ns(new, ns);
-
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	init_rwsem(&ns->persistent_keyring_register_sem);
 #endif
+	ret = -ENOMEM;
+	if (!setup_userns_sysctls(ns))
+		goto fail_keyring;
+
+	set_cred_user_ns(new, ns);
 	return 0;
+fail_keyring:
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	key_put(ns->persistent_keyring_register);
+#endif
+	ns_free_inum(&ns->ns);
+	kmem_cache_free(user_ns_cachep, ns);
+	return ret;
 }
 
 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
 
 	do {
 		parent = ns->parent;
+		retire_userns_sysctls(ns);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
 #endif
@@ -1012,9 +1087,26 @@ const struct proc_ns_operations userns_operations = {
 	.install	= userns_install,
 };
 
+static __init void user_namespace_sysctl_init(void)
+{
+#ifdef CONFIG_SYSCTL
+	static struct ctl_table_header *userns_header;
+	static struct ctl_table empty[1];
+	/*
+	 * It is necessary to register the userns directory in the
+	 * default set so that registrations in the child sets work
+	 * properly.
+	 */
+	userns_header = register_sysctl("userns", empty);
+	BUG_ON(!userns_header);
+	BUG_ON(!setup_userns_sysctls(&init_user_ns));
+#endif
+}
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+	user_namespace_sysctl_init();
 	return 0;
 }
 subsys_initcall(user_namespaces_init);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Limit per userns sysctls to only be opened for write by a holder
of CAP_SYS_RESOURCE.

Add all of the necessary boilerplate for having per user namespace
sysctls.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  4 ++
 kernel/user_namespace.c        | 96 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9217169c64cb..7d59af1f08f1 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -39,6 +39,10 @@ struct user_namespace {
 	struct key		*persistent_keyring_register;
 	struct rw_semaphore	persistent_keyring_register_sem;
 #endif
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_set	set;
+	struct ctl_table_header *sysctls;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 68f594212759..10afbb55dfc2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -30,6 +30,70 @@ static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_set *
+set_lookup(struct ctl_table_root *root)
+{
+	return &current_user_ns()->set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+	return &current_user_ns()->set == set;
+}
+
+static int set_permissions(struct ctl_table_header *head,
+				  struct ctl_table *table)
+{
+	struct user_namespace *user_ns =
+		container_of(head->set, struct user_namespace, set);
+	int mode;
+
+	/* Allow users with CAP_SYS_RESOURCE unrestrained access */
+	if (ns_capable(user_ns, CAP_SYS_RESOURCE))
+		mode = (table->mode & S_IRWXU) >> 6;
+	else
+	/* Allow all others at most read-only access */
+		mode = table->mode & S_IROTH;
+	return (mode << 6) | (mode << 3) | mode;
+}
+
+static struct ctl_table_root set_root = {
+	.lookup = set_lookup,
+	.permissions = set_permissions,
+};
+
+static struct ctl_table userns_table[] = {
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static bool setup_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
+	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
+	if (tbl) {
+		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
+	}
+	if (!ns->sysctls) {
+		kfree(tbl);
+		retire_sysctl_set(&ns->set);
+		return false;
+	}
+#endif
+	return true;
+}
+
+static void retire_userns_sysctls(struct user_namespace *ns)
+{
+#ifdef CONFIG_SYSCTL
+	unregister_sysctl_table(ns->sysctls);
+	retire_sysctl_set(&ns->set);
+#endif
+}
+
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 {
 	/* Start with the same capabilities as init but useless for doing
@@ -107,12 +171,22 @@ int create_user_ns(struct cred *new)
 	ns->flags = parent_ns->flags;
 	mutex_unlock(&userns_state_mutex);
 
-	set_cred_user_ns(new, ns);
-
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	init_rwsem(&ns->persistent_keyring_register_sem);
 #endif
+	ret = -ENOMEM;
+	if (!setup_userns_sysctls(ns))
+		goto fail_keyring;
+
+	set_cred_user_ns(new, ns);
 	return 0;
+fail_keyring:
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+	key_put(ns->persistent_keyring_register);
+#endif
+	ns_free_inum(&ns->ns);
+	kmem_cache_free(user_ns_cachep, ns);
+	return ret;
 }
 
 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
@@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
 
 	do {
 		parent = ns->parent;
+		retire_userns_sysctls(ns);
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
 #endif
@@ -1012,9 +1087,26 @@ const struct proc_ns_operations userns_operations = {
 	.install	= userns_install,
 };
 
+static __init void user_namespace_sysctl_init(void)
+{
+#ifdef CONFIG_SYSCTL
+	static struct ctl_table_header *userns_header;
+	static struct ctl_table empty[1];
+	/*
+	 * It is necessary to register the userns directory in the
+	 * default set so that registrations in the child sets work
+	 * properly.
+	 */
+	userns_header = register_sysctl("userns", empty);
+	BUG_ON(!userns_header);
+	BUG_ON(!setup_userns_sysctls(&init_user_ns));
+#endif
+}
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+	user_namespace_sysctl_init();
 	return 0;
 }
 subsys_initcall(user_namespaces_init);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 03/10] userns: Add a limit on the number of user namespaces
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Export the maximum number of user namespaces as
/proc/sys/userns/max_user_namespaces.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  2 ++
 kernel/fork.c                  |  2 ++
 kernel/user_namespace.c        | 69 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7d59af1f08f1..ba6a995178f9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -43,6 +43,8 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
+	int max_user_namespaces;
+	atomic_t user_namespaces;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..95d5498c463f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -323,6 +323,8 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	init_user_ns.max_user_namespaces = max_threads;
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 10afbb55dfc2..0061550e3282 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -29,6 +29,7 @@ static DEFINE_MUTEX(userns_state_mutex);
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
+#define COUNT_MAX (INT_MAX - 1)
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_set *
@@ -63,7 +64,18 @@ static struct ctl_table_root set_root = {
 	.permissions = set_permissions,
 };
 
+static int zero = 0;
+static int count_max = COUNT_MAX;
 static struct ctl_table userns_table[] = {
+	{
+		.procname	= "max_user_namespaces",
+		.data		= &init_user_ns.max_user_namespaces,
+		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &count_max,
+	},
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -75,6 +87,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
+		tbl[0].data = &ns->max_user_namespaces;
+
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -113,6 +127,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos, *bad;
+	for (pos = ns; pos; pos = pos->parent) {
+		int max = READ_ONCE(pos->max_user_namespaces);
+		int sum = atomic_inc_return(&pos->user_namespaces);
+		if (sum > max)
+			goto fail;
+	}
+	return true;
+fail:
+	bad = pos;
+	atomic_dec(&pos->user_namespaces);
+	for (pos = ns; pos != bad; pos = pos->parent)
+		atomic_dec(&pos->user_namespaces);
+
+	return false;
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos;
+	for (pos = ns; pos; pos = pos->parent) {
+		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		WARN_ON_ONCE(dec < 0);
+	}
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -128,8 +170,12 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	ret = -EUSERS;
 	if (parent_ns->level > 32)
-		return -EUSERS;
+		goto fail;
+
+	if (!inc_user_namespaces(parent_ns))
+		goto fail;
 
 	/*
 	 * Verify that we can not violate the policy of which files
@@ -137,26 +183,27 @@ int create_user_ns(struct cred *new)
 	 * by verifing that the root directory is at the root of the
 	 * mount namespace which allows all files to be accessed.
 	 */
+	ret = -EPERM;
 	if (current_chrooted())
-		return -EPERM;
+		goto fail_dec;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
 	 * created a user_namespace.
 	 */
+	ret = -EPERM;
 	if (!kuid_has_mapping(parent_ns, owner) ||
 	    !kgid_has_mapping(parent_ns, group))
-		return -EPERM;
+		goto fail_dec;
 
+	ret = -ENOMEM;
 	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
 	if (!ns)
-		return -ENOMEM;
+		goto fail_dec;
 
 	ret = ns_alloc_inum(&ns->ns);
-	if (ret) {
-		kmem_cache_free(user_ns_cachep, ns);
-		return ret;
-	}
+	if (ret)
+		goto fail_free;
 	ns->ns.ops = &userns_operations;
 
 	atomic_set(&ns->count, 1);
@@ -165,6 +212,7 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
+	ns->max_user_namespaces = COUNT_MAX;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
@@ -185,7 +233,11 @@ fail_keyring:
 	key_put(ns->persistent_keyring_register);
 #endif
 	ns_free_inum(&ns->ns);
+fail_free:
 	kmem_cache_free(user_ns_cachep, ns);
+fail_dec:
+	dec_user_namespaces(parent_ns);
+fail:
 	return ret;
 }
 
@@ -221,6 +273,7 @@ void free_user_ns(struct user_namespace *ns)
 #endif
 		ns_free_inum(&ns->ns);
 		kmem_cache_free(user_ns_cachep, ns);
+		dec_user_namespaces(parent);
 		ns = parent;
 	} while (atomic_dec_and_test(&parent->count));
 }
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 03/10] userns: Add a limit on the number of user namespaces
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Export the maximum number of user namespaces as
/proc/sys/userns/max_user_namespaces.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  2 ++
 kernel/fork.c                  |  2 ++
 kernel/user_namespace.c        | 69 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7d59af1f08f1..ba6a995178f9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -43,6 +43,8 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
+	int max_user_namespaces;
+	atomic_t user_namespaces;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..95d5498c463f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -323,6 +323,8 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	init_user_ns.max_user_namespaces = max_threads;
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 10afbb55dfc2..0061550e3282 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -29,6 +29,7 @@ static DEFINE_MUTEX(userns_state_mutex);
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
+#define COUNT_MAX (INT_MAX - 1)
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_set *
@@ -63,7 +64,18 @@ static struct ctl_table_root set_root = {
 	.permissions = set_permissions,
 };
 
+static int zero = 0;
+static int count_max = COUNT_MAX;
 static struct ctl_table userns_table[] = {
+	{
+		.procname	= "max_user_namespaces",
+		.data		= &init_user_ns.max_user_namespaces,
+		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &count_max,
+	},
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -75,6 +87,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
+		tbl[0].data = &ns->max_user_namespaces;
+
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -113,6 +127,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos, *bad;
+	for (pos = ns; pos; pos = pos->parent) {
+		int max = READ_ONCE(pos->max_user_namespaces);
+		int sum = atomic_inc_return(&pos->user_namespaces);
+		if (sum > max)
+			goto fail;
+	}
+	return true;
+fail:
+	bad = pos;
+	atomic_dec(&pos->user_namespaces);
+	for (pos = ns; pos != bad; pos = pos->parent)
+		atomic_dec(&pos->user_namespaces);
+
+	return false;
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	struct user_namespace *pos;
+	for (pos = ns; pos; pos = pos->parent) {
+		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		WARN_ON_ONCE(dec < 0);
+	}
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -128,8 +170,12 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	ret = -EUSERS;
 	if (parent_ns->level > 32)
-		return -EUSERS;
+		goto fail;
+
+	if (!inc_user_namespaces(parent_ns))
+		goto fail;
 
 	/*
 	 * Verify that we can not violate the policy of which files
@@ -137,26 +183,27 @@ int create_user_ns(struct cred *new)
 	 * by verifing that the root directory is at the root of the
 	 * mount namespace which allows all files to be accessed.
 	 */
+	ret = -EPERM;
 	if (current_chrooted())
-		return -EPERM;
+		goto fail_dec;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
 	 * created a user_namespace.
 	 */
+	ret = -EPERM;
 	if (!kuid_has_mapping(parent_ns, owner) ||
 	    !kgid_has_mapping(parent_ns, group))
-		return -EPERM;
+		goto fail_dec;
 
+	ret = -ENOMEM;
 	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
 	if (!ns)
-		return -ENOMEM;
+		goto fail_dec;
 
 	ret = ns_alloc_inum(&ns->ns);
-	if (ret) {
-		kmem_cache_free(user_ns_cachep, ns);
-		return ret;
-	}
+	if (ret)
+		goto fail_free;
 	ns->ns.ops = &userns_operations;
 
 	atomic_set(&ns->count, 1);
@@ -165,6 +212,7 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
+	ns->max_user_namespaces = COUNT_MAX;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
@@ -185,7 +233,11 @@ fail_keyring:
 	key_put(ns->persistent_keyring_register);
 #endif
 	ns_free_inum(&ns->ns);
+fail_free:
 	kmem_cache_free(user_ns_cachep, ns);
+fail_dec:
+	dec_user_namespaces(parent_ns);
+fail:
 	return ret;
 }
 
@@ -221,6 +273,7 @@ void free_user_ns(struct user_namespace *ns)
 #endif
 		ns_free_inum(&ns->ns);
 		kmem_cache_free(user_ns_cachep, ns);
+		dec_user_namespaces(parent);
 		ns = parent;
 	} while (atomic_dec_and_test(&parent->count));
 }
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 04/10] userns: Generalize the user namespace count into ucount
       [not found]         ` <20160721164014.17534-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-21 16:40             ` Eric W. Biederman
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-21 16:40           ` Eric W. Biederman
  2016-07-21 16:40             ` Eric W. Biederman
                             ` (5 subsequent siblings)
  8 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

The same kind of recursive sane default limit and policy
control that has been implemented for the user namespace
is desirable for the other namespaces, so generalize
the user namespace reference count into a ucount.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
 kernel/fork.c                  |  5 +++-
 kernel/user_namespace.c        | 55 +++++++++++++++++++++++++++---------------
 3 files changed, 69 insertions(+), 23 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index ba6a995178f9..f74a0facc696 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
 
+enum ucounts {
+	UCOUNT_USER_NAMESPACES,
+	UCOUNT_COUNTS,
+};
+
+struct ucount {
+	int max;
+	atomic_t count;
+};
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -43,8 +53,7 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
-	int max_user_namespaces;
-	atomic_t user_namespaces;
+	struct ucount ucount[UCOUNT_COUNTS];
 };
 
 extern struct user_namespace init_user_ns;
@@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
 extern int proc_setgroups_show(struct seq_file *m, void *v);
 extern bool userns_may_setgroups(const struct user_namespace *ns);
 extern bool current_in_userns(const struct user_namespace *target_ns);
+extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
+extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
 {
 	return true;
 }
+
+static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int max = READ_ONCE(init_user_ns.ucount[type].max);
+	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
+	if (sum > max) {
+		atomic_dec(&init_user_ns.ucount[type].count);
+		return false;
+	}
+	return true;
+}
+
+static inline void dec_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int dec = atomic_dec_if_positive(&init_user_ns.ucount[type].count);
+	WARN_ON_ONCE(dec < 0);
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 95d5498c463f..30ca5eb4ca1e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
 
 void __init fork_init(void)
 {
+	int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
@@ -324,7 +325,9 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
 
-	init_user_ns.max_user_namespaces = max_threads;
+	for (i = 0; i < UCOUNT_COUNTS; i++) {
+		init_user_ns.ucount[i].max = max_threads;
+	}
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0061550e3282..728d7e4995ff 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -66,16 +66,17 @@ static struct ctl_table_root set_root = {
 
 static int zero = 0;
 static int count_max = COUNT_MAX;
+#define UCOUNT_ENTRY(name) 				\
+	{						\
+		.procname	= name,			\
+		.maxlen		= sizeof(int),		\
+		.mode		= 0644,			\
+		.proc_handler	= proc_dointvec_minmax,	\
+		.extra1		= &zero,		\
+		.extra2		= &count_max,		\
+	}
 static struct ctl_table userns_table[] = {
-	{
-		.procname	= "max_user_namespaces",
-		.data		= &init_user_ns.max_user_namespaces,
-		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &count_max,
-	},
+	UCOUNT_ENTRY("max_user_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -87,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
-		tbl[0].data = &ns->max_user_namespaces;
-
+		int i;
+		for (i = 0; i < UCOUNT_COUNTS; i++) {
+			tbl[i].data = &ns->ucount[i].max;
+		}
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -127,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
-static bool inc_user_namespaces(struct user_namespace *ns)
+bool inc_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos, *bad;
 	for (pos = ns; pos; pos = pos->parent) {
-		int max = READ_ONCE(pos->max_user_namespaces);
-		int sum = atomic_inc_return(&pos->user_namespaces);
+		int max = READ_ONCE(pos->ucount[type].max);
+		int sum = atomic_inc_return(&pos->ucount[type].count);
 		if (sum > max)
 			goto fail;
 	}
 	return true;
 fail:
 	bad = pos;
-	atomic_dec(&pos->user_namespaces);
+	atomic_dec(&pos->ucount[type].count);
 	for (pos = ns; pos != bad; pos = pos->parent)
-		atomic_dec(&pos->user_namespaces);
+		atomic_dec(&pos->ucount[type].count);
 
 	return false;
 }
 
-static void dec_user_namespaces(struct user_namespace *ns)
+void dec_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos;
 	for (pos = ns; pos; pos = pos->parent) {
-		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
 		WARN_ON_ONCE(dec < 0);
 	}
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -168,7 +181,7 @@ int create_user_ns(struct cred *new)
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
-	int ret;
+	int ret, i;
 
 	ret = -EUSERS;
 	if (parent_ns->level > 32)
@@ -212,7 +225,9 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
-	ns->max_user_namespaces = COUNT_MAX;
+	for (i = 0; i < UCOUNT_COUNTS; i++) {
+		ns->ucount[i].max = COUNT_MAX;
+	}
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 04/10] userns: Generalize the user namespace count into ucount
  2016-07-21 16:40         ` Eric W. Biederman
  (?)
@ 2016-07-21 16:40         ` Eric W. Biederman
       [not found]           ` <20160721164014.17534-4-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2016-07-25 23:09             ` Serge E. Hallyn
  -1 siblings, 2 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

The same kind of recursive sane default limit and policy
control that has been implemented for the user namespace
is desirable for the other namespaces, so generalize
the user namespace reference count into a ucount.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
 kernel/fork.c                  |  5 +++-
 kernel/user_namespace.c        | 55 +++++++++++++++++++++++++++---------------
 3 files changed, 69 insertions(+), 23 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index ba6a995178f9..f74a0facc696 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
 
+enum ucounts {
+	UCOUNT_USER_NAMESPACES,
+	UCOUNT_COUNTS,
+};
+
+struct ucount {
+	int max;
+	atomic_t count;
+};
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -43,8 +53,7 @@ struct user_namespace {
 	struct ctl_table_set	set;
 	struct ctl_table_header *sysctls;
 #endif
-	int max_user_namespaces;
-	atomic_t user_namespaces;
+	struct ucount ucount[UCOUNT_COUNTS];
 };
 
 extern struct user_namespace init_user_ns;
@@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
 extern int proc_setgroups_show(struct seq_file *m, void *v);
 extern bool userns_may_setgroups(const struct user_namespace *ns);
 extern bool current_in_userns(const struct user_namespace *target_ns);
+extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
+extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
 {
 	return true;
 }
+
+static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int max = READ_ONCE(init_user_ns.ucount[type].max);
+	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
+	if (sum > max) {
+		atomic_dec(&init_user_ns.ucount[type].count);
+		return false;
+	}
+	return true;
+}
+
+static inline void dec_ucount(struct user_namespace *ns, enum ucounts type)
+{
+	int dec = atomic_dec_if_positive(&init_user_ns.ucount[type].count);
+	WARN_ON_ONCE(dec < 0);
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 95d5498c463f..30ca5eb4ca1e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
 
 void __init fork_init(void)
 {
+	int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
@@ -324,7 +325,9 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
 
-	init_user_ns.max_user_namespaces = max_threads;
+	for (i = 0; i < UCOUNT_COUNTS; i++) {
+		init_user_ns.ucount[i].max = max_threads;
+	}
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0061550e3282..728d7e4995ff 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -66,16 +66,17 @@ static struct ctl_table_root set_root = {
 
 static int zero = 0;
 static int count_max = COUNT_MAX;
+#define UCOUNT_ENTRY(name) 				\
+	{						\
+		.procname	= name,			\
+		.maxlen		= sizeof(int),		\
+		.mode		= 0644,			\
+		.proc_handler	= proc_dointvec_minmax,	\
+		.extra1		= &zero,		\
+		.extra2		= &count_max,		\
+	}
 static struct ctl_table userns_table[] = {
-	{
-		.procname	= "max_user_namespaces",
-		.data		= &init_user_ns.max_user_namespaces,
-		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &count_max,
-	},
+	UCOUNT_ENTRY("max_user_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -87,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
-		tbl[0].data = &ns->max_user_namespaces;
-
+		int i;
+		for (i = 0; i < UCOUNT_COUNTS; i++) {
+			tbl[i].data = &ns->ucount[i].max;
+		}
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -127,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 	cred->user_ns = user_ns;
 }
 
-static bool inc_user_namespaces(struct user_namespace *ns)
+bool inc_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos, *bad;
 	for (pos = ns; pos; pos = pos->parent) {
-		int max = READ_ONCE(pos->max_user_namespaces);
-		int sum = atomic_inc_return(&pos->user_namespaces);
+		int max = READ_ONCE(pos->ucount[type].max);
+		int sum = atomic_inc_return(&pos->ucount[type].count);
 		if (sum > max)
 			goto fail;
 	}
 	return true;
 fail:
 	bad = pos;
-	atomic_dec(&pos->user_namespaces);
+	atomic_dec(&pos->ucount[type].count);
 	for (pos = ns; pos != bad; pos = pos->parent)
-		atomic_dec(&pos->user_namespaces);
+		atomic_dec(&pos->ucount[type].count);
 
 	return false;
 }
 
-static void dec_user_namespaces(struct user_namespace *ns)
+void dec_ucount(struct user_namespace *ns, enum ucounts type)
 {
 	struct user_namespace *pos;
 	for (pos = ns; pos; pos = pos->parent) {
-		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
 		WARN_ON_ONCE(dec < 0);
 	}
 }
 
+static bool inc_user_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
+static void dec_user_namespaces(struct user_namespace *ns)
+{
+	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -168,7 +181,7 @@ int create_user_ns(struct cred *new)
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
-	int ret;
+	int ret, i;
 
 	ret = -EUSERS;
 	if (parent_ns->level > 32)
@@ -212,7 +225,9 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
-	ns->max_user_namespaces = COUNT_MAX;
+	for (i = 0; i < UCOUNT_COUNTS; i++) {
+		ns->ucount[i].max = COUNT_MAX;
+	}
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 05/10] pidns: Add a limit on the number of pid namespaces
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/pid_namespace.c         | 22 ++++++++++++++++++----
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f74a0facc696..47733637741a 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -24,6 +24,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
+	UCOUNT_PID_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..049cc14ae37a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -79,6 +79,16 @@ static void proc_cleanup_work(struct work_struct *work)
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
+static bool inc_pid_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
+static void dec_pid_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
 static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
 	struct pid_namespace *parent_pid_ns)
 {
@@ -87,15 +97,16 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	int i;
 	int err;
 
-	if (level > MAX_PID_NS_LEVEL) {
-		err = -EINVAL;
+	err = -EINVAL;
+	if (level > MAX_PID_NS_LEVEL)
+		goto out;
+	if (!inc_pid_namespaces(user_ns))
 		goto out;
-	}
 
 	err = -ENOMEM;
 	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
-		goto out;
+		goto out_dec;
 
 	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!ns->pidmap[0].page)
@@ -129,6 +140,8 @@ out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
 	kmem_cache_free(pid_ns_cachep, ns);
+out_dec:
+	dec_pid_namespaces(user_ns);
 out:
 	return ERR_PTR(err);
 }
@@ -146,6 +159,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	dec_pid_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 728d7e4995ff..02a03ead7afc 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -77,6 +77,7 @@ static int count_max = COUNT_MAX;
 	}
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
+	UCOUNT_ENTRY("max_pid_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 05/10] pidns: Add a limit on the number of pid namespaces
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  1 +
 kernel/pid_namespace.c         | 22 ++++++++++++++++++----
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f74a0facc696..47733637741a 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -24,6 +24,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
+	UCOUNT_PID_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..049cc14ae37a 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -79,6 +79,16 @@ static void proc_cleanup_work(struct work_struct *work)
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
+static bool inc_pid_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
+static void dec_pid_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_PID_NAMESPACES);
+}
+
 static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
 	struct pid_namespace *parent_pid_ns)
 {
@@ -87,15 +97,16 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	int i;
 	int err;
 
-	if (level > MAX_PID_NS_LEVEL) {
-		err = -EINVAL;
+	err = -EINVAL;
+	if (level > MAX_PID_NS_LEVEL)
+		goto out;
+	if (!inc_pid_namespaces(user_ns))
 		goto out;
-	}
 
 	err = -ENOMEM;
 	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
-		goto out;
+		goto out_dec;
 
 	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!ns->pidmap[0].page)
@@ -129,6 +140,8 @@ out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
 	kmem_cache_free(pid_ns_cachep, ns);
+out_dec:
+	dec_pid_namespaces(user_ns);
 out:
 	return ERR_PTR(err);
 }
@@ -146,6 +159,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 	ns_free_inum(&ns->ns);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	dec_pid_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 728d7e4995ff..02a03ead7afc 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -77,6 +77,7 @@ static int count_max = COUNT_MAX;
 	}
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
+	UCOUNT_ENTRY("max_pid_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces
       [not found]         ` <20160721164014.17534-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                             ` (3 preceding siblings ...)
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-21 16:40           ` Eric W. Biederman
  2016-07-21 16:40             ` Eric W. Biederman
                             ` (3 subsequent siblings)
  8 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 kernel/utsname.c               | 31 ++++++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 47733637741a..bed2506081fe 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
+	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 02a03ead7afc..6b205c24e888 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,6 +78,7 @@ static int count_max = COUNT_MAX;
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
+	UCOUNT_ENTRY("max_uts_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 831ea7108232..4f13c0419d64 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,16 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
 
+static bool inc_uts_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
+static void dec_uts_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
 static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
@@ -38,15 +48,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	struct uts_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_uts_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = create_uts_ns();
 	if (!ns)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 
 	ns->ns.ops = &utsns_operations;
 
@@ -55,6 +68,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
+
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_uts_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 /*
@@ -85,6 +105,7 @@ void free_uts_ns(struct kref *kref)
 	struct uts_namespace *ns;
 
 	ns = container_of(kref, struct uts_namespace, kref);
+	dec_uts_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces
  2016-07-21 16:40         ` Eric W. Biederman
  (?)
  (?)
@ 2016-07-21 16:40         ` Eric W. Biederman
       [not found]           ` <20160721164014.17534-6-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  -1 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 kernel/utsname.c               | 31 ++++++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 47733637741a..bed2506081fe 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
+	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 02a03ead7afc..6b205c24e888 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,6 +78,7 @@ static int count_max = COUNT_MAX;
 static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
+	UCOUNT_ENTRY("max_uts_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 831ea7108232..4f13c0419d64 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,16 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
 
+static bool inc_uts_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
+static void dec_uts_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_UTS_NAMESPACES);
+}
+
 static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
@@ -38,15 +48,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	struct uts_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_uts_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = create_uts_ns();
 	if (!ns)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 
 	ns->ns.ops = &utsns_operations;
 
@@ -55,6 +68,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
+
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_uts_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 /*
@@ -85,6 +105,7 @@ void free_uts_ns(struct kref *kref)
 	struct uts_namespace *ns;
 
 	ns = container_of(kref, struct uts_namespace, kref);
+	dec_uts_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 07/10] ipcns: Add a  limit on the number of ipc namespaces
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 ipc/namespace.c                | 42 +++++++++++++++++++++++++++++++-----------
 kernel/user_namespace.c        |  1 +
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index bed2506081fe..367cf08ff63d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,6 +26,7 @@ enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
+	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 04cb07eb81f1..3996a1e41a1d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,33 +16,42 @@
 
 #include "util.h"
 
+static bool inc_ipc_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
+static void dec_ipc_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
 static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_ipc_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
 	if (ns == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 	ns->ns.ops = &ipcns_operations;
 
 	atomic_set(&ns->count, 1);
 	ns->user_ns = get_user_ns(user_ns);
 
 	err = mq_init_ns(ns);
-	if (err) {
-		put_user_ns(ns->user_ns);
-		ns_free_inum(&ns->ns);
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_put;
 	atomic_inc(&nr_ipc_ns);
 
 	sem_init_ns(ns);
@@ -50,6 +59,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	shm_init_ns(ns);
 
 	return ns;
+
+fail_put:
+	put_user_ns(ns->user_ns);
+	ns_free_inum(&ns->ns);
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_ipc_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -98,6 +117,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	shm_exit_ns(ns);
 	atomic_dec(&nr_ipc_ns);
 
+	dec_ipc_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b205c24e888..060d3e099f87 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -79,6 +79,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
+	UCOUNT_ENTRY("max_ipc_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 07/10] ipcns: Add a  limit on the number of ipc namespaces
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  1 +
 ipc/namespace.c                | 42 +++++++++++++++++++++++++++++++-----------
 kernel/user_namespace.c        |  1 +
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index bed2506081fe..367cf08ff63d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,6 +26,7 @@ enum ucounts {
 	UCOUNT_USER_NAMESPACES,
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
+	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 04cb07eb81f1..3996a1e41a1d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,33 +16,42 @@
 
 #include "util.h"
 
+static bool inc_ipc_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
+static void dec_ipc_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_IPC_NAMESPACES);
+}
+
 static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
 	int err;
 
+	err = -ENFILE;
+	if (!inc_ipc_namespaces(user_ns))
+		goto fail;
+
+	err = -ENOMEM;
 	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
 	if (ns == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto fail_dec;
 
 	err = ns_alloc_inum(&ns->ns);
-	if (err) {
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_free;
 	ns->ns.ops = &ipcns_operations;
 
 	atomic_set(&ns->count, 1);
 	ns->user_ns = get_user_ns(user_ns);
 
 	err = mq_init_ns(ns);
-	if (err) {
-		put_user_ns(ns->user_ns);
-		ns_free_inum(&ns->ns);
-		kfree(ns);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail_put;
 	atomic_inc(&nr_ipc_ns);
 
 	sem_init_ns(ns);
@@ -50,6 +59,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	shm_init_ns(ns);
 
 	return ns;
+
+fail_put:
+	put_user_ns(ns->user_ns);
+	ns_free_inum(&ns->ns);
+fail_free:
+	kfree(ns);
+fail_dec:
+	dec_ipc_namespaces(user_ns);
+fail:
+	return ERR_PTR(err);
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -98,6 +117,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	shm_exit_ns(ns);
 	atomic_dec(&nr_ipc_ns);
 
+	dec_ipc_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b205c24e888..060d3e099f87 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -79,6 +79,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_user_namespaces"),
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
+	UCOUNT_ENTRY("max_ipc_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/cgroup.c                | 15 +++++++++++++++
 kernel/user_namespace.c        |  1 +
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 367cf08ff63d..1a3a9cb93277 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6e8932..240eec43390b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 
 /* cgroup namespaces */
 
+static bool inc_cgroup_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
 static struct cgroup_namespace *alloc_cgroup_ns(void)
 {
 	struct cgroup_namespace *new_ns;
@@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 void free_cgroup_ns(struct cgroup_namespace *ns)
 {
 	put_css_set(ns->root_cset);
+	dec_cgroup_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
@@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
+	if (!inc_cgroup_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	mutex_lock(&cgroup_mutex);
 	spin_lock_bh(&css_set_lock);
 
@@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	new_ns = alloc_cgroup_ns();
 	if (IS_ERR(new_ns)) {
 		put_css_set(cset);
+		dec_cgroup_namespaces(user_ns);
 		return new_ns;
 	}
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 060d3e099f87..1cf074cb47e2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  1 +
 kernel/cgroup.c                | 15 +++++++++++++++
 kernel/user_namespace.c        |  1 +
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 367cf08ff63d..1a3a9cb93277 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6e8932..240eec43390b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 
 /* cgroup namespaces */
 
+static bool inc_cgroup_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
+}
+
 static struct cgroup_namespace *alloc_cgroup_ns(void)
 {
 	struct cgroup_namespace *new_ns;
@@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 void free_cgroup_ns(struct cgroup_namespace *ns)
 {
 	put_css_set(ns->root_cset);
+	dec_cgroup_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
 	kfree(ns);
@@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
+	if (!inc_cgroup_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	mutex_lock(&cgroup_mutex);
 	spin_lock_bh(&css_set_lock);
 
@@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 	new_ns = alloc_cgroup_ns();
 	if (IS_ERR(new_ns)) {
 		put_css_set(cset);
+		dec_cgroup_namespaces(user_ns);
 		return new_ns;
 	}
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 060d3e099f87..1cf074cb47e2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-21 16:40             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 net/core/net_namespace.c       | 15 +++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 1a3a9cb93277..f86afa536baf 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_NET_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 1cf074cb47e2..e326ca722ae0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_net_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..a489f192d619 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 	return peer;
 }
 
+static bool inc_net_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
@@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	int error = 0;
 	LIST_HEAD(net_exit_list);
 
+	if (!inc_net_namespaces(user_ns))
+		return -ENFILE;
+
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
@@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
 	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
+		dec_net_namespaces(user_ns);
 		put_user_ns(user_ns);
 		net_drop_ns(net);
 		return ERR_PTR(rv);
@@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
+		dec_net_namespaces(net->user_ns);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
@ 2016-07-21 16:40             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 net/core/net_namespace.c       | 15 +++++++++++++++
 3 files changed, 17 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 1a3a9cb93277..f86afa536baf 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ enum ucounts {
 	UCOUNT_PID_NAMESPACES,
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
+	UCOUNT_NET_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 1cf074cb47e2..e326ca722ae0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_pid_namespaces"),
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
+	UCOUNT_ENTRY("max_net_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b629b1..a489f192d619 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 	return peer;
 }
 
+static bool inc_net_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_NET_NAMESPACES);
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
@@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	int error = 0;
 	LIST_HEAD(net_exit_list);
 
+	if (!inc_net_namespaces(user_ns))
+		return -ENFILE;
+
 	atomic_set(&net->count, 1);
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
@@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
 	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
+		dec_net_namespaces(user_ns);
 		put_user_ns(user_ns);
 		net_drop_ns(net);
 		return ERR_PTR(rv);
@@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 		list_del_init(&net->exit_list);
+		dec_net_namespaces(net->user_ns);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 10/10] mntns: Add a limit on the number of mount namespaces.
       [not found]         ` <20160721164014.17534-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
                             ` (7 preceding siblings ...)
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-21 16:40           ` Eric W. Biederman
  8 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/namespace.c                 | 19 ++++++++++++++++++-
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index aabe8e397fc3..3942ae6c34f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2718,9 +2718,20 @@ dput_out:
 	return retval;
 }
 
+static bool inc_mnt_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
 	ns_free_inum(&ns->ns);
+	dec_mnt_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2739,12 +2750,18 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	struct mnt_namespace *new_ns;
 	int ret;
 
+	if (!inc_mnt_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
+	if (!new_ns) {
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(-ENOMEM);
+	}
 	ret = ns_alloc_inum(&new_ns->ns);
 	if (ret) {
 		kfree(new_ns);
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(ret);
 	}
 	new_ns->ns.ops = &mntns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f86afa536baf..ee0e9d7b2e67 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -28,6 +28,7 @@ enum ucounts {
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_NET_NAMESPACES,
+	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e326ca722ae0..f6702d84afab 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -81,6 +81,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
 	UCOUNT_ENTRY("max_net_namespaces"),
+	UCOUNT_ENTRY("max_mnt_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* [PATCH v2 10/10] mntns: Add a limit on the number of mount namespaces.
  2016-07-21 16:40         ` Eric W. Biederman
                           ` (3 preceding siblings ...)
  (?)
@ 2016-07-21 16:40         ` Eric W. Biederman
       [not found]           ` <20160721164014.17534-10-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  -1 siblings, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:40 UTC (permalink / raw)
  To: Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api, Eric W. Biederman

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c                 | 19 ++++++++++++++++++-
 include/linux/user_namespace.h |  1 +
 kernel/user_namespace.c        |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index aabe8e397fc3..3942ae6c34f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2718,9 +2718,20 @@ dput_out:
 	return retval;
 }
 
+static bool inc_mnt_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct user_namespace *ns)
+{
+	dec_ucount(ns, UCOUNT_MNT_NAMESPACES);
+}
+
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
 	ns_free_inum(&ns->ns);
+	dec_mnt_namespaces(ns->user_ns);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2739,12 +2750,18 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	struct mnt_namespace *new_ns;
 	int ret;
 
+	if (!inc_mnt_namespaces(user_ns))
+		return ERR_PTR(-ENFILE);
+
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
+	if (!new_ns) {
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(-ENOMEM);
+	}
 	ret = ns_alloc_inum(&new_ns->ns);
 	if (ret) {
 		kfree(new_ns);
+		dec_mnt_namespaces(user_ns);
 		return ERR_PTR(ret);
 	}
 	new_ns->ns.ops = &mntns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f86afa536baf..ee0e9d7b2e67 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -28,6 +28,7 @@ enum ucounts {
 	UCOUNT_UTS_NAMESPACES,
 	UCOUNT_IPC_NAMESPACES,
 	UCOUNT_NET_NAMESPACES,
+	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_COUNTS,
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e326ca722ae0..f6702d84afab 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -81,6 +81,7 @@ static struct ctl_table userns_table[] = {
 	UCOUNT_ENTRY("max_uts_namespaces"),
 	UCOUNT_ENTRY("max_ipc_namespaces"),
 	UCOUNT_ENTRY("max_net_namespaces"),
+	UCOUNT_ENTRY("max_mnt_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	{ }
 };
-- 
2.8.3

^ permalink raw reply related	[flat|nested] 113+ messages in thread

* Re: [PATCH 0/10] userns: sysctl limits for namespaces
       [not found]     ` <CAGXu5jKWjHKxtPSGuogjR+XE+SbNWiUp2bOxugpD+hUex9QeZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2016-07-21 14:49       ` Eric W. Biederman
@ 2016-07-21 16:58       ` Eric W. Biederman
       [not found]         ` <871t2n53o5.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  1 sibling, 1 reply; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-21 16:58 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linux Containers, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Jann Horn

Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:

> On Tue, Jul 19, 2016 at 6:13 PM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>>
>> The maximum sane case I can imagine is if every process is a fat
>> process, so I set the maximum number of namespaces to the maximum
>> number of threads.
>>
>> I make these limits recursive and per user namespace so that a
>> user namespace root can reduce the limits further.  If a user namespace
>> root raises the limit, the limit in the parent namespace will be honored.
>>
>> I have cut this implementation to the bare minimum needed to achieve
>> these objectives.
>>
>> Assuming nothing problematic shows up in the review I will add these to
>> my user namespace tree.
>
> This looks great; thank you! I think the design is effective. One
> thought that pops to mind is how does an admin query the current
> number of active namespaces of a given type? (It's likely this is
> already exposed somewhere and I just don't know where to look...)

Do you want to give me your Acked-by on the patches?

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH 0/10] userns: sysctl limits for namespaces
       [not found]         ` <871t2n53o5.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-21 18:46           ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-21 18:46 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Seth Forshee, Nikolay Borisov,
	Jann Horn

On Thu, Jul 21, 2016 at 9:58 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:
>
>> On Tue, Jul 19, 2016 at 6:13 PM, Eric W. Biederman
>> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>>
>>> The maximum sane case I can imagine is if every process is a fat
>>> process, so I set the maximum number of namespaces to the maximum
>>> number of threads.
>>>
>>> I make these limits recursive and per user namespace so that a
>>> user namespace root can reduce the limits further.  If a user namespace
>>> root raises the limit, the limit in the parent namespace will be honored.
>>>
>>> I have cut this implementation to the bare minimum needed to achieve
>>> these objectives.
>>>
>>> Assuming nothing problematic shows up in the review I will add these to
>>> my user namespace tree.
>>
>> This looks great; thank you! I think the design is effective. One
>> thought that pops to mind is how does an admin query the current
>> number of active namespaces of a given type? (It's likely this is
>> already exposed somewhere and I just don't know where to look...)
>
> Do you want to give me your Acked-by on the patches?

Sure thing, consider the series:

Acked-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]     ` <87d1m754jc.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-22 13:33       ` Colin Walters
  2016-07-26 10:27         ` Michael Kerrisk (man-pages)
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 113+ messages in thread
From: Colin Walters @ 2016-07-22 13:33 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
> 
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.

Perhaps this is obvious, but since you didn't quite explicitly state it,
do you see this as obsoleting the existing downstream patches
mentioned in:
https://lwn.net/Articles/673597/
It seems conceptually similar to Kees' original approach, right?

The high level makes sense to me...most interesting is
per-userns sysctls.  I'll note most current container managers
mount /proc/sys read-only, and Docker specifically drops
CAP_SYS_RESOURCE by default, so they'd likely need to learn
how to undo that if one wanted to support recursive container usage.
We'd probably need to evaluate the safety of having /proc/sys
writable generally.  (Also it's rather common to filter out CLONE_NEWUSER
via seccomp, but that's easy to undo)

But that's the flip side - if we're aiming primarily for an upstreamable
way to *limit* namespace usage, it seems sane to me.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-21 16:39     ` Eric W. Biederman
  (?)
@ 2016-07-22 13:33     ` Colin Walters
       [not found]       ` <1469194399.3817016.673814953.7581706C-2RFepEojUI2N1INw9kWLP6GC3tUn3ZHUQQ4Iyu8u01E@public.gmane.org>
  -1 siblings, 1 reply; 113+ messages in thread
From: Colin Walters @ 2016-07-22 13:33 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Andy Lutomirski, Jann Horn, Kees Cook, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel, netdev,
	linux-kernel, linux-api

On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
> 
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.

Perhaps this is obvious, but since you didn't quite explicitly state it,
do you see this as obsoleting the existing downstream patches
mentioned in:
https://lwn.net/Articles/673597/
It seems conceptually similar to Kees' original approach, right?

The high level makes sense to me...most interesting is
per-userns sysctls.  I'll note most current container managers
mount /proc/sys read-only, and Docker specifically drops
CAP_SYS_RESOURCE by default, so they'd likely need to learn
how to undo that if one wanted to support recursive container usage.
We'd probably need to evaluate the safety of having /proc/sys
writable generally.  (Also it's rather common to filter out CLONE_NEWUSER
via seccomp, but that's easy to undo)

But that's the flip side - if we're aiming primarily for an upstreamable
way to *limit* namespace usage, it seems sane to me.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-22 13:33     ` Colin Walters
@ 2016-07-22 18:45           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-22 18:45 UTC (permalink / raw)
  To: Colin Walters
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Colin Walters <walters-gPq2gbYjIk8dnm+yROfE0A@public.gmane.org> writes:

> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>> 
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Perhaps this is obvious, but since you didn't quite explicitly state it;
> do you see this as obsoleting the existing downstream patches
> mentioned in:
> https://lwn.net/Articles/673597/
> It seems conceptually similar to Kees' original approach, right?

Similar yes, and I expect it fills the need.  My primary difference is
that I believe this approach makes sense from a perspective of assuming
that user namespaces or other namespaces are not any buggier than any
other piece of kernel code and that people will use them.

I don't see these limits making sense from a perspective that user
namespaces are flawed and distro kernels should not have enabled them in
the first place.  That was my perception, right or wrong, of Kees' patches
and the related patches that landed in Ubuntu and Debian.

With Kees' approach I could not see how to handle the case where some
applications on the system want user namespaces and others don't.
That made it very nasty for future evolution and more deployment of
user namespaces.  Being per user namespace, these limits can be used to
sandbox applications without affecting the rest of the system.

> The high level makes sense to me...most interesting is
> per-userns sysctls.  I'll note most current container managers
> mount /proc/sys read-only, and Docker specifically drops
> CAP_SYS_RESOURCE by default, so they'd likely need to learn
> how to undo that if one wanted to support recursive container usage.
> We'd probably need to evaluate the safety of having /proc/sys
> writable generally.  (Also it's rather common to filter out CLONE_NEWUSER
> via seccomp, but that's easy to undo)

Just using a user namespace replaces most of those precautions.

> But that's the flip side - if we're aiming primarily for an upstreamable
> way to *limit* namespace usage, it seems sane to me.

Yes.  The primary target is to stop applications that have gone buggy
and allocated a crazy number of namespaces.  The secondary target
is to allow sandboxes to disable creation of additional namespaces.
Just set the limit to 0 and drop caps, or similarly set the limit
to 1 and create another fresh set of nested namespaces.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-22 18:45           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-22 18:45 UTC (permalink / raw)
  To: Colin Walters
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Colin Walters <walters@verbum.org> writes:

> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>> 
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Perhaps this is obvious, but since you didn't quite explicitly state it;
> do you see this as obsoleting the existing downstream patches
> mentioned in:
> https://lwn.net/Articles/673597/
> It seems conceptually similar to Kees' original approach, right?

Similar yes, and I expect it fills the need.  My primary difference is
that I believe this approach makes sense from a perspective of assuming
that user namespaces or other namespaces are not any buggier than any
other piece of kernel code and that people will use them.

I don't see these limits making sense from a perspective that user
namespaces are flawed and distro kernels should not have enabled them in
the first place.  That was my perception, right or wrong, of Kees' patches
and the related patches that landed in Ubuntu and Debian.

With Kees' approach I could not see how to handle the case where some
applications on the system want user namespaces and others don't.
That made it very nasty for future evolution and more deployment of
user namespaces.  Being per user namespace, these limits can be used to
sandbox applications without affecting the rest of the system.

> The high level makes sense to me...most interesting is
> per-userns sysctls.  I'll note most current container managers
> mount /proc/sys read-only, and Docker specifically drops
> CAP_SYS_RESOURCE by default, so they'd likely need to learn
> how to undo that if one wanted to support recursive container usage.
> We'd probably need to evaluate the safety of having /proc/sys
> writable generally.  (Also it's rather common to filter out CLONE_NEWUSER
> via seccomp, but that's easy to undo)

Just using a user namespace replaces most of those precautions.

> But that's the flip side - if we're aiming primarily for an upstreamable
> way to *limit* namespace usage, it seems sane to me.

Yes.  The primary target is to stop applications that have gone buggy
and allocated a crazy number of namespaces.  The secondary target
is to allow sandboxes to disable creation of additional namespaces.
Just set the limit to 0 and drop caps, or similarly set the limit
to 1 and create another fresh set of nested namespaces.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]           ` <87poq5y0jw.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-22 21:46             ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-22 21:46 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Colin Walters, Network Development, Linux Containers, LKML,
	Andy Lutomirski, Seth Forshee, Nikolay Borisov, Linux API,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> Colin Walters <walters-gPq2gbYjIk8dnm+yROfE0A@public.gmane.org> writes:
>
>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>
>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>> do you see this as obsoleting the existing downstream patches
>> mentioned in:
>> https://lwn.net/Articles/673597/
>> It seems conceptually similar to Kees' original approach, right?
>
> Similar yes, and I expect it fills the need.  My primary difference is
> that I believe this approach makes sense from a perspective of assuming
> that user namespaces or other namespaces are not any buggier than any
> other piece of kernel code and that people will use them.
>
> I don't see these limits making sense from a perspective that user
> namespaces are flawed and distro kernels should not have enabled them in
> the first place.  That was my perception right or wrong of Kees patches
> and the related patches that landed in Ubuntu and Debian.
>
> With Kees approach I could not see how to handle the case where some
> applications on the system wanted user namespaces and others don't.
> Which made it very nasty for future evolution and more deployment of
> user namespaces.  Being per user namespace these limits can be used to
> sandbox applications without affecting the rest of the system.

While it certainly works for my use-case (init ns
max_usernamespaces=0), I don't see how this helps the case of "let
user foobar open 1 userns, but everyone else is 0", which is likely
the middle ground between "just turn it off" and "everyone gets to
create usernamespaces". I'm personally not interested in that level of
granularity, but in earlier discussions it sounded like this was
something you wanted?

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-22 18:45           ` Eric W. Biederman
  (?)
@ 2016-07-22 21:46           ` Kees Cook
  2016-07-23  2:11               ` Eric W. Biederman
       [not found]             ` <CAGXu5j+X7eWggkwpBpABsFe4hqK5LN1mYJ2TH91qj3iSe6rtcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  -1 siblings, 2 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-22 21:46 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Colin Walters, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	Network Development, LKML, Linux API

On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Colin Walters <walters@verbum.org> writes:
>
>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>
>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>> do you see this as obsoleting the existing downstream patches
>> mentioned in:
>> https://lwn.net/Articles/673597/
>> It seems conceptually similar to Kees' original approach, right?
>
> Similar yes, and I expect it fills the need.  My primary difference is
> that I believe this approach makes sense from a perspective of assuming
> that user namespaces or other namespaces are not any buggier than any
> other piece of kernel code and that people will use them.
>
> I don't see these limits making sense from a perspective that user
> namespaces are flawed and distro kernels should not have enabled them in
> the first place.  That was my perception right or wrong of Kees patches
> and the related patches that landed in Ubuntu and Debian.
>
> With Kees approach I could not see how to handle the case where some
> applications on the system wanted user namespaces and others don't.
> Which made it very nasty for future evolution and more deployment of
> user namespaces.  Being per user namespace these limits can be used to
> sandbox applications without affecting the rest of the system.

While it certainly works for my use-case (init ns
max_usernamespaces=0), I don't see how this helps the case of "let
user foobar open 1 userns, but everyone else is 0", which is likely
the middle ground between "just turn it off" and "everyone gets to
create usernamespaces". I'm personally not interested in that level of
granularity, but in earlier discussions it sounded like this was
something you wanted?

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]             ` <CAGXu5j+X7eWggkwpBpABsFe4hqK5LN1mYJ2TH91qj3iSe6rtcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-07-23  2:11               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-23  2:11 UTC (permalink / raw)
  To: Kees Cook
  Cc: Colin Walters, Network Development, Linux Containers, LKML,
	Andy Lutomirski, Seth Forshee, Nikolay Borisov, Linux API,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:

> On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> Colin Walters <walters-gPq2gbYjIk8dnm+yROfE0A@public.gmane.org> writes:
>>
>>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>>> do you see this as obsoleting the existing downstream patches
>>> mentioned in:
>>> https://lwn.net/Articles/673597/
>>> It seems conceptually similar to Kees' original approach, right?
>>
>> Similar yes, and I expect it fills the need.  My primary difference is
>> that I believe this approach makes sense from a perspective of assuming
>> that user namespaces or other namespaces are not any buggier than any
>> other piece of kernel code and that people will use them.
>>
>> I don't see these limits making sense from a perspective that user
>> namespaces are flawed and distro kernels should not have enabled them in
>> the first place.  That was my perception right or wrong of Kees patches
>> and the related patches that landed in Ubuntu and Debian.
>>
>> With Kees approach I could not see how to handle the case where some
>> applications on the system wanted user namespaces and others don't.
>> Which made it very nasty for future evolution and more deployment of
>> user namespaces.  Being per user namespace these limits can be used to
>> sandbox applications without affecting the rest of the system.
>
> While it certainly works for my use-case (init ns
> max_usernamespaces=0), I don't see how this helps the case of "let
> user foobar open 1 userns, but everyone else is 0", which is likely
> the middle ground between "just turn it off" and "everyone gets to
> create usernamespaces". I'm personally not interested in that level of
> granularity, but in earlier discussions it sounded like this was
> something you wanted?

So the case I really care about is when there is limited use, so people
don't have to redesign their applications.  In this case if you want to
disable things in a sandbox like way you just create a user namespace
and set the count to 0 in that user namespace.

A whole system disable I tend to think is a stupid configuration for a
new system.  It gets into people negotiating for what they need, and I
don't see that as sustainable.  I prefer good usable defaults.

I would have loved to have done something with per user limits so it
could be disabled for a selection of users, but it turns out the kernel
doesn't have appropriate data structures to hold limits for users
that have not logged in.

And in practice I don't care about the case where 1 user is allowed but not
the others; I care about disallowing this user/program that is in a
sandbox.  I also seem to recall people have problems with using seccomp
to disable things.  All of that said, a per user policy is easily
implemented in pam by setting the size count for a specific user to 0.

I do think a limit to catch applications that go crazy is very sane, and
that is primarily what is implemented here.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]             ` <CAGXu5j+X7eWggkwpBpABsFe4hqK5LN1mYJ2TH91qj3iSe6rtcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2016-07-23  2:11               ` Eric W. Biederman
@ 2016-07-23  2:11               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-23  2:11 UTC (permalink / raw)
  To: Kees Cook
  Cc: Colin Walters, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	Network Development, LKML, Linux API

Kees Cook <keescook@chromium.org> writes:

> On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> Colin Walters <walters@verbum.org> writes:
>>
>>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>>> do you see this as obsoleting the existing downstream patches
>>> mentioned in:
>>> https://lwn.net/Articles/673597/
>>> It seems conceptually similar to Kees' original approach, right?
>>
>> Similar yes, and I expect it fills the need.  My primary difference is
>> that I believe this approach makes sense from a perspective of assuming
>> that user namespaces or other namespaces are not any buggier than any
>> other piece of kernel code and that people will use them.
>>
>> I don't see these limits making sense from a perspective that user
>> namespaces are flawed and distro kernels should not have enabled them in
>> the first place.  That was my perception right or wrong of Kees patches
>> and the related patches that landed in Ubuntu and Debian.
>>
>> With Kees approach I could not see how to handle the case where some
>> applications on the system wanted user namespaces and others don't.
>> Which made it very nasty for future evolution and more deployment of
>> user namespaces.  Being per user namespace these limits can be used to
>> sandbox applications without affecting the rest of the system.
>
> While it certainly works for my use-case (init ns
> max_usernamespaces=0), I don't see how this helps the case of "let
> user foobar open 1 userns, but everyone else is 0", which is likely
> the middle ground between "just turn it off" and "everyone gets to
> create usernamespaces". I'm personally not interested in that level of
> granularity, but in earlier discussions it sounded like this was
> something you wanted?

So the case I really care about is when there is limited use, so people
don't have to redesign their applications.  In this case if you want to
disable things in a sandbox like way you just create a user namespace
and set the count to 0 in that user namespace.

A whole system disable I tend to think is a stupid configuration for a
new system.  It gets into people negotiating for what they need, and I
don't see that as sustainable.  I prefer good usable defaults.

I would have loved to have done something with per user limits so it
could be disabled for a selection of users, but it turns out the kernel
doesn't have appropriate data structures to hold limits for users
that have not logged in.

And in practice I don't care about the case where 1 user is allowed but not
the others; I care about disallowing this user/program that is in a
sandbox.  I also seem to recall people have problems with using seccomp
to disable things.  All of that said, a per user policy is easily
implemented in pam by setting the size count for a specific user to 0.

I do think a limit to catch applications that go crazy is very sane, and
that is primarily what is implemented here.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-23  2:11               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-23  2:11 UTC (permalink / raw)
  To: Kees Cook
  Cc: Colin Walters, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel@vger.kernel.org, Network Development, LKML,
	Linux API

Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:

> On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> Colin Walters <walters-gPq2gbYjIk8dnm+yROfE0A@public.gmane.org> writes:
>>
>>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>>> do you see this as obsoleting the existing downstream patches
>>> mentioned in:
>>> https://lwn.net/Articles/673597/
>>> It seems conceptually similar to Kees' original approach, right?
>>
>> Similar yes, and I expect it fills the need.  My primary difference is
>> that I believe this approach makes sense from a perspective of assuming
>> that user namespaces or other namespaces are not any buggier than any
>> other piece of kernel code and that people will use them.
>>
>> I don't see these limits making sense from a perspective that user
>> namespaces are flawed and distro kernels should not have enabled them in
>> the first place.  That was my perception right or wrong of Kees patches
>> and the related patches that landed in Ubuntu and Debian.
>>
>> With Kees approach I could not see how to handle the case where some
>> applications on the system wanted user namespaces and others don't.
>> Which made it very nasty for future evolution and more deployment of
>> user namespaces.  Being per user namespace these limits can be used to
>> sandbox applications without affecting the rest of the system.
>
> While it certainly works for my use-case (init ns
> max_usernamespaces=0), I don't see how this helps the case of "let
> user foobar open 1 userns, but everyone else is 0", which is likely
> the middle ground between "just turn it off" and "everyone gets to
> create usernamespaces". I'm personally not interested in that level of
> granularity, but in earlier discussions it sounded like this was
> something you wanted?

So the case I really care about is when there is limited use, so people
don't have to redesign their applications.  In this case if you want to
disable things in a sandbox like way you just create a user namespace
and set the count to 0 in that user namespace.

A whole system disable I tend to think is a stupid configuration for a
new system.  It gets into people negotiating for what they need, and I
don't see that as sustainable.  I prefer good usable defaults.

I would have loved to have done something with per user limits so it
could be disabled for a selection of users, but it turns out the kernel
doesn't have appropriate data structures to hold limits for users
that have not logged in.

And in practice I don't care about the case where 1 user is allowed but not
the others; I care about disallowing this user/program that is in a
sandbox.  I also seem to recall people have problems with using seccomp
to disable things.  All of that said, a per user policy is easily
implemented in pam by setting the size count for a specific user to 0.

I do think a limit to catch applications that go crazy is very sane, and
that is primarily what is implemented here.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-23  2:11               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-23  2:11 UTC (permalink / raw)
  To: Kees Cook
  Cc: Colin Walters, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel@vger.kernel.org, Network Development, LKML,
	Linux API

Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> writes:

> On Fri, Jul 22, 2016 at 11:45 AM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> Colin Walters <walters-gPq2gbYjIk8dnm+yROfE0A@public.gmane.org> writes:
>>
>>> On Thu, Jul 21, 2016, at 12:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Perhaps this is obvious, but since you didn't quite explicitly state it;
>>> do you see this as obsoleting the existing downstream patches
>>> mentioned in:
>>> https://lwn.net/Articles/673597/
>>> It seems conceptually similar to Kees' original approach, right?
>>
>> Similar yes, and I expect it fills the need.  My primary difference is
>> that I believe this approach makes sense from a perspective of assuming
>> that user namespaces or other namespaces are not any buggier than any
>> other piece of kernel code and that people will use them.
>>
>> I don't see these limits making sense from a perspective that user
>> namespaces are flawed and distro kernels should not have enabled them in
>> the first place.  That was my perception right or wrong of Kees patches
>> and the related patches that landed in Ubuntu and Debian.
>>
>> With Kees approach I could not see how to handle the case where some
>> applications on the system wanted user namespaces and others don't.
>> Which made it very nasty for future evolution and more deployment of
>> user namespaces.  Being per user namespace these limits can be used to
>> sandbox applications without affecting the rest of the system.
>
> While it certainly works for my use-case (init ns
> max_usernamespaces=0), I don't see how this helps the case of "let
> user foobar open 1 userns, but everyone else is 0", which is likely
> the middle ground between "just turn it off" and "everyone gets to
> create usernamespaces". I'm personally not interested in that level of
> granularity, but in earlier discussions it sounded like this was
> something you wanted?

So the case I really care about is when there is limited use, so people
don't have to redesign their applications.  In this case if you want to
disable things in a sandbox like way you just create a user namespace
and set the count to 0 in that user namespace.

A whole system disable I tend to think is a stupid configuration for a
new system.  It gets into people negotiating for what they need, and I
don't see that as sustainable.  I prefer good usable defaults.

I would have loved to have done something with per user limits so it
could be disabled for a selection of users, but it turns out the kernel
doesn't have appropriate data structures to hold limits for users
that have not logged in.

And in practice I don't care about the case where 1 user is allowed but not
the others; I care about disallowing this user/program that is in a
sandbox.  I also seem to recall people have problems with using seccomp
to disable things.  All of that said, a per user policy is easily
implemented in pam by setting the size count for a specific user to 0.

I do think a limit to catch applications that go crazy is very sane, and
that is primarily what is implemented here.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 03/10] userns: Add a limit on the number of user namespaces
       [not found]             ` <20160721164014.17534-3-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:05               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:05 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Export the export the maximum number of user namespaces as

^ note if you resend, duplicate "export the"

> /proc/sys/userns/max_user_namespaces.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  2 ++
>  kernel/fork.c                  |  2 ++
>  kernel/user_namespace.c        | 69 +++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 65 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 7d59af1f08f1..ba6a995178f9 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -43,6 +43,8 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> +	int max_user_namespaces;
> +	atomic_t user_namespaces;
>  };
>  
>  extern struct user_namespace init_user_ns;
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 5c2c355aa97f..95d5498c463f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -323,6 +323,8 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
> +
> +	init_user_ns.max_user_namespaces = max_threads;
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 10afbb55dfc2..0061550e3282 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -29,6 +29,7 @@ static DEFINE_MUTEX(userns_state_mutex);
>  static bool new_idmap_permitted(const struct file *file,
>  				struct user_namespace *ns, int cap_setid,
>  				struct uid_gid_map *map);
> +#define COUNT_MAX (INT_MAX - 1)
>  
>  #ifdef CONFIG_SYSCTL
>  static struct ctl_table_set *
> @@ -63,7 +64,18 @@ static struct ctl_table_root set_root = {
>  	.permissions = set_permissions,
>  };
>  
> +static int zero = 0;
> +static int count_max = COUNT_MAX;
>  static struct ctl_table userns_table[] = {
> +	{
> +		.procname	= "max_user_namespaces",
> +		.data		= &init_user_ns.max_user_namespaces,
> +		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &count_max,
> +	},
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -75,6 +87,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> +		tbl[0].data = &ns->max_user_namespaces;
> +
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -113,6 +127,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos, *bad;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int max = READ_ONCE(pos->max_user_namespaces);
> +		int sum = atomic_inc_return(&pos->user_namespaces);
> +		if (sum > max)
> +			goto fail;
> +	}
> +	return true;
> +fail:
> +	bad = pos;
> +	atomic_dec(&pos->user_namespaces);
> +	for (pos = ns; pos != bad; pos = pos->parent)
> +		atomic_dec(&pos->user_namespaces);
> +
> +	return false;
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		WARN_ON_ONCE(dec < 0);
> +	}
> +}
> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -128,8 +170,12 @@ int create_user_ns(struct cred *new)
>  	kgid_t group = new->egid;
>  	int ret;
>  
> +	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> -		return -EUSERS;
> +		goto fail;
> +
> +	if (!inc_user_namespaces(parent_ns))
> +		goto fail;
>  
>  	/*
>  	 * Verify that we can not violate the policy of which files
> @@ -137,26 +183,27 @@ int create_user_ns(struct cred *new)
>  	 * by verifing that the root directory is at the root of the
>  	 * mount namespace which allows all files to be accessed.
>  	 */
> +	ret = -EPERM;
>  	if (current_chrooted())
> -		return -EPERM;
> +		goto fail_dec;
>  
>  	/* The creator needs a mapping in the parent user namespace
>  	 * or else we won't be able to reasonably tell userspace who
>  	 * created a user_namespace.
>  	 */
> +	ret = -EPERM;
>  	if (!kuid_has_mapping(parent_ns, owner) ||
>  	    !kgid_has_mapping(parent_ns, group))
> -		return -EPERM;
> +		goto fail_dec;
>  
> +	ret = -ENOMEM;
>  	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
>  	if (!ns)
> -		return -ENOMEM;
> +		goto fail_dec;
>  
>  	ret = ns_alloc_inum(&ns->ns);
> -	if (ret) {
> -		kmem_cache_free(user_ns_cachep, ns);
> -		return ret;
> -	}
> +	if (ret)
> +		goto fail_free;
>  	ns->ns.ops = &userns_operations;
>  
>  	atomic_set(&ns->count, 1);
> @@ -165,6 +212,7 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> +	ns->max_user_namespaces = COUNT_MAX;
>  
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> @@ -185,7 +233,11 @@ fail_keyring:
>  	key_put(ns->persistent_keyring_register);
>  #endif
>  	ns_free_inum(&ns->ns);
> +fail_free:
>  	kmem_cache_free(user_ns_cachep, ns);
> +fail_dec:
> +	dec_user_namespaces(parent_ns);
> +fail:
>  	return ret;
>  }
>  
> @@ -221,6 +273,7 @@ void free_user_ns(struct user_namespace *ns)
>  #endif
>  		ns_free_inum(&ns->ns);
>  		kmem_cache_free(user_ns_cachep, ns);
> +		dec_user_namespaces(parent);
>  		ns = parent;
>  	} while (atomic_dec_and_test(&parent->count));
>  }
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 03/10] userns: Add a limit on the number of user namespaces
  2016-07-21 16:40             ` Eric W. Biederman
  (?)
  (?)
@ 2016-07-25 23:05             ` Serge E. Hallyn
  -1 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:05 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Export the export the maximum number of user namespaces as

^ note if you resend, duplicate "export the"

> /proc/sys/userns/max_user_namespaces.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  2 ++
>  kernel/fork.c                  |  2 ++
>  kernel/user_namespace.c        | 69 +++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 65 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 7d59af1f08f1..ba6a995178f9 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -43,6 +43,8 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> +	int max_user_namespaces;
> +	atomic_t user_namespaces;
>  };
>  
>  extern struct user_namespace init_user_ns;
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 5c2c355aa97f..95d5498c463f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -323,6 +323,8 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
> +
> +	init_user_ns.max_user_namespaces = max_threads;
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 10afbb55dfc2..0061550e3282 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -29,6 +29,7 @@ static DEFINE_MUTEX(userns_state_mutex);
>  static bool new_idmap_permitted(const struct file *file,
>  				struct user_namespace *ns, int cap_setid,
>  				struct uid_gid_map *map);
> +#define COUNT_MAX (INT_MAX - 1)
>  
>  #ifdef CONFIG_SYSCTL
>  static struct ctl_table_set *
> @@ -63,7 +64,18 @@ static struct ctl_table_root set_root = {
>  	.permissions = set_permissions,
>  };
>  
> +static int zero = 0;
> +static int count_max = COUNT_MAX;
>  static struct ctl_table userns_table[] = {
> +	{
> +		.procname	= "max_user_namespaces",
> +		.data		= &init_user_ns.max_user_namespaces,
> +		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &count_max,
> +	},
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -75,6 +87,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> +		tbl[0].data = &ns->max_user_namespaces;
> +
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -113,6 +127,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos, *bad;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int max = READ_ONCE(pos->max_user_namespaces);
> +		int sum = atomic_inc_return(&pos->user_namespaces);
> +		if (sum > max)
> +			goto fail;
> +	}
> +	return true;
> +fail:
> +	bad = pos;
> +	atomic_dec(&pos->user_namespaces);
> +	for (pos = ns; pos != bad; pos = pos->parent)
> +		atomic_dec(&pos->user_namespaces);
> +
> +	return false;
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	struct user_namespace *pos;
> +	for (pos = ns; pos; pos = pos->parent) {
> +		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		WARN_ON_ONCE(dec < 0);
> +	}
> +}
> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -128,8 +170,12 @@ int create_user_ns(struct cred *new)
>  	kgid_t group = new->egid;
>  	int ret;
>  
> +	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> -		return -EUSERS;
> +		goto fail;
> +
> +	if (!inc_user_namespaces(parent_ns))
> +		goto fail;
>  
>  	/*
>  	 * Verify that we can not violate the policy of which files
> @@ -137,26 +183,27 @@ int create_user_ns(struct cred *new)
>  	 * by verifing that the root directory is at the root of the
>  	 * mount namespace which allows all files to be accessed.
>  	 */
> +	ret = -EPERM;
>  	if (current_chrooted())
> -		return -EPERM;
> +		goto fail_dec;
>  
>  	/* The creator needs a mapping in the parent user namespace
>  	 * or else we won't be able to reasonably tell userspace who
>  	 * created a user_namespace.
>  	 */
> +	ret = -EPERM;
>  	if (!kuid_has_mapping(parent_ns, owner) ||
>  	    !kgid_has_mapping(parent_ns, group))
> -		return -EPERM;
> +		goto fail_dec;
>  
> +	ret = -ENOMEM;
>  	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
>  	if (!ns)
> -		return -ENOMEM;
> +		goto fail_dec;
>  
>  	ret = ns_alloc_inum(&ns->ns);
> -	if (ret) {
> -		kmem_cache_free(user_ns_cachep, ns);
> -		return ret;
> -	}
> +	if (ret)
> +		goto fail_free;
>  	ns->ns.ops = &userns_operations;
>  
>  	atomic_set(&ns->count, 1);
> @@ -165,6 +212,7 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> +	ns->max_user_namespaces = COUNT_MAX;
>  
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> @@ -185,7 +233,11 @@ fail_keyring:
>  	key_put(ns->persistent_keyring_register);
>  #endif
>  	ns_free_inum(&ns->ns);
> +fail_free:
>  	kmem_cache_free(user_ns_cachep, ns);
> +fail_dec:
> +	dec_user_namespaces(parent_ns);
> +fail:
>  	return ret;
>  }
>  
> @@ -221,6 +273,7 @@ void free_user_ns(struct user_namespace *ns)
>  #endif
>  		ns_free_inum(&ns->ns);
>  		kmem_cache_free(user_ns_cachep, ns);
> +		dec_user_namespaces(parent);
>  		ns = parent;
>  	} while (atomic_dec_and_test(&parent->count));
>  }
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 04/10] userns: Generalize the user namespace count into ucount
       [not found]           ` <20160721164014.17534-4-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:09             ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> The same kind of recursive sane default limit and policy
> control that has been implemented for the user namespace
> is desirable for the other namespaces, so generalize
> the user namespace reference count into a ucount.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
>  kernel/fork.c                  |  5 +++-
>  kernel/user_namespace.c        | 55 +++++++++++++++++++++++++++---------------
>  3 files changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index ba6a995178f9..f74a0facc696 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  
>  #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
>  
> +enum ucounts {
> +	UCOUNT_USER_NAMESPACES,
> +	UCOUNT_COUNTS,
> +};
> +
> +struct ucount {
> +	int max;
> +	atomic_t count;
> +};
> +
>  struct user_namespace {
>  	struct uid_gid_map	uid_map;
>  	struct uid_gid_map	gid_map;
> @@ -43,8 +53,7 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> -	int max_user_namespaces;
> -	atomic_t user_namespaces;
> +	struct ucount ucount[UCOUNT_COUNTS];
>  };
>  
>  extern struct user_namespace init_user_ns;
> @@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
>  extern int proc_setgroups_show(struct seq_file *m, void *v);
>  extern bool userns_may_setgroups(const struct user_namespace *ns);
>  extern bool current_in_userns(const struct user_namespace *target_ns);
> +extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
> +extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
>  #else
>  
>  static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
> @@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
>  {
>  	return true;
>  }
> +
> +static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int max = READ_ONCE(init_user_ns.ucount[type].max);
> +	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
> +	if (sum > max) {
> +		atomic_dec(&init_user_ns.ucount[type].count);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +static inline void dec_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int dec = atomic_dec_if_positive(&init_user_ns.ucount[type].count);
> +	WARN_ON_ONCE(dec < 0);
> +}
>  #endif
>  
>  #endif /* _LINUX_USER_H */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 95d5498c463f..30ca5eb4ca1e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
>  
>  void __init fork_init(void)
>  {
> +	int i;
>  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
>  #ifndef ARCH_MIN_TASKALIGN
>  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
> @@ -324,7 +325,9 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
>  
> -	init_user_ns.max_user_namespaces = max_threads;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		init_user_ns.ucount[i].max = max_threads;
> +	}
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 0061550e3282..728d7e4995ff 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -66,16 +66,17 @@ static struct ctl_table_root set_root = {
>  
>  static int zero = 0;
>  static int count_max = COUNT_MAX;
> +#define UCOUNT_ENTRY(name) 				\
> +	{						\
> +		.procname	= name,			\
> +		.maxlen		= sizeof(int),		\
> +		.mode		= 0644,			\
> +		.proc_handler	= proc_dointvec_minmax,	\
> +		.extra1		= &zero,		\
> +		.extra2		= &count_max,		\
> +	}
>  static struct ctl_table userns_table[] = {
> -	{
> -		.procname	= "max_user_namespaces",
> -		.data		= &init_user_ns.max_user_namespaces,
> -		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &zero,
> -		.extra2		= &count_max,
> -	},
> +	UCOUNT_ENTRY("max_user_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -87,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> -		tbl[0].data = &ns->max_user_namespaces;
> -
> +		int i;
> +		for (i = 0; i < UCOUNT_COUNTS; i++) {
> +			tbl[i].data = &ns->ucount[i].max;
> +		}
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -127,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> -static bool inc_user_namespaces(struct user_namespace *ns)
> +bool inc_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos, *bad;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int max = READ_ONCE(pos->max_user_namespaces);
> -		int sum = atomic_inc_return(&pos->user_namespaces);
> +		int max = READ_ONCE(pos->ucount[type].max);
> +		int sum = atomic_inc_return(&pos->ucount[type].count);
>  		if (sum > max)
>  			goto fail;
>  	}
>  	return true;
>  fail:
>  	bad = pos;
> -	atomic_dec(&pos->user_namespaces);
> +	atomic_dec(&pos->ucount[type].count);
>  	for (pos = ns; pos != bad; pos = pos->parent)
> -		atomic_dec(&pos->user_namespaces);
> +		atomic_dec(&pos->ucount[type].count);
>  
>  	return false;
>  }
>  
> -static void dec_user_namespaces(struct user_namespace *ns)
> +void dec_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
>  		WARN_ON_ONCE(dec < 0);
>  	}
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -168,7 +181,7 @@ int create_user_ns(struct cred *new)
>  	struct user_namespace *ns, *parent_ns = new->user_ns;
>  	kuid_t owner = new->euid;
>  	kgid_t group = new->egid;
> -	int ret;
> +	int ret, i;
>  
>  	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> @@ -212,7 +225,9 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> -	ns->max_user_namespaces = COUNT_MAX;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		ns->ucount[i].max = COUNT_MAX;
> +	}
>  
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 04/10] userns: Generalize the user namespace count into ucount
       [not found]           ` <20160721164014.17534-4-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:09             ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> The same kind of recursive sane default limit and policy
> control that has been implemented for the user namespace
> is desirable for the other namespaces, so generalize
> the user namespace reference count into a ucount.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
>  kernel/fork.c                  |  5 +++-
>  kernel/user_namespace.c        | 55 +++++++++++++++++++++++++++---------------
>  3 files changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index ba6a995178f9..f74a0facc696 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  
>  #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
>  
> +enum ucounts {
> +	UCOUNT_USER_NAMESPACES,
> +	UCOUNT_COUNTS,
> +};
> +
> +struct ucount {
> +	int max;
> +	atomic_t count;
> +};
> +
>  struct user_namespace {
>  	struct uid_gid_map	uid_map;
>  	struct uid_gid_map	gid_map;
> @@ -43,8 +53,7 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> -	int max_user_namespaces;
> -	atomic_t user_namespaces;
> +	struct ucount ucount[UCOUNT_COUNTS];
>  };
>  
>  extern struct user_namespace init_user_ns;
> @@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
>  extern int proc_setgroups_show(struct seq_file *m, void *v);
>  extern bool userns_may_setgroups(const struct user_namespace *ns);
>  extern bool current_in_userns(const struct user_namespace *target_ns);
> +extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
> +extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
>  #else
>  
>  static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
> @@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
>  {
>  	return true;
>  }
> +
> +static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int max = READ_ONCE(init_user_ns.ucount[type].max);
> +	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
> +	if (sum > max) {
> +		atomic_dec(&init_user_ns.ucount[type].count);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +static inline void dec_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int dec = atomic_dec_if_positive(&init_user_ns.ucount[type].count);
> +	WARN_ON_ONCE(dec < 0);
> +}
>  #endif
>  
>  #endif /* _LINUX_USER_H */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 95d5498c463f..30ca5eb4ca1e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
>  
>  void __init fork_init(void)
>  {
> +	int i;
>  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
>  #ifndef ARCH_MIN_TASKALIGN
>  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
> @@ -324,7 +325,9 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
>  
> -	init_user_ns.max_user_namespaces = max_threads;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		init_user_ns.ucount[i].max = max_threads;
> +	}
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 0061550e3282..728d7e4995ff 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -66,16 +66,17 @@ static struct ctl_table_root set_root = {
>  
>  static int zero = 0;
>  static int count_max = COUNT_MAX;
> +#define UCOUNT_ENTRY(name) 				\
> +	{						\
> +		.procname	= name,			\
> +		.maxlen		= sizeof(int),		\
> +		.mode		= 0644,			\
> +		.proc_handler	= proc_dointvec_minmax,	\
> +		.extra1		= &zero,		\
> +		.extra2		= &count_max,		\
> +	}
>  static struct ctl_table userns_table[] = {
> -	{
> -		.procname	= "max_user_namespaces",
> -		.data		= &init_user_ns.max_user_namespaces,
> -		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &zero,
> -		.extra2		= &count_max,
> -	},
> +	UCOUNT_ENTRY("max_user_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -87,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> -		tbl[0].data = &ns->max_user_namespaces;
> -
> +		int i;
> +		for (i = 0; i < UCOUNT_COUNTS; i++) {
> +			tbl[i].data = &ns->ucount[i].max;
> +		}
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -127,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> -static bool inc_user_namespaces(struct user_namespace *ns)
> +bool inc_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos, *bad;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int max = READ_ONCE(pos->max_user_namespaces);
> -		int sum = atomic_inc_return(&pos->user_namespaces);
> +		int max = READ_ONCE(pos->ucount[type].max);
> +		int sum = atomic_inc_return(&pos->ucount[type].count);
>  		if (sum > max)
>  			goto fail;
>  	}
>  	return true;
>  fail:
>  	bad = pos;
> -	atomic_dec(&pos->user_namespaces);
> +	atomic_dec(&pos->ucount[type].count);
>  	for (pos = ns; pos != bad; pos = pos->parent)
> -		atomic_dec(&pos->user_namespaces);
> +		atomic_dec(&pos->ucount[type].count);
>  
>  	return false;
>  }
>  
> -static void dec_user_namespaces(struct user_namespace *ns)
> +void dec_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
>  		WARN_ON_ONCE(dec < 0);
>  	}
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -168,7 +181,7 @@ int create_user_ns(struct cred *new)
>  	struct user_namespace *ns, *parent_ns = new->user_ns;
>  	kuid_t owner = new->euid;
>  	kgid_t group = new->egid;
> -	int ret;
> +	int ret, i;
>  
>  	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> @@ -212,7 +225,9 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> -	ns->max_user_namespaces = COUNT_MAX;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		ns->ucount[i].max = COUNT_MAX;
> +	}
>  
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 04/10] userns: Generalize the user namespace count into ucount
@ 2016-07-25 23:09             ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> The same kind of recursive sane default limit and policy
> control that has been implemented for the user namespace
> is desirable for the other namespaces, so generalize
> the user namespace reference count into a ucount.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h | 32 ++++++++++++++++++++++--
>  kernel/fork.c                  |  5 +++-
>  kernel/user_namespace.c        | 55 +++++++++++++++++++++++++++---------------
>  3 files changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index ba6a995178f9..f74a0facc696 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -22,6 +22,16 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  
>  #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
>  
> +enum ucounts {
> +	UCOUNT_USER_NAMESPACES,
> +	UCOUNT_COUNTS,
> +};
> +
> +struct ucount {
> +	int max;
> +	atomic_t count;
> +};
> +
>  struct user_namespace {
>  	struct uid_gid_map	uid_map;
>  	struct uid_gid_map	gid_map;
> @@ -43,8 +53,7 @@ struct user_namespace {
>  	struct ctl_table_set	set;
>  	struct ctl_table_header *sysctls;
>  #endif
> -	int max_user_namespaces;
> -	atomic_t user_namespaces;
> +	struct ucount ucount[UCOUNT_COUNTS];
>  };
>  
>  extern struct user_namespace init_user_ns;
> @@ -79,6 +88,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
>  extern int proc_setgroups_show(struct seq_file *m, void *v);
>  extern bool userns_may_setgroups(const struct user_namespace *ns);
>  extern bool current_in_userns(const struct user_namespace *target_ns);
> +extern bool inc_ucount(struct user_namespace *ns, enum ucounts type);
> +extern void dec_ucount(struct user_namespace *ns, enum ucounts type);
>  #else
>  
>  static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
> @@ -112,6 +123,23 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
>  {
>  	return true;
>  }
> +
> +static inline bool inc_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int max = READ_ONCE(init_user_ns.ucount[type].max);
> +	int sum = atomic_inc_return(&init_user_ns.ucount[type].count);
> +	if (sum > max) {
> +		atomic_dec(&init_user_ns.ucount[type].count);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +static inline void dec_ucount(struct user_namespace *ns, enum ucounts type)
> +{
> +	int dec = atomic_dec_if_positive(&init_user_ns.ucount[type].count);
> +	WARN_ON_ONCE(dec < 0);
> +}
>  #endif
>  
>  #endif /* _LINUX_USER_H */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 95d5498c463f..30ca5eb4ca1e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -304,6 +304,7 @@ int arch_task_struct_size __read_mostly;
>  
>  void __init fork_init(void)
>  {
> +	int i;
>  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
>  #ifndef ARCH_MIN_TASKALIGN
>  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
> @@ -324,7 +325,9 @@ void __init fork_init(void)
>  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
>  		init_task.signal->rlim[RLIMIT_NPROC];
>  
> -	init_user_ns.max_user_namespaces = max_threads;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		init_user_ns.ucount[i].max = max_threads;
> +	}
>  }
>  
>  int __weak arch_dup_task_struct(struct task_struct *dst,
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 0061550e3282..728d7e4995ff 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -66,16 +66,17 @@ static struct ctl_table_root set_root = {
>  
>  static int zero = 0;
>  static int count_max = COUNT_MAX;
> +#define UCOUNT_ENTRY(name) 				\
> +	{						\
> +		.procname	= name,			\
> +		.maxlen		= sizeof(int),		\
> +		.mode		= 0644,			\
> +		.proc_handler	= proc_dointvec_minmax,	\
> +		.extra1		= &zero,		\
> +		.extra2		= &count_max,		\
> +	}
>  static struct ctl_table userns_table[] = {
> -	{
> -		.procname	= "max_user_namespaces",
> -		.data		= &init_user_ns.max_user_namespaces,
> -		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &zero,
> -		.extra2		= &count_max,
> -	},
> +	UCOUNT_ENTRY("max_user_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> @@ -87,8 +88,10 @@ static bool setup_userns_sysctls(struct user_namespace *ns)
>  	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
>  	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
>  	if (tbl) {
> -		tbl[0].data = &ns->max_user_namespaces;
> -
> +		int i;
> +		for (i = 0; i < UCOUNT_COUNTS; i++) {
> +			tbl[i].data = &ns->ucount[i].max;
> +		}
>  		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
>  	}
>  	if (!ns->sysctls) {
> @@ -127,34 +130,44 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
>  	cred->user_ns = user_ns;
>  }
>  
> -static bool inc_user_namespaces(struct user_namespace *ns)
> +bool inc_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos, *bad;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int max = READ_ONCE(pos->max_user_namespaces);
> -		int sum = atomic_inc_return(&pos->user_namespaces);
> +		int max = READ_ONCE(pos->ucount[type].max);
> +		int sum = atomic_inc_return(&pos->ucount[type].count);
>  		if (sum > max)
>  			goto fail;
>  	}
>  	return true;
>  fail:
>  	bad = pos;
> -	atomic_dec(&pos->user_namespaces);
> +	atomic_dec(&pos->ucount[type].count);
>  	for (pos = ns; pos != bad; pos = pos->parent)
> -		atomic_dec(&pos->user_namespaces);
> +		atomic_dec(&pos->ucount[type].count);
>  
>  	return false;
>  }
>  
> -static void dec_user_namespaces(struct user_namespace *ns)
> +void dec_ucount(struct user_namespace *ns, enum ucounts type)
>  {
>  	struct user_namespace *pos;
>  	for (pos = ns; pos; pos = pos->parent) {
> -		int dec = atomic_dec_if_positive(&pos->user_namespaces);
> +		int dec = atomic_dec_if_positive(&pos->ucount[type].count);
>  		WARN_ON_ONCE(dec < 0);
>  	}
>  }
>  
> +static bool inc_user_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
> +static void dec_user_namespaces(struct user_namespace *ns)
> +{
> +	return dec_ucount(ns, UCOUNT_USER_NAMESPACES);
> +}
> +
>  /*
>   * Create a new user namespace, deriving the creator from the user in the
>   * passed credentials, and replacing that user with the new root user for the
> @@ -168,7 +181,7 @@ int create_user_ns(struct cred *new)
>  	struct user_namespace *ns, *parent_ns = new->user_ns;
>  	kuid_t owner = new->euid;
>  	kgid_t group = new->egid;
> -	int ret;
> +	int ret, i;
>  
>  	ret = -EUSERS;
>  	if (parent_ns->level > 32)
> @@ -212,7 +225,9 @@ int create_user_ns(struct cred *new)
>  	ns->level = parent_ns->level + 1;
>  	ns->owner = owner;
>  	ns->group = group;
> -	ns->max_user_namespaces = COUNT_MAX;
> +	for (i = 0; i < UCOUNT_COUNTS; i++) {
> +		ns->ucount[i].max = COUNT_MAX;
> +	}
>  
>  	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
>  	mutex_lock(&userns_state_mutex);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 05/10] pidns: Add a limit on the number of pid namespaces
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-25 23:09                 ` Serge E. Hallyn
  -1 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/pid_namespace.c         | 22 ++++++++++++++++++----
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index f74a0facc696..47733637741a 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -24,6 +24,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  
>  enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
> +	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a65ba137fd15..049cc14ae37a 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -79,6 +79,16 @@ static void proc_cleanup_work(struct work_struct *work)
>  /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
>  #define MAX_PID_NS_LEVEL 32
>  
> +static bool inc_pid_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_PID_NAMESPACES);
> +}
> +
> +static void dec_pid_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_PID_NAMESPACES);
> +}
> +
>  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
>  	struct pid_namespace *parent_pid_ns)
>  {
> @@ -87,15 +97,16 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
>  	int i;
>  	int err;
>  
> -	if (level > MAX_PID_NS_LEVEL) {
> -		err = -EINVAL;
> +	err = -EINVAL;
> +	if (level > MAX_PID_NS_LEVEL)
> +		goto out;
> +	if (!inc_pid_namespaces(user_ns))
>  		goto out;
> -	}
>  
>  	err = -ENOMEM;
>  	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
>  	if (ns == NULL)
> -		goto out;
> +		goto out_dec;
>  
>  	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
>  	if (!ns->pidmap[0].page)
> @@ -129,6 +140,8 @@ out_free_map:
>  	kfree(ns->pidmap[0].page);
>  out_free:
>  	kmem_cache_free(pid_ns_cachep, ns);
> +out_dec:
> +	dec_pid_namespaces(user_ns);
>  out:
>  	return ERR_PTR(err);
>  }
> @@ -146,6 +159,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
>  	ns_free_inum(&ns->ns);
>  	for (i = 0; i < PIDMAP_ENTRIES; i++)
>  		kfree(ns->pidmap[i].page);
> +	dec_pid_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	call_rcu(&ns->rcu, delayed_free_pidns);
>  }
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 728d7e4995ff..02a03ead7afc 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -77,6 +77,7 @@ static int count_max = COUNT_MAX;
>  	}
>  static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
> +	UCOUNT_ENTRY("max_pid_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 05/10] pidns: Add a limit on the number of pid namespaces
@ 2016-07-25 23:09                 ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/pid_namespace.c         | 22 ++++++++++++++++++----
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index f74a0facc696..47733637741a 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -24,6 +24,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  
>  enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
> +	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a65ba137fd15..049cc14ae37a 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -79,6 +79,16 @@ static void proc_cleanup_work(struct work_struct *work)
>  /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
>  #define MAX_PID_NS_LEVEL 32
>  
> +static bool inc_pid_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_PID_NAMESPACES);
> +}
> +
> +static void dec_pid_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_PID_NAMESPACES);
> +}
> +
>  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
>  	struct pid_namespace *parent_pid_ns)
>  {
> @@ -87,15 +97,16 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
>  	int i;
>  	int err;
>  
> -	if (level > MAX_PID_NS_LEVEL) {
> -		err = -EINVAL;
> +	err = -EINVAL;
> +	if (level > MAX_PID_NS_LEVEL)
> +		goto out;
> +	if (!inc_pid_namespaces(user_ns))
>  		goto out;
> -	}
>  
>  	err = -ENOMEM;
>  	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
>  	if (ns == NULL)
> -		goto out;
> +		goto out_dec;
>  
>  	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
>  	if (!ns->pidmap[0].page)
> @@ -129,6 +140,8 @@ out_free_map:
>  	kfree(ns->pidmap[0].page);
>  out_free:
>  	kmem_cache_free(pid_ns_cachep, ns);
> +out_dec:
> +	dec_pid_namespaces(user_ns);
>  out:
>  	return ERR_PTR(err);
>  }
> @@ -146,6 +159,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
>  	ns_free_inum(&ns->ns);
>  	for (i = 0; i < PIDMAP_ENTRIES; i++)
>  		kfree(ns->pidmap[i].page);
> +	dec_pid_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	call_rcu(&ns->rcu, delayed_free_pidns);
>  }
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 728d7e4995ff..02a03ead7afc 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -77,6 +77,7 @@ static int count_max = COUNT_MAX;
>  	}
>  static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
> +	UCOUNT_ENTRY("max_pid_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces
  2016-07-21 16:40         ` [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
@ 2016-07-25 23:09               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  kernel/utsname.c               | 31 ++++++++++++++++++++++++++-----
>  3 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 47733637741a..bed2506081fe 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -25,6 +25,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
>  	UCOUNT_PID_NAMESPACES,
> +	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 02a03ead7afc..6b205c24e888 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -78,6 +78,7 @@ static int count_max = COUNT_MAX;
>  static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
>  	UCOUNT_ENTRY("max_pid_namespaces"),
> +	UCOUNT_ENTRY("max_uts_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> diff --git a/kernel/utsname.c b/kernel/utsname.c
> index 831ea7108232..4f13c0419d64 100644
> --- a/kernel/utsname.c
> +++ b/kernel/utsname.c
> @@ -17,6 +17,16 @@
>  #include <linux/user_namespace.h>
>  #include <linux/proc_ns.h>
>  
> +static bool inc_uts_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_UTS_NAMESPACES);
> +}
> +
> +static void dec_uts_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_UTS_NAMESPACES);
> +}
> +
>  static struct uts_namespace *create_uts_ns(void)
>  {
>  	struct uts_namespace *uts_ns;
> @@ -38,15 +48,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
>  	struct uts_namespace *ns;
>  	int err;
>  
> +	err = -ENFILE;
> +	if (!inc_uts_namespaces(user_ns))
> +		goto fail;
> +
> +	err = -ENOMEM;
>  	ns = create_uts_ns();
>  	if (!ns)
> -		return ERR_PTR(-ENOMEM);
> +		goto fail_dec;
>  
>  	err = ns_alloc_inum(&ns->ns);
> -	if (err) {
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_free;
>  
>  	ns->ns.ops = &utsns_operations;
>  
> @@ -55,6 +68,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
>  	ns->user_ns = get_user_ns(user_ns);
>  	up_read(&uts_sem);
>  	return ns;
> +
> +fail_free:
> +	kfree(ns);
> +fail_dec:
> +	dec_uts_namespaces(user_ns);
> +fail:
> +	return ERR_PTR(err);
>  }
>  
>  /*
> @@ -85,6 +105,7 @@ void free_uts_ns(struct kref *kref)
>  	struct uts_namespace *ns;
>  
>  	ns = container_of(kref, struct uts_namespace, kref);
> +	dec_uts_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces
@ 2016-07-25 23:09               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  kernel/utsname.c               | 31 ++++++++++++++++++++++++++-----
>  3 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 47733637741a..bed2506081fe 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -25,6 +25,7 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
>  enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
>  	UCOUNT_PID_NAMESPACES,
> +	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 02a03ead7afc..6b205c24e888 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -78,6 +78,7 @@ static int count_max = COUNT_MAX;
>  static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
>  	UCOUNT_ENTRY("max_pid_namespaces"),
> +	UCOUNT_ENTRY("max_uts_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> diff --git a/kernel/utsname.c b/kernel/utsname.c
> index 831ea7108232..4f13c0419d64 100644
> --- a/kernel/utsname.c
> +++ b/kernel/utsname.c
> @@ -17,6 +17,16 @@
>  #include <linux/user_namespace.h>
>  #include <linux/proc_ns.h>
>  
> +static bool inc_uts_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_UTS_NAMESPACES);
> +}
> +
> +static void dec_uts_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_UTS_NAMESPACES);
> +}
> +
>  static struct uts_namespace *create_uts_ns(void)
>  {
>  	struct uts_namespace *uts_ns;
> @@ -38,15 +48,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
>  	struct uts_namespace *ns;
>  	int err;
>  
> +	err = -ENFILE;
> +	if (!inc_uts_namespaces(user_ns))
> +		goto fail;
> +
> +	err = -ENOMEM;
>  	ns = create_uts_ns();
>  	if (!ns)
> -		return ERR_PTR(-ENOMEM);
> +		goto fail_dec;
>  
>  	err = ns_alloc_inum(&ns->ns);
> -	if (err) {
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_free;
>  
>  	ns->ns.ops = &utsns_operations;
>  
> @@ -55,6 +68,13 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
>  	ns->user_ns = get_user_ns(user_ns);
>  	up_read(&uts_sem);
>  	return ns;
> +
> +fail_free:
> +	kfree(ns);
> +fail_dec:
> +	dec_uts_namespaces(user_ns);
> +fail:
> +	return ERR_PTR(err);
>  }
>  
>  /*
> @@ -85,6 +105,7 @@ void free_uts_ns(struct kref *kref)
>  	struct uts_namespace *ns;
>  
>  	ns = container_of(kref, struct uts_namespace, kref);
> +	dec_uts_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 07/10] ipcns: Add a  limit on the number of ipc namespaces
       [not found]             ` <20160721164014.17534-7-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:10               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:10 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  ipc/namespace.c                | 42 +++++++++++++++++++++++++++++++-----------
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 33 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index bed2506081fe..367cf08ff63d 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -26,6 +26,7 @@ enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
> +	UCOUNT_IPC_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 04cb07eb81f1..3996a1e41a1d 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -16,33 +16,42 @@
>  
>  #include "util.h"
>  
> +static bool inc_ipc_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_IPC_NAMESPACES);
> +}
> +
> +static void dec_ipc_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_IPC_NAMESPACES);
> +}
> +
>  static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  					   struct ipc_namespace *old_ns)
>  {
>  	struct ipc_namespace *ns;
>  	int err;
>  
> +	err = -ENFILE;
> +	if (!inc_ipc_namespaces(user_ns))
> +		goto fail;
> +
> +	err = -ENOMEM;
>  	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
>  	if (ns == NULL)
> -		return ERR_PTR(-ENOMEM);
> +		goto fail_dec;
>  
>  	err = ns_alloc_inum(&ns->ns);
> -	if (err) {
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_free;
>  	ns->ns.ops = &ipcns_operations;
>  
>  	atomic_set(&ns->count, 1);
>  	ns->user_ns = get_user_ns(user_ns);
>  
>  	err = mq_init_ns(ns);
> -	if (err) {
> -		put_user_ns(ns->user_ns);
> -		ns_free_inum(&ns->ns);
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_put;
>  	atomic_inc(&nr_ipc_ns);
>  
>  	sem_init_ns(ns);
> @@ -50,6 +59,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  	shm_init_ns(ns);
>  
>  	return ns;
> +
> +fail_put:
> +	put_user_ns(ns->user_ns);
> +	ns_free_inum(&ns->ns);
> +fail_free:
> +	kfree(ns);
> +fail_dec:
> +	dec_ipc_namespaces(user_ns);
> +fail:
> +	return ERR_PTR(err);
>  }
>  
>  struct ipc_namespace *copy_ipcs(unsigned long flags,
> @@ -98,6 +117,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>  	shm_exit_ns(ns);
>  	atomic_dec(&nr_ipc_ns);
>  
> +	dec_ipc_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 6b205c24e888..060d3e099f87 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -79,6 +79,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
> +	UCOUNT_ENTRY("max_ipc_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 07/10] ipcns: Add a  limit on the number of ipc namespaces
  2016-07-21 16:40             ` Eric W. Biederman
  (?)
@ 2016-07-25 23:10             ` Serge E. Hallyn
  -1 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:10 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  1 +
>  ipc/namespace.c                | 42 +++++++++++++++++++++++++++++++-----------
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 33 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index bed2506081fe..367cf08ff63d 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -26,6 +26,7 @@ enum ucounts {
>  	UCOUNT_USER_NAMESPACES,
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
> +	UCOUNT_IPC_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 04cb07eb81f1..3996a1e41a1d 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -16,33 +16,42 @@
>  
>  #include "util.h"
>  
> +static bool inc_ipc_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_IPC_NAMESPACES);
> +}
> +
> +static void dec_ipc_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_IPC_NAMESPACES);
> +}
> +
>  static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  					   struct ipc_namespace *old_ns)
>  {
>  	struct ipc_namespace *ns;
>  	int err;
>  
> +	err = -ENFILE;
> +	if (!inc_ipc_namespaces(user_ns))
> +		goto fail;
> +
> +	err = -ENOMEM;
>  	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
>  	if (ns == NULL)
> -		return ERR_PTR(-ENOMEM);
> +		goto fail_dec;
>  
>  	err = ns_alloc_inum(&ns->ns);
> -	if (err) {
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_free;
>  	ns->ns.ops = &ipcns_operations;
>  
>  	atomic_set(&ns->count, 1);
>  	ns->user_ns = get_user_ns(user_ns);
>  
>  	err = mq_init_ns(ns);
> -	if (err) {
> -		put_user_ns(ns->user_ns);
> -		ns_free_inum(&ns->ns);
> -		kfree(ns);
> -		return ERR_PTR(err);
> -	}
> +	if (err)
> +		goto fail_put;
>  	atomic_inc(&nr_ipc_ns);
>  
>  	sem_init_ns(ns);
> @@ -50,6 +59,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  	shm_init_ns(ns);
>  
>  	return ns;
> +
> +fail_put:
> +	put_user_ns(ns->user_ns);
> +	ns_free_inum(&ns->ns);
> +fail_free:
> +	kfree(ns);
> +fail_dec:
> +	dec_ipc_namespaces(user_ns);
> +fail:
> +	return ERR_PTR(err);
>  }
>  
>  struct ipc_namespace *copy_ipcs(unsigned long flags,
> @@ -98,6 +117,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>  	shm_exit_ns(ns);
>  	atomic_dec(&nr_ipc_ns);
>  
> +	dec_ipc_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 6b205c24e888..060d3e099f87 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -79,6 +79,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_user_namespaces"),
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
> +	UCOUNT_ENTRY("max_ipc_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces
       [not found]             ` <20160721164014.17534-8-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:12               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/cgroup.c                | 15 +++++++++++++++
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 367cf08ff63d..1a3a9cb93277 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
> +	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 86cb5c6e8932..240eec43390b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
>  
>  /* cgroup namespaces */
>  
> +static bool inc_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
> +static void dec_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
>  static struct cgroup_namespace *alloc_cgroup_ns(void)
>  {
>  	struct cgroup_namespace *new_ns;
> @@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
>  void free_cgroup_ns(struct cgroup_namespace *ns)
>  {
>  	put_css_set(ns->root_cset);
> +	dec_cgroup_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> @@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
>  		return ERR_PTR(-EPERM);
>  
> +	if (!inc_cgroup_namespaces(user_ns))
> +		return ERR_PTR(-ENFILE);
> +
>  	mutex_lock(&cgroup_mutex);
>  	spin_lock_bh(&css_set_lock);
>  
> @@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	new_ns = alloc_cgroup_ns();
>  	if (IS_ERR(new_ns)) {
>  		put_css_set(cset);
> +		dec_cgroup_namespaces(user_ns);
>  		return new_ns;
>  	}
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 060d3e099f87..1cf074cb47e2 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
> +	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces
       [not found]             ` <20160721164014.17534-8-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:12               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/cgroup.c                | 15 +++++++++++++++
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 367cf08ff63d..1a3a9cb93277 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
> +	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 86cb5c6e8932..240eec43390b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
>  
>  /* cgroup namespaces */
>  
> +static bool inc_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
> +static void dec_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
>  static struct cgroup_namespace *alloc_cgroup_ns(void)
>  {
>  	struct cgroup_namespace *new_ns;
> @@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
>  void free_cgroup_ns(struct cgroup_namespace *ns)
>  {
>  	put_css_set(ns->root_cset);
> +	dec_cgroup_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> @@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
>  		return ERR_PTR(-EPERM);
>  
> +	if (!inc_cgroup_namespaces(user_ns))
> +		return ERR_PTR(-ENFILE);
> +
>  	mutex_lock(&cgroup_mutex);
>  	spin_lock_bh(&css_set_lock);
>  
> @@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	new_ns = alloc_cgroup_ns();
>  	if (IS_ERR(new_ns)) {
>  		put_css_set(cset);
> +		dec_cgroup_namespaces(user_ns);
>  		return new_ns;
>  	}
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 060d3e099f87..1cf074cb47e2 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
> +	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces
@ 2016-07-25 23:12               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/cgroup.c                | 15 +++++++++++++++
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 367cf08ff63d..1a3a9cb93277 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
> +	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 86cb5c6e8932..240eec43390b 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -6260,6 +6260,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
>  
>  /* cgroup namespaces */
>  
> +static bool inc_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
> +static void dec_cgroup_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_CGROUP_NAMESPACES);
> +}
> +
>  static struct cgroup_namespace *alloc_cgroup_ns(void)
>  {
>  	struct cgroup_namespace *new_ns;
> @@ -6281,6 +6291,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
>  void free_cgroup_ns(struct cgroup_namespace *ns)
>  {
>  	put_css_set(ns->root_cset);
> +	dec_cgroup_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	ns_free_inum(&ns->ns);
>  	kfree(ns);
> @@ -6305,6 +6316,9 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
>  		return ERR_PTR(-EPERM);
>  
> +	if (!inc_cgroup_namespaces(user_ns))
> +		return ERR_PTR(-ENFILE);
> +
>  	mutex_lock(&cgroup_mutex);
>  	spin_lock_bh(&css_set_lock);
>  
> @@ -6317,6 +6331,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
>  	new_ns = alloc_cgroup_ns();
>  	if (IS_ERR(new_ns)) {
>  		put_css_set(cset);
> +		dec_cgroup_namespaces(user_ns);
>  		return new_ns;
>  	}
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 060d3e099f87..1cf074cb47e2 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
> +	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
>  #endif /* CONFIG_SYSCTL */
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
       [not found]             ` <20160721164014.17534-9-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2016-07-25 23:13               ` Serge E. Hallyn
  2016-07-26  6:01                 ` Andrei Vagin
  1 sibling, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:13 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  net/core/net_namespace.c       | 15 +++++++++++++++
>  3 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 1a3a9cb93277..f86afa536baf 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
> +	UCOUNT_NET_NAMESPACES,
>  	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 1cf074cb47e2..e326ca722ae0 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
> +	UCOUNT_ENTRY("max_net_namespaces"),
>  	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 2c2eb1b629b1..a489f192d619 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>  	return peer;
>  }
>  
> +static bool inc_net_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
> +static void dec_net_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
>  /*
>   * setup_net runs the initializers for the network namespace object.
>   */
> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>  	int error = 0;
>  	LIST_HEAD(net_exit_list);
>  
> +	if (!inc_net_namespaces(user_ns))
> +		return -ENFILE;
> +
>  	atomic_set(&net->count, 1);
>  	atomic_set(&net->passive, 1);
>  	net->dev_base_seq = 1;
> @@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
>  	}
>  	mutex_unlock(&net_mutex);
>  	if (rv < 0) {
> +		dec_net_namespaces(user_ns);
>  		put_user_ns(user_ns);
>  		net_drop_ns(net);
>  		return ERR_PTR(rv);
> @@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
>  	/* Finally it is safe to free my network namespace structure */
>  	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
>  		list_del_init(&net->exit_list);
> +		dec_net_namespaces(net->user_ns);
>  		put_user_ns(net->user_ns);
>  		net_drop_ns(net);
>  	}
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
  2016-07-21 16:40             ` Eric W. Biederman
  (?)
@ 2016-07-25 23:13             ` Serge E. Hallyn
  -1 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:13 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  net/core/net_namespace.c       | 15 +++++++++++++++
>  3 files changed, 17 insertions(+)
> 
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 1a3a9cb93277..f86afa536baf 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>  	UCOUNT_PID_NAMESPACES,
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
> +	UCOUNT_NET_NAMESPACES,
>  	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 1cf074cb47e2..e326ca722ae0 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_pid_namespaces"),
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
> +	UCOUNT_ENTRY("max_net_namespaces"),
>  	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 2c2eb1b629b1..a489f192d619 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>  	return peer;
>  }
>  
> +static bool inc_net_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
> +static void dec_net_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
>  /*
>   * setup_net runs the initializers for the network namespace object.
>   */
> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>  	int error = 0;
>  	LIST_HEAD(net_exit_list);
>  
> +	if (!inc_net_namespaces(user_ns))
> +		return -ENFILE;
> +
>  	atomic_set(&net->count, 1);
>  	atomic_set(&net->passive, 1);
>  	net->dev_base_seq = 1;
> @@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
>  	}
>  	mutex_unlock(&net_mutex);
>  	if (rv < 0) {
> +		dec_net_namespaces(user_ns);
>  		put_user_ns(user_ns);
>  		net_drop_ns(net);
>  		return ERR_PTR(rv);
> @@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
>  	/* Finally it is safe to free my network namespace structure */
>  	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
>  		list_del_init(&net->exit_list);
> +		dec_net_namespaces(net->user_ns);
>  		put_user_ns(net->user_ns);
>  		net_drop_ns(net);
>  	}
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 10/10] mntns: Add a limit on the number of mount namespaces.
  2016-07-21 16:40         ` Eric W. Biederman
@ 2016-07-25 23:15               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Quoting Eric W. Biederman (ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org):
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Serge Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>

Thanks, Eric.

> ---
>  fs/namespace.c                 | 19 ++++++++++++++++++-
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index aabe8e397fc3..3942ae6c34f5 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2718,9 +2718,20 @@ dput_out:
>  	return retval;
>  }
>  
> +static bool inc_mnt_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_MNT_NAMESPACES);
> +}
> +
> +static void dec_mnt_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_MNT_NAMESPACES);
> +}
> +
>  static void free_mnt_ns(struct mnt_namespace *ns)
>  {
>  	ns_free_inum(&ns->ns);
> +	dec_mnt_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	kfree(ns);
>  }
> @@ -2739,12 +2750,18 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
>  	struct mnt_namespace *new_ns;
>  	int ret;
>  
> +	if (!inc_mnt_namespaces(user_ns))
> +		return ERR_PTR(-ENFILE);
> +
>  	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
> -	if (!new_ns)
> +	if (!new_ns) {
> +		dec_mnt_namespaces(user_ns);
>  		return ERR_PTR(-ENOMEM);
> +	}
>  	ret = ns_alloc_inum(&new_ns->ns);
>  	if (ret) {
>  		kfree(new_ns);
> +		dec_mnt_namespaces(user_ns);
>  		return ERR_PTR(ret);
>  	}
>  	new_ns->ns.ops = &mntns_operations;
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index f86afa536baf..ee0e9d7b2e67 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -28,6 +28,7 @@ enum ucounts {
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
>  	UCOUNT_NET_NAMESPACES,
> +	UCOUNT_MNT_NAMESPACES,
>  	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index e326ca722ae0..f6702d84afab 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -81,6 +81,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
>  	UCOUNT_ENTRY("max_net_namespaces"),
> +	UCOUNT_ENTRY("max_mnt_namespaces"),
>  	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 10/10] mntns: Add a limit on the number of mount namespaces.
@ 2016-07-25 23:15               ` Serge E. Hallyn
  0 siblings, 0 replies; 113+ messages in thread
From: Serge E. Hallyn @ 2016-07-25 23:15 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Acked-by: Serge Hallyn <serge@hallyn.com>

Thanks, Eric.

> ---
>  fs/namespace.c                 | 19 ++++++++++++++++++-
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  3 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index aabe8e397fc3..3942ae6c34f5 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2718,9 +2718,20 @@ dput_out:
>  	return retval;
>  }
>  
> +static bool inc_mnt_namespaces(struct user_namespace *ns)
> +{
> +	return inc_ucount(ns, UCOUNT_MNT_NAMESPACES);
> +}
> +
> +static void dec_mnt_namespaces(struct user_namespace *ns)
> +{
> +	dec_ucount(ns, UCOUNT_MNT_NAMESPACES);
> +}
> +
>  static void free_mnt_ns(struct mnt_namespace *ns)
>  {
>  	ns_free_inum(&ns->ns);
> +	dec_mnt_namespaces(ns->user_ns);
>  	put_user_ns(ns->user_ns);
>  	kfree(ns);
>  }
> @@ -2739,12 +2750,18 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
>  	struct mnt_namespace *new_ns;
>  	int ret;
>  
> +	if (!inc_mnt_namespaces(user_ns))
> +		return ERR_PTR(-ENFILE);
> +
>  	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
> -	if (!new_ns)
> +	if (!new_ns) {
> +		dec_mnt_namespaces(user_ns);
>  		return ERR_PTR(-ENOMEM);
> +	}
>  	ret = ns_alloc_inum(&new_ns->ns);
>  	if (ret) {
>  		kfree(new_ns);
> +		dec_mnt_namespaces(user_ns);
>  		return ERR_PTR(ret);
>  	}
>  	new_ns->ns.ops = &mntns_operations;
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index f86afa536baf..ee0e9d7b2e67 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -28,6 +28,7 @@ enum ucounts {
>  	UCOUNT_UTS_NAMESPACES,
>  	UCOUNT_IPC_NAMESPACES,
>  	UCOUNT_NET_NAMESPACES,
> +	UCOUNT_MNT_NAMESPACES,
>  	UCOUNT_CGROUP_NAMESPACES,
>  	UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index e326ca722ae0..f6702d84afab 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -81,6 +81,7 @@ static struct ctl_table userns_table[] = {
>  	UCOUNT_ENTRY("max_uts_namespaces"),
>  	UCOUNT_ENTRY("max_ipc_namespaces"),
>  	UCOUNT_ENTRY("max_net_namespaces"),
> +	UCOUNT_ENTRY("max_mnt_namespaces"),
>  	UCOUNT_ENTRY("max_cgroup_namespaces"),
>  	{ }
>  };
> -- 
> 2.8.3

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-26  0:02                 ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  0:02 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

"Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> writes:

> Limit per userns sysctls to only be opened for write by a holder
> of CAP_SYS_RESOURCE.
>
> Add all of the necessary boilerplate for having per user namespace
> sysctls.

> @@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
>  
>  	do {
>  		parent = ns->parent;
> +		retire_userns_sysctls(ns);
^^^^^^^^^^ Unfortunately it is not safe to call a sleeping function here
           so this part needs to be taken back to the drawing board.
 
           Which means this change has to wait for the next cycle.
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  		key_put(ns->persistent_keyring_register);
>  #endif

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-26  0:02                 ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  0:02 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev, linux-kernel, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, linux-api, linux-fsdevel, Jann Horn

"Eric W. Biederman" <ebiederm@xmission.com> writes:

> Limit per userns sysctls to only be opened for write by a holder
> of CAP_SYS_RESOURCE.
>
> Add all of the necessary boilerplate for having per user namespace
> sysctls.

> @@ -141,6 +215,7 @@ void free_user_ns(struct user_namespace *ns)
>  
>  	do {
>  		parent = ns->parent;
> +		retire_userns_sysctls(ns);
^^^^^^^^^^ Unfortunately it is not safe to call a sleeping function here
           so this part needs to be taken back to the drawing board.
 
           Which means this change has to wait for the next cycle.
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  		key_put(ns->persistent_keyring_register);
>  #endif

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
  2016-07-26  0:02                 ` Eric W. Biederman
@ 2016-07-26  0:24                     ` David Miller
  -1 siblings, 0 replies; 113+ messages in thread
From: David Miller @ 2016-07-26  0:24 UTC (permalink / raw)
  To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
  Cc: keescook-F7+t8E8rja9g9hUCZPvPmw, netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, luto-kltTT9wpgjJwATOyAt5JVQ,
	seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw, kernel-6AxghH7DbtA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	jann-XZ1E9jl8jIdeoWH0uzbU5w

From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
Date: Mon, 25 Jul 2016 19:02:01 -0500

>            Which means this change gets has to wait for next cycle.

Ok.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-26  0:24                     ` David Miller
  0 siblings, 0 replies; 113+ messages in thread
From: David Miller @ 2016-07-26  0:24 UTC (permalink / raw)
  To: ebiederm
  Cc: containers, keescook, netdev, linux-kernel, luto, seth.forshee,
	kernel, linux-api, linux-fsdevel, jann

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 25 Jul 2016 19:02:01 -0500

>            Which means this change gets has to wait for next cycle.

Ok.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
  2016-07-26  0:24                     ` David Miller
@ 2016-07-26  0:44                         ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  0:44 UTC (permalink / raw)
  To: David Miller
  Cc: keescook-F7+t8E8rja9g9hUCZPvPmw, netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, luto-kltTT9wpgjJwATOyAt5JVQ,
	seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw, kernel-6AxghH7DbtA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	jann-XZ1E9jl8jIdeoWH0uzbU5w

David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> writes:

> From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
> Date: Mon, 25 Jul 2016 19:02:01 -0500
>
>>            Which means this change gets has to wait for next cycle.
>
> Ok.

For clarity I intend to merge these changes through the userns tree,
when the issues are resolved.

I Cc'd netdev as there is a limit on the number of network namespaces in
this set which may be of interest to networking folks.

I expect there will be some follow on about adding sanity checking
limits to other kernel data structures like a maximum number of mounts
in a mount namespace, and perhaps a maximum number of routes in a
network namespace.

User namespaces have enabled unprivileged users access to a lot more
data structures and so to catch programs that go crazy we need a lot
more limits.  I believe some of those limits make sense per namespace,
as it is easy in some cases to say that any more than Y of those
per namespace is excessive.  For example, a limit of 1,000,000 ipv4
routes per network namespace is a sanity check, as there are
currently 621,649 ipv4 prefixes advertised in BGP.

But that is something to worry about after the merge window.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-26  0:44                         ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  0:44 UTC (permalink / raw)
  To: David Miller
  Cc: containers, keescook, netdev, linux-kernel, luto, seth.forshee,
	kernel, linux-api, linux-fsdevel, jann

David Miller <davem@davemloft.net> writes:

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 25 Jul 2016 19:02:01 -0500
>
>>            Which means this change gets has to wait for next cycle.
>
> Ok.

For clarity I intend to merge these changes through the userns tree,
when the issues are resolved.

I Cc'd netdev as there is a limit on the number of network namespaces in
this set which may be of interest to networking folks.

I expect there will be some follow on about adding sanity checking
limits to other kernel data structures like a maximum number of mounts
in a mount namespace, and perhaps a maximum number of routes in a
network namespace.

User namespaces have enabled unprivileged users access to a lot more
data structures and so to catch programs that go crazy we need a lot
more limits.  I believe some of those limits make sense per namespace,
as it is easy in some cases to say that any more than Y of those
per namespace is excessive.  For example, a limit of 1,000,000 ipv4
routes per network namespace is a sanity check, as there are
currently 621,649 ipv4 prefixes advertised in BGP.

But that is something to worry about after the merge window.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
  2016-07-26  0:44                         ` Eric W. Biederman
@ 2016-07-26  2:58                             ` David Miller
  -1 siblings, 0 replies; 113+ messages in thread
From: David Miller @ 2016-07-26  2:58 UTC (permalink / raw)
  To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
  Cc: keescook-F7+t8E8rja9g9hUCZPvPmw, netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, luto-kltTT9wpgjJwATOyAt5JVQ,
	seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw, kernel-6AxghH7DbtA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	jann-XZ1E9jl8jIdeoWH0uzbU5w

From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
Date: Mon, 25 Jul 2016 19:44:50 -0500

> User namespaces have enabled unprivileged users access to a lot more
> data structures and so to catch programs that go crazy we need a lot
> more limits.  I believe some of those limits make sense per namespace.
> As it is easy in some cases to say any more than Y number of those
> per namespace is excessive.   For example a limit of 1,000,000 ipv4
> routes per network namespaces is a sanity check as there are
> currently 621,649 ipv4 prefixes advertized in bgp.

When we give a new namespace to unprivileged users, we honestly should
make the sysctl settings we give to them become "limits".  They can
further constrain the sysctl settings but may not raise them.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-26  2:58                             ` David Miller
  0 siblings, 0 replies; 113+ messages in thread
From: David Miller @ 2016-07-26  2:58 UTC (permalink / raw)
  To: ebiederm
  Cc: containers, keescook, netdev, linux-kernel, luto, seth.forshee,
	kernel, linux-api, linux-fsdevel, jann

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 25 Jul 2016 19:44:50 -0500

> User namespaces have enabled unprivileged users access to a lot more
> data structures and so to catch programs that go crazy we need a lot
> more limits.  I believe some of those limits make sense per namespace.
> As it is easy in some cases to say any more than Y number of those
> per namespace is excessive.   For example a limit of 1,000,000 ipv4
> routes per network namespaces is a sanity check as there are
> currently 621,649 ipv4 prefixes advertized in bgp.

When we give a new namespace to unprivileged users, we honestly should
make the sysctl settings we give to them become "limits".  They can
further constrain the sysctl settings but may not raise them.

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
       [not found]                             ` <20160725.195836.1881375122073635224.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
@ 2016-07-26  4:00                               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  4:00 UTC (permalink / raw)
  To: David Miller
  Cc: keescook-F7+t8E8rja9g9hUCZPvPmw, netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, luto-kltTT9wpgjJwATOyAt5JVQ,
	seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw, kernel-6AxghH7DbtA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	jann-XZ1E9jl8jIdeoWH0uzbU5w

David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> writes:

> From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
> Date: Mon, 25 Jul 2016 19:44:50 -0500
>
>> User namespaces have enabled unprivileged users access to a lot more
>> data structures and so to catch programs that go crazy we need a lot
>> more limits.  I believe some of those limits make sense per namespace.
>> As it is easy in some cases to say any more than Y number of those
>> per namespace is excessive.   For example a limit of 1,000,000 ipv4
>> routes per network namespaces is a sanity check as there are
>> currently 621,649 ipv4 prefixes advertized in bgp.
>
> When we give a new namespace to unprivileged users, we honestly should
> make the sysctl settings we give to them become "limits".  They can
> further constrain the sysctl settings but may not raise them.

I won't disagree.  I was thinking in terms of global settings that
hold the limits for per-namespace counters, as we are talking about
sanity-check limits.

Perhaps we could get sophisticated and do something more but the simpler
we can make things and get the job done the better.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
       [not found]                             ` <20160725.195836.1881375122073635224.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
@ 2016-07-26  4:00                               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  4:00 UTC (permalink / raw)
  To: David Miller
  Cc: containers, keescook, netdev, linux-kernel, luto, seth.forshee,
	kernel, linux-api, linux-fsdevel, jann

David Miller <davem@davemloft.net> writes:

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Mon, 25 Jul 2016 19:44:50 -0500
>
>> User namespaces have enabled unprivileged users access to a lot more
>> data structures and so to catch programs that go crazy we need a lot
>> more limits.  I believe some of those limits make sense per namespace.
>> As it is easy in some cases to say any more than Y number of those
>> per namespace is excessive.   For example a limit of 1,000,000 ipv4
>> routes per network namespaces is a sanity check as there are
>> currently 621,649 ipv4 prefixes advertized in bgp.
>
> When we give a new namespace to unprivileged users, we honestly should
> make the sysctl settings we give to them become "limits".  They can
> further constrain the sysctl settings but may not raise them.

I won't disagree.  I was thinking in terms of global settings that
hold the limits for per-namespace counters, as we are talking about
sanity-check limits.

Perhaps we could get sophisticated and do something more but the simpler
we can make things and get the job done the better.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 02/10] userns: Add per user namespace sysctls.
@ 2016-07-26  4:00                               ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26  4:00 UTC (permalink / raw)
  To: David Miller
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	keescook-F7+t8E8rja9g9hUCZPvPmw, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, luto-kltTT9wpgjJwATOyAt5JVQ,
	seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw, kernel-6AxghH7DbtA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	jann-XZ1E9jl8jIdeoWH0uzbU5w

David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> writes:

> From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
> Date: Mon, 25 Jul 2016 19:44:50 -0500
>
>> User namespaces have enabled unprivileged users access to a lot more
>> data structures and so to catch programs that go crazy we need a lot
>> more limits.  I believe some of those limits make sense per namespace.
>> As it is easy in some cases to say any more than Y number of those
>> per namespace is excessive.   For example a limit of 1,000,000 ipv4
>> routes per network namespaces is a sanity check as there are
>> currently 621,649 ipv4 prefixes advertized in bgp.
>
> When we give a new namespace to unprivileged users, we honestly should
> make the sysctl settings we give to them become "limits".  They can
> further constrain the sysctl settings but may not raise them.

I won't disagree.  I was thinking in terms of global settings that
hold the limits for per-namespace counters, as we are talking about
sanity-check limits.

Perhaps we could get sophisticated and do something more but the simpler
we can make things and get the job done the better.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
  2016-07-21 16:40             ` Eric W. Biederman
@ 2016-07-26  6:01                 ` Andrei Vagin
  -1 siblings, 0 replies; 113+ messages in thread
From: Andrei Vagin @ 2016-07-26  6:01 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers, LKML,
	Andy Lutomirski, Seth Forshee, Nikolay Borisov, Linux API,
	linux-fsdevel, Jann Horn

On Thu, Jul 21, 2016 at 9:40 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  net/core/net_namespace.c       | 15 +++++++++++++++
>  3 files changed, 17 insertions(+)
>
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 1a3a9cb93277..f86afa536baf 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>         UCOUNT_PID_NAMESPACES,
>         UCOUNT_UTS_NAMESPACES,
>         UCOUNT_IPC_NAMESPACES,
> +       UCOUNT_NET_NAMESPACES,
>         UCOUNT_CGROUP_NAMESPACES,
>         UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 1cf074cb47e2..e326ca722ae0 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>         UCOUNT_ENTRY("max_pid_namespaces"),
>         UCOUNT_ENTRY("max_uts_namespaces"),
>         UCOUNT_ENTRY("max_ipc_namespaces"),
> +       UCOUNT_ENTRY("max_net_namespaces"),
>         UCOUNT_ENTRY("max_cgroup_namespaces"),
>         { }
>  };
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 2c2eb1b629b1..a489f192d619 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>         return peer;
>  }
>
> +static bool inc_net_namespaces(struct user_namespace *ns)
> +{
> +       return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
> +static void dec_net_namespaces(struct user_namespace *ns)
> +{
> +       dec_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
>  /*
>   * setup_net runs the initializers for the network namespace object.
>   */
> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>         int error = 0;
>         LIST_HEAD(net_exit_list);
>
> +       if (!inc_net_namespaces(user_ns))
> +               return -ENFILE;

I think you need to move this check after initializing net->passive.
When setup_net returns an error, net_drop_ns is called:

void net_drop_ns(void *p)
{
        struct net *ns = p;
        if (ns && atomic_dec_and_test(&ns->passive))
                net_free(ns);
}

Actually, I think it would be better to make this check before net_alloc().

> +
>         atomic_set(&net->count, 1);
>         atomic_set(&net->passive, 1);
>         net->dev_base_seq = 1;
> @@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
>         }
>         mutex_unlock(&net_mutex);
>         if (rv < 0) {
> +               dec_net_namespaces(user_ns);
>                 put_user_ns(user_ns);
>                 net_drop_ns(net);
>                 return ERR_PTR(rv);
> @@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
>         /* Finally it is safe to free my network namespace structure */
>         list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
>                 list_del_init(&net->exit_list);
> +               dec_net_namespaces(net->user_ns);
>                 put_user_ns(net->user_ns);
>                 net_drop_ns(net);
>         }
> --
> 2.8.3
>
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
@ 2016-07-26  6:01                 ` Andrei Vagin
  0 siblings, 0 replies; 113+ messages in thread
From: Andrei Vagin @ 2016-07-26  6:01 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Kees Cook, netdev, LKML, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Linux API, linux-fsdevel,
	Jann Horn

On Thu, Jul 21, 2016 at 9:40 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>  include/linux/user_namespace.h |  1 +
>  kernel/user_namespace.c        |  1 +
>  net/core/net_namespace.c       | 15 +++++++++++++++
>  3 files changed, 17 insertions(+)
>
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index 1a3a9cb93277..f86afa536baf 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -27,6 +27,7 @@ enum ucounts {
>         UCOUNT_PID_NAMESPACES,
>         UCOUNT_UTS_NAMESPACES,
>         UCOUNT_IPC_NAMESPACES,
> +       UCOUNT_NET_NAMESPACES,
>         UCOUNT_CGROUP_NAMESPACES,
>         UCOUNT_COUNTS,
>  };
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index 1cf074cb47e2..e326ca722ae0 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -80,6 +80,7 @@ static struct ctl_table userns_table[] = {
>         UCOUNT_ENTRY("max_pid_namespaces"),
>         UCOUNT_ENTRY("max_uts_namespaces"),
>         UCOUNT_ENTRY("max_ipc_namespaces"),
> +       UCOUNT_ENTRY("max_net_namespaces"),
>         UCOUNT_ENTRY("max_cgroup_namespaces"),
>         { }
>  };
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 2c2eb1b629b1..a489f192d619 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>         return peer;
>  }
>
> +static bool inc_net_namespaces(struct user_namespace *ns)
> +{
> +       return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
> +static void dec_net_namespaces(struct user_namespace *ns)
> +{
> +       dec_ucount(ns, UCOUNT_NET_NAMESPACES);
> +}
> +
>  /*
>   * setup_net runs the initializers for the network namespace object.
>   */
> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>         int error = 0;
>         LIST_HEAD(net_exit_list);
>
> +       if (!inc_net_namespaces(user_ns))
> +               return -ENFILE;

I think you need to move this check after initializing net->passive.
When setup_net returns an error, net_drop_ns is called:

void net_drop_ns(void *p)
{
        struct net *ns = p;
        if (ns && atomic_dec_and_test(&ns->passive))
                net_free(ns);
}

Actually, I think it would be better to make this check before net_alloc().

> +
>         atomic_set(&net->count, 1);
>         atomic_set(&net->passive, 1);
>         net->dev_base_seq = 1;
> @@ -372,6 +385,7 @@ struct net *copy_net_ns(unsigned long flags,
>         }
>         mutex_unlock(&net_mutex);
>         if (rv < 0) {
> +               dec_net_namespaces(user_ns);
>                 put_user_ns(user_ns);
>                 net_drop_ns(net);
>                 return ERR_PTR(rv);
> @@ -444,6 +458,7 @@ static void cleanup_net(struct work_struct *work)
>         /* Finally it is safe to free my network namespace structure */
>         list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
>                 list_del_init(&net->exit_list);
> +               dec_net_namespaces(net->user_ns);
>                 put_user_ns(net->user_ns);
>                 net_drop_ns(net);
>         }
> --
> 2.8.3
>
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-21 16:39     ` Eric W. Biederman
@ 2016-07-26 10:27         ` Michael Kerrisk (man-pages)
  -1 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 10:27 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Hello Eric,

On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.
>
> The maximum sane case I can imagine is if every process is a fat
> process, so I set the maximum number of namespaces to the maximum
> number of threads.
>
> I make these limits recursive and per user namespace so that a
> usernamespace root can reduce the limits further.  If a user namespace
> root raises the limit the limit in the parent namespace will be honored.
>
> I have cut this implementation to the bare minimum needed to achieve
> these objectives.
>
> Does anyone know if there is a proper error code to return for resource
> limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
> those feel a little wrong.

ENFILE certainly seems weird. I suppose my first question is: why two
different errors?

Some alternatives you might want to consider: E2BIG, EOVERFLOW,
or (maybe) ERANGE.

Cheers,

Michael

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 10:27         ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 10:27 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: mtk.manpages, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Hello Eric,

On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.
>
> The maximum sane case I can imagine is if every process is a fat
> process, so I set the maximum number of namespaces to the maximum
> number of threads.
>
> I make these limits recursive and per user namespace so that a
> usernamespace root can reduce the limits further.  If a user namespace
> root raises the limit the limit in the parent namespace will be honored.
>
> I have cut this implementation to the bare minimum needed to achieve
> these objectives.
>
> Does anyone know if there is a proper error code to return for resource
> limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
> those feel a little wrong.

ENFILE certainly seems weird. I suppose my first question is: why two
different errors?

Some alternatives you might want to consider: E2BIG, EOVERFLOW,
or (maybe) ERANGE.

Cheers,

Michael

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-21 16:39     ` Eric W. Biederman
@ 2016-07-26 10:30         ` Michael Kerrisk (man-pages)
  -1 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 10:30 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

Hello Eric,

I realized I had a question after the last mail.

On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.

Can you say more about the second point? What exactly is the
problem that is being addressed, and how does the patch series
address it? (It would be good to have those details in the
revised commit message...)

Cheers,

Michael

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 10:30         ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 10:30 UTC (permalink / raw)
  To: Eric W. Biederman, Linux Containers
  Cc: mtk.manpages, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

Hello Eric,

I realized I had a question after the last mail.

On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>
> This patchset addresses two use cases:
> - Implement a sane upper bound on the number of namespaces.
> - Provide a way for sandboxes to limit the attack surface from
>   namespaces.

Can you say more about the second point? What exactly is the
problem that is being addressed, and how does the patch series
address it? (It would be good to have those details in the
revised commit message...)

Cheers,

Michael

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]         ` <6be70177-a81d-7ed8-d2c9-a596d4d6a165-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-07-26 15:06           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:06 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

"Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Hello Eric,
>
> I realized I had a question after the last mail.
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Can you say more about the second point? What exactly is the
> problem that is being addressed, and how does the patch series
> address it? (It would be good to have those details in the
> revised commit message...)

At some point it was reported that seccomp was not sufficient to disable
namespace creation.  I need to go back and look at that claim to see
which set of circumstances that was referring to.  Seccomp doesn't stack
so I can see why it is an issue.

The general problem is that namespaces by their nature (and especially
in combination with the user namespaces) allow unprivileged users to use
more of the kernel than a user would have access to without them.  This
in turn allows malicious users more kernel calls they can use in an
attempt to find an exploitable bug.

So if you are building a sandbox/chroot jail/chromium tab or anything
like that and you know you won't be needing a kernel feature, having an
easy way to disable the feature is useful for making the kernel
marginally more secure, as certain attack vectors are no longer
possible.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]         ` <6be70177-a81d-7ed8-d2c9-a596d4d6a165-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2016-07-26 15:06           ` Eric W. Biederman
@ 2016-07-26 15:06           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:06 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

"Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> writes:

> Hello Eric,
>
> I realized I had a question after the last mail.
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Can you say more about the second point? What exactly is the
> problem that is being addressed, and how does the patch series
> address it? (It would be good to have those details in the
> revised commit message...)

At some point it was reported that seccomp was not sufficient to disable
namespace creation.  I need to go back and look at that claim to see
which set of circumstances that was referring to.  Seccomp doesn't stack
so I can see why it is an issue.

The general problem is that namespaces by their nature (and especially
in combination with the user namespaces) allow unprivileged users to use
more of the kernel than a user would have access to without them.  This
in turn allows malicious users more kernel calls they can use in an
attempt to find an exploitable bug.

So if you are building a sandbox/chroot jail/chromium tab or anything
like that and you know you won't be needing a kernel feature, having an
easy way to disable the feature is useful for making the kernel
marginally more secure, as certain attack vectors are no longer
possible.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 15:06           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:06 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA

"Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Hello Eric,
>
> I realized I had a question after the last mail.
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Can you say more about the second point? What exactly is the
> problem that is being addressed, and how does the patch series
> address it? (It would be good to have those details in the
> revised commit message...)

At some point it was reported that seccomp was not sufficient to disable
namespace creation.  I need to go back and look at that claim to see
which set of circumstances that was referring to.  Seccomp doesn't stack
so I can see why it is an issue.

The general problem is that namespaces by their nature (and especially
in combination with the user namespaces) allow unprivileged users to use
more of the kernel than a user would have access to without them.  This
in turn allows malicious users more kernel calls they can use in an
attempt to find an exploitable bug.

So if you are building a sandbox/chroot jail/chromium tab or anything
like that and you know you won't be needing a kernel feature, having an
easy way to disable the feature is useful for making the kernel
marginally more secure, as certain attack vectors are no longer
possible.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 15:06           ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:06 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA

"Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Hello Eric,
>
> I realized I had a question after the last mail.
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>
> Can you say more about the second point? What exactly is the
> problem that is being addressed, and how does the patch series
> address it? (It would be good to have those details in the
> revised commit message...)

At some point it was reported that seccomp was not sufficient to disable
namespace creation.  I need to go back and look at that claim to see
which set of circumstances that was referring to.  Seccomp doesn't stack
so I can see why it is an issue.

The general problem is that namespaces by their nature (and especially
in combination with the user namespaces) allow unprivileged users to use
more of the kernel than a user would have access to without them.  This
in turn allows malicious users more kernel calls they can use in an
attempt to find an exploitable bug.

So if you are building a sandbox/chroot jail/chromium tab or anything
like that and you know you won't be needing a kernel feature, having an
easy way to disable the feature is useful for making the kernel
marginally more secure, as certain attack vectors are no longer
possible.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-26 10:27         ` Michael Kerrisk (man-pages)
  (?)
@ 2016-07-26 15:14             ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:14 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

"Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Hello Eric,
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>>
>> The maximum sane case I can imagine is if every process is a fat
>> process, so I set the maximum number of namespaces to the maximum
>> number of threads.
>>
>> I make these limits recursive and per user namespace so that a
>> usernamespace root can reduce the limits further.  If a user namespace
>> root raises the limit the limit in the parent namespace will be honored.
>>
>> I have cut this implementation to the bare minimum needed to achieve
>> these objectives.
>>
>> Does anyone know if there is a proper error code to return for resource
>> limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
>> those feel a little wrong.
>
> ENFILE certainly seems weird. I suppose my first question is: why two
> different errors?

EUSERS was there in the user namespace case for this condition (well
nesting depth but same principle) so it made sense to start with.  But
too many users doesn't really make sense.

ENFILE is actually slightly less insane.  It actually is about hitting a
resource limit, and seemed the most generic catchall we had.

> Some alternatives you might want to consider: E2BIG, EOVERFLOW,
> or (maybe) ERANGE.

My problem with those is typically those are about the values in fields,
not about creating too many things.  So those really don't feel right.
EACCES or EPERM might be better but those are very very generic.

I really want ETOOMANY or something like that.

I am actually surprised given how common this pattern is, and the fact that
rlimits have existed forever, that we don't have a resource limit
exceeded error code.  I suppose it might be worth looking at Posix to
see if they have any suggestions.  Perhaps it may even be time to add an
appropriate error code to the list.

Not the most important thing out of this patch series but something
worth looking at.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 15:14             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:14 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Linux Containers, Andy Lutomirski, Jann Horn, Kees Cook,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	netdev, linux-kernel, linux-api

"Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> writes:

> Hello Eric,
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>>
>> The maximum sane case I can imagine is if every process is a fat
>> process, so I set the maximum number of namespaces to the maximum
>> number of threads.
>>
>> I make these limits recursive and per user namespace so that a
>> usernamespace root can reduce the limits further.  If a user namespace
>> root raises the limit the limit in the parent namespace will be honored.
>>
>> I have cut this implementation to the bare minimum needed to achieve
>> these objectives.
>>
>> Does anyone know if there is a proper error code to return for resource
>> limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
>> those feel a little wrong.
>
> ENFILE certainly seems weird. I suppose my first question is: why two
> different errors?

EUSERS was there in the user namespace case for this condition (well
nesting depth but same principle) so it made sense to start with.  But
too many users doesn't really make sense.

ENFILE is actually slightly less insane.  It actually is about hitting a
resource limit, and seemed the most generic catchall we had.

> Some alternatives you might want to consider: E2BIG, EOVERFLOW,
> or (maybe) ERANGE.

My problem with those is typically those are about the values in fields,
not about creating too many things.  So those really don't feel right.
EACCES or EPERM might be better but those are very very generic.

I really want ETOOMANY or something like that.

I am actually surprised given how common this pattern is, and the fact that
rlimits have existed forever, that we don't have a resource limit
exceeded error code.  I suppose it might be worth looking at Posix to
see if they have any suggestions.  Perhaps it may even be time to add an
appropriate error code to the list.

Not the most important thing out of this patch series but something
worth looking at.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 15:14             ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 15:14 UTC (permalink / raw)
  To: Michael Kerrisk (man-pages)
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

"Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Hello Eric,
>
> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>
>> This patchset addresses two use cases:
>> - Implement a sane upper bound on the number of namespaces.
>> - Provide a way for sandboxes to limit the attack surface from
>>   namespaces.
>>
>> The maximum sane case I can imagine is if every process is a fat
>> process, so I set the maximum number of namespaces to the maximum
>> number of threads.
>>
>> I make these limits recursive and per user namespace so that a
>> usernamespace root can reduce the limits further.  If a user namespace
>> root raises the limit the limit in the parent namespace will be honored.
>>
>> I have cut this implementation to the bare minimum needed to achieve
>> these objectives.
>>
>> Does anyone know if there is a proper error code to return for resource
>> limit exceeded?  I am currently using -EUSERS or -ENFILE but both of
>> those feel a little wrong.
>
> ENFILE certainly seems weird. I suppose my first question is: why two
> different errors?

EUSERS was there in the user namespace case for this condition (well
nesting depth but same principle) so it made sense to start with.  But
too many users doesn't really make sense.

ENFILE is actually slightly less insane.  It actually is about hitting a
resource limit, and seemed the most generic catchall we had.

> Some alternatives you might want to consider: E2BIG, EOVERFLOW,
> or (maybe) ERANGE.

My problem with those is typically those are about the values in fields,
not about creating too many things.  So those really don't feel right.
EACCES or EPERM might be better but those are very very generic.

I really want ETOOMANY or something like that.

I am actually surprised given how common this pattern is, and the fact that
rlimits have existed forever, that we don't have a resource limit
exceeded error code.  I suppose it might be worth looking at Posix to
see if they have any suggestions.  Perhaps it may even be time to add an
appropriate error code to the list.

Not the most important thing out of this patch series but something
worth looking at.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]           ` <87shuwtp4c.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-26 16:52             ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-26 16:52 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Network Development, Linux Containers, LKML, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Michael Kerrisk (man-pages),
	Linux API, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> "Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>
>> Hello Eric,
>>
>> I realized I had a question after the last mail.
>>
>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>
>> Can you say more about the second point? What exactly is the
>> problem that is being addressed, and how does the patch series
>> address it? (It would be good to have those details in the
>> revised commit message...)
>
> At some point it was reported that seccomp was not sufficient to disable
> namespace creation.  I need to go back and look at that claim to see
> which set of circumstances that was referring to.  Seccomp doesn't stack
> so I can see why it is an issue.

seccomp does stack. The trouble usually comes from a perception that
seccomp overhead is not trivial, so setting a system-wide policy is a
bit of a large hammer for such a limitation. Also, at the time,
seccomp could be bypassed with ptrace, but this (as of v4.8) is no
longer true.

> The general problem is that namespaces by their nature (and especially
> in combination with the user namespaces) allow unprivileged users to use
> more of the kernel than a user would have access to without them.  This
> in turn allows malicious users more kernel calls they can use in attempt
> to find an exploitable bug.
>
> So if you are building a sandbox/chroot jail/chromium tab or anything
> like that and you know you won't be needing a kernel feature having an
> easy way to disable the feature is useful for making the kernel
> marginally more secure, as certain attack vectors are no longer
> possible.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]           ` <87shuwtp4c.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
@ 2016-07-26 16:52             ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-26 16:52 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Michael Kerrisk (man-pages),
	Linux Containers, Andy Lutomirski, Jann Horn, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	Network Development, LKML, Linux API

On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> writes:
>
>> Hello Eric,
>>
>> I realized I had a question after the last mail.
>>
>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>
>> Can you say more about the second point? What exactly is the
>> problem that is being addressed, and how does the patch series
>> address it? (It would be good to have those details in the
>> revised commit message...)
>
> At some point it was reported that seccomp was not sufficient to disable
> namespace creation.  I need to go back and look at that claim to see
> which set of circumstances that was referring to.  Seccomp doesn't stack
> so I can see why it is an issue.

seccomp does stack. The trouble usually comes from a perception that
seccomp overhead is not trivial, so setting a system-wide policy is a
bit of a large hammer for such a limitation. Also, at the time,
seccomp could be bypassed with ptrace, but this (as of v4.8) is no
longer true.

> The general problem is that namespaces by their nature (and especially
> in combination with the user namespaces) allow unprivileged users to use
> more of the kernel than a user would have access to without them.  This
> in turn allows malicious users more kernel calls they can use in attempt
> to find an exploitable bug.
>
> So if you are building a sandbox/chroot jail/chromium tab or anything
> like that and you know you won't be needing a kernel feature having an
> easy way to disable the feature is useful for making the kernel
> marginally more secure, as certain attack vectors are no longer
> possible.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 16:52             ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-26 16:52 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Michael Kerrisk (man-pages),
	Linux Containers, Andy Lutomirski, Jann Horn, Nikolay Borisov,
	Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Network Development, LKML,
	Linux API

On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> "Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>
>> Hello Eric,
>>
>> I realized I had a question after the last mail.
>>
>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>
>>> This patchset addresses two use cases:
>>> - Implement a sane upper bound on the number of namespaces.
>>> - Provide a way for sandboxes to limit the attack surface from
>>>   namespaces.
>>
>> Can you say more about the second point? What exactly is the
>> problem that is being addressed, and how does the patch series
>> address it? (It would be good to have those details in the
>> revised commit message...)
>
> At some point it was reported that seccomp was not sufficient to disable
> namespace creation.  I need to go back and look at that claim to see
> which set of circumstances that was referring to.  Seccomp doesn't stack
> so I can see why it is an issue.

seccomp does stack. The trouble usually comes from a perception that
seccomp overhead is not trivial, so setting a system-wide policy is a
bit of a large hammer for such a limitation. Also, at the time,
seccomp could be bypassed with ptrace, but this (as of v4.8) is no
longer true.

> The general problem is that namespaces by their nature (and especially
> in combination with the user namespaces) allow unprivileged users to use
> more of the kernel than a user would have access to without them.  This
> in turn allows malicious users more kernel calls they can use in attempt
> to find an exploitable bug.
>
> So if you are building a sandbox/chroot jail/chromium tab or anything
> like that and you know you won't be needing a kernel feature having an
> easy way to disable the feature is useful for making the kernel
> marginally more secure, as certain attack vectors are no longer
> possible.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]             ` <CAGXu5j+ZtZAFpW0ajnnBTRZk17ez6XYF=meEJY_rF7wwgyZB5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-07-26 17:29               ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 17:29 UTC (permalink / raw)
  To: Kees Cook
  Cc: Network Development, Linux Containers, LKML, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman, Linux API,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

On 26 July 2016 at 18:52, Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
> On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> "Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>>
>>> Hello Eric,
>>>
>>> I realized I had a question after the last mail.
>>>
>>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Can you say more about the second point? What exactly is the
>>> problem that is being addressed, and how does the patch series
>>> address it? (It would be good to have those details in the
>>> revised commit message...)
>>
>> At some point it was reported that seccomp was not sufficient to disable
>> namespace creation.  I need to go back and look at that claim to see
>> which set of circumstances that was referring to.  Seccomp doesn't stack
>> so I can see why it is an issue.
>
> seccomp does stack. The trouble usually comes from a perception that
> seccomp overhead is not trivial, so setting a system-wide policy is a
> bit of a large hammer for such a limitation. Also, at the time,
> seccomp could be bypassed with ptrace, but this (as of v4.8) is no
> longer true.

Sounds like someone needs to send me a patch for the seccomp.2 man page?

Cheers,

Michael

-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]             ` <CAGXu5j+ZtZAFpW0ajnnBTRZk17ez6XYF=meEJY_rF7wwgyZB5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-07-26 17:29               ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 17:29 UTC (permalink / raw)
  To: Kees Cook
  Cc: Eric W. Biederman, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	Network Development, LKML, Linux API

On 26 July 2016 at 18:52, Kees Cook <keescook@chromium.org> wrote:
> On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>> "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> writes:
>>
>>> Hello Eric,
>>>
>>> I realized I had a question after the last mail.
>>>
>>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Can you say more about the second point? What exactly is the
>>> problem that is being addressed, and how does the patch series
>>> address it? (It would be good to have those details in the
>>> revised commit message...)
>>
>> At some point it was reported that seccomp was not sufficient to disable
>> namespace creation.  I need to go back and look at that claim to see
>> which set of circumstances that was referring to.  Seccomp doesn't stack
>> so I can see why it is an issue.
>
> seccomp does stack. The trouble usually comes from a perception that
> seccomp overhead is not trivial, so setting a system-wide policy is a
> bit of a large hammer for such a limitation. Also, at the time,
> seccomp could be bypassed with ptrace, but this (as of v4.8) is no
> longer true.

Sounds like someone needs to send me a patch for the seccomp.2 man page?

Cheers,

Michael

-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-07-26 17:29               ` Michael Kerrisk (man-pages)
  0 siblings, 0 replies; 113+ messages in thread
From: Michael Kerrisk (man-pages) @ 2016-07-26 17:29 UTC (permalink / raw)
  To: Kees Cook
  Cc: Eric W. Biederman, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Network Development, LKML,
	Linux API

On 26 July 2016 at 18:52, Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
> On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> "Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>>
>>> Hello Eric,
>>>
>>> I realized I had a question after the last mail.
>>>
>>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>>
>>>> This patchset addresses two use cases:
>>>> - Implement a sane upper bound on the number of namespaces.
>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>   namespaces.
>>>
>>> Can you say more about the second point? What exactly is the
>>> problem that is being addressed, and how does the patch series
>>> address it? (It would be good to have those details in the
>>> revised commit message...)
>>
>> At some point it was reported that seccomp was not sufficient to disable
>> namespace creation.  I need to go back and look at that claim to see
>> which set of circumstances that was referring to.  Seccomp doesn't stack
>> so I can see why it is an issue.
>
> seccomp does stack. The trouble usually comes from a perception that
> seccomp overhead is not trivial, so setting a system-wide policy is a
> bit of a large hammer for such a limitation. Also, at the time,
> seccomp could be bypassed with ptrace, but this (as of v4.8) is no
> longer true.

Sounds like someone needs to send me a patch for the seccomp.2 man page?

Cheers,

Michael

-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
  2016-07-26  6:01                 ` Andrei Vagin
@ 2016-07-26 20:00                     ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 20:00 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: Kees Cook, netdev-u79uwXL29TY76Z2rM5mHXA, Linux Containers, LKML,
	Andy Lutomirski, Seth Forshee, Nikolay Borisov, Linux API,
	linux-fsdevel, Jann Horn

Andrei Vagin <avagin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> On Thu, Jul 21, 2016 at 9:40 AM, Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> index 2c2eb1b629b1..a489f192d619 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>>         return peer;
>>  }
>>
>> +static bool inc_net_namespaces(struct user_namespace *ns)
>> +{
>> +       return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
>> +}
>> +
>> +static void dec_net_namespaces(struct user_namespace *ns)
>> +{
>> +       dec_ucount(ns, UCOUNT_NET_NAMESPACES);
>> +}
>> +
>>  /*
>>   * setup_net runs the initializers for the network namespace object.
>>   */
>> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>>         int error = 0;
>>         LIST_HEAD(net_exit_list);
>>
>> +       if (!inc_net_namespaces(user_ns))
>> +               return -ENFILE;
>
> I think you need to move this check after initializing net->passive.
> When setup_net returns an error, net_drop_ns is called:
>
Good point.  Ouch!

> void net_drop_ns(void *p)
> {
>         struct net *ns = p;
>         if (ns && atomic_dec_and_test(&ns->passive))
>                 net_free(ns);
> }
>
> Actually, I think it would be better to make this check before
> net_alloc().

You are probably right.  I seem to be trying to be entirely too clever
putting that in setup_net so I can cover the initial network namespace,
which really does not need to be counted.  Clearly I also goofed up
the decrement in the error case.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 09/10] netns: Add a limit on the number of net namespaces
@ 2016-07-26 20:00                     ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-07-26 20:00 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: Linux Containers, Kees Cook, netdev, LKML, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Linux API, linux-fsdevel,
	Jann Horn

Andrei Vagin <avagin@gmail.com> writes:

> On Thu, Jul 21, 2016 at 9:40 AM, Eric W. Biederman <ebiederm@xmission.com> wrote:
>> index 2c2eb1b629b1..a489f192d619 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -266,6 +266,16 @@ struct net *get_net_ns_by_id(struct net *net, int id)
>>         return peer;
>>  }
>>
>> +static bool inc_net_namespaces(struct user_namespace *ns)
>> +{
>> +       return inc_ucount(ns, UCOUNT_NET_NAMESPACES);
>> +}
>> +
>> +static void dec_net_namespaces(struct user_namespace *ns)
>> +{
>> +       dec_ucount(ns, UCOUNT_NET_NAMESPACES);
>> +}
>> +
>>  /*
>>   * setup_net runs the initializers for the network namespace object.
>>   */
>> @@ -276,6 +286,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
>>         int error = 0;
>>         LIST_HEAD(net_exit_list);
>>
>> +       if (!inc_net_namespaces(user_ns))
>> +               return -ENFILE;
>
> I think you need to move this check after initializing net->passive.
> When setup_net returns an error, net_drop_ns is called:
>
Good point.  Ouch!

> void net_drop_ns(void *p)
> {
>         struct net *ns = p;
>         if (ns && atomic_dec_and_test(&ns->passive))
>                 net_free(ns);
> }
>
> Actually, I think it would be better to make this check before
> net_alloc().

You are probably right.  I seem to be trying to be entirely too clever
putting that in setup_net so I can cover the initial network namespace,
which really does not need to be counted.  Clearly I also goofed up
the decrement in the error case.

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
       [not found]               ` <CAKgNAkhCvb-WkBbwebbBw6cfhg0FJ-SSbvK=aS1WK+BQ55eO5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-07-26 20:44                 ` Kees Cook
  0 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-26 20:44 UTC (permalink / raw)
  To: Michael Kerrisk-manpages
  Cc: Network Development, Linux Containers, LKML, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Eric W. Biederman, Linux API,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn

On Tue, Jul 26, 2016 at 10:29 AM, Michael Kerrisk (man-pages)
<mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> On 26 July 2016 at 18:52, Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
>> On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
>> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>> "Michael Kerrisk (man-pages)" <mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>>>
>>>> Hello Eric,
>>>>
>>>> I realized I had a question after the last mail.
>>>>
>>>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>>>
>>>>> This patchset addresses two use cases:
>>>>> - Implement a sane upper bound on the number of namespaces.
>>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>>   namespaces.
>>>>
>>>> Can you say more about the second point? What exactly is the
>>>> problem that is being addressed, and how does the patch series
>>>> address it? (It would be good to have those details in the
>>>> revised commit message...)
>>>
>>> At some point it was reported that seccomp was not sufficient to disable
>>> namespace creation.  I need to go back and look at that claim to see
>>> which set of circumstances that was referring to.  Seccomp doesn't stack
>>> so I can see why it is an issue.
>>
>> seccomp does stack. The trouble usually comes from a perception that
>> seccomp overhead is not trivial, so setting a system-wide policy is a
>> bit of a large hammer for such a limitation. Also, at the time,
>> seccomp could be bypassed with ptrace, but this (as of v4.8) is no
>> longer true.
>
> Sounds like someone needs to send me a patch for the seccomp.2 man page?

It's on my TODO list, no worries. :) I'm waiting for it to land in
Linus's tree first. It's only been in -next so far.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-26 17:29               ` Michael Kerrisk (man-pages)
  (?)
  (?)
@ 2016-07-26 20:44               ` Kees Cook
  -1 siblings, 0 replies; 113+ messages in thread
From: Kees Cook @ 2016-07-26 20:44 UTC (permalink / raw)
  To: Michael Kerrisk-manpages
  Cc: Eric W. Biederman, Linux Containers, Andy Lutomirski, Jann Horn,
	Nikolay Borisov, Serge E. Hallyn, Seth Forshee, linux-fsdevel,
	Network Development, LKML, Linux API

On Tue, Jul 26, 2016 at 10:29 AM, Michael Kerrisk (man-pages)
<mtk.manpages@gmail.com> wrote:
> On 26 July 2016 at 18:52, Kees Cook <keescook@chromium.org> wrote:
>> On Tue, Jul 26, 2016 at 8:06 AM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>> "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> writes:
>>>
>>>> Hello Eric,
>>>>
>>>> I realized I had a question after the last mail.
>>>>
>>>> On 07/21/2016 06:39 PM, Eric W. Biederman wrote:
>>>>>
>>>>> This patchset addresses two use cases:
>>>>> - Implement a sane upper bound on the number of namespaces.
>>>>> - Provide a way for sandboxes to limit the attack surface from
>>>>>   namespaces.
>>>>
>>>> Can you say more about the second point? What exactly is the
>>>> problem that is being addressed, and how does the patch series
>>>> address it? (It would be good to have those details in the
>>>> revised commit message...)
>>>
>>> At some point it was reported that seccomp was not sufficient to disable
>>> namespace creation.  I need to go back and look at that claim to see
>>> which set of circumstances that was referring to.  Seccomp doesn't stack
>>> so I can see why it is an issue.
>>
>> seccomp does stack. The trouble usually comes from a perception that
>> seccomp overhead is not trivial, so setting a system-wide policy is a
>> bit of a large hammer for such a limitation. Also, at the time,
>> seccomp could be bypassed with ptrace, but this (as of v4.8) is no
>> longer true.
>
> Sounds like someone needs to send me a patch for the seccomp.2 man page?

It's on my TODO list, no worries. :) I'm waiting for it to land in
Linus's tree first. It's only been in -next so far.

-Kees

-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
  2016-07-21 16:39     ` Eric W. Biederman
  (?)
@ 2016-08-08 21:16         ` Eric W. Biederman
  -1 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-08-08 21:16 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Colin Walters, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Michael Kerrisk (man-pages),
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn


I won't have any more time for this until I return from vacation at the
end of the month but after a little bit of thought I think I have fixed
all of the bugs (except arguably the return value).

I have further tweaked these and made the limits per user, because it
occurred to me that if the limits are global it is possible for one
misbehaving user to DoS another, which is undesirable in principle.

I have put my updated code at:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-08-08 21:16         ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-08-08 21:16 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, netdev, linux-kernel, Andy Lutomirski, Seth Forshee,
	Nikolay Borisov, linux-api, linux-fsdevel, Jann Horn,
	Andrei Vagin, Colin Walters, Michael Kerrisk (man-pages)


I won't have any more time for this until I return from vacation at the
end of the month but after a little bit of thought I think I have fixed
all of the bugs (except arguably the return value).

I have further tweaked these and made the limits per user, because it
occurred to me that if the limits are global it is possible for one
misbehaving user to DoS another, which is undesirable in principle.

I have put my updated code at:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

* Re: [PATCH v2 00/10] userns: sysctl limits for namespaces
@ 2016-08-08 21:16         ` Eric W. Biederman
  0 siblings, 0 replies; 113+ messages in thread
From: Eric W. Biederman @ 2016-08-08 21:16 UTC (permalink / raw)
  To: Linux Containers
  Cc: Kees Cook, Colin Walters, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	Seth Forshee, Nikolay Borisov, Michael Kerrisk (man-pages),
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Jann Horn


I won't have any more time for this until I return from vacation at the
end of the month but after a little bit of thought I think I have fixed
all of the bugs (except arguably the return value).

I have further tweaked these and made the limits per user, because it
occurred to me that if the limits are global it is possible for one
misbehaving user to DoS another, which is undesirable in principle.

I have put my updated code at:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-testing

Eric

^ permalink raw reply	[flat|nested] 113+ messages in thread

end of thread, other threads:[~2016-08-08 21:29 UTC | newest]

Thread overview: 113+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-20  1:13 [PATCH 0/10] userns: sysctl limits for namespaces Eric W. Biederman
     [not found] ` <8737n5dscy.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-20  1:21   ` [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
     [not found]     ` <20160720012138.7894-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-20  1:21       ` [PATCH 02/10] userns: Add per user namespace sysctls Eric W. Biederman
     [not found]         ` <20160720012138.7894-2-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-20  5:12           ` Serge E. Hallyn
     [not found]             ` <20160720051226.GC16885-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
2016-07-20 15:55               ` Eric W. Biederman
     [not found]                 ` <87shv49ud0.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-20 16:43                   ` Serge E. Hallyn
     [not found]                     ` <20160720164347.GA24378-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
2016-07-21 14:41                       ` Eric W. Biederman
     [not found]                         ` <87invz9hpz.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-21 15:07                           ` Serge E. Hallyn
2016-07-20  6:28           ` Nikolay Borisov
     [not found]             ` <578F1A00.6030701-6AxghH7DbtA@public.gmane.org>
2016-07-21 15:37               ` Eric W. Biederman
2016-07-20  1:21       ` [PATCH 03/10] userns: Add a limit on the number of user namespaces Eric W. Biederman
     [not found]         ` <20160720012138.7894-3-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-20  7:03           ` Nikolay Borisov
     [not found]             ` <578F2233.2040708-6AxghH7DbtA@public.gmane.org>
2016-07-21 15:23               ` Eric W. Biederman
2016-07-20  1:21       ` [PATCH 04/10] userns: Generalize the user namespace count into ucount Eric W. Biederman
2016-07-20  1:21       ` [PATCH 05/10] pidns: Add a limit on the number of pid namespaces Eric W. Biederman
2016-07-20  1:21       ` [PATCH 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
2016-07-20  1:21       ` [PATCH 07/10] ipcns: Add a limit on the number of ipc namespaces Eric W. Biederman
2016-07-20  1:21       ` [PATCH 08/10] cgroupns: Add a limit on the number of cgroup namespaces Eric W. Biederman
2016-07-20  1:21       ` [PATCH 09/10] netns: Add a limit on the number of net namespaces Eric W. Biederman
2016-07-20  1:21       ` [PATCH 10/10] mntns: Add a limit on the number of mount namespaces Eric W. Biederman
2016-07-20  1:58       ` [PATCH 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
2016-07-20  5:06       ` Serge E. Hallyn
2016-07-20  4:02   ` [PATCH 0/10] userns: sysctl limits for namespaces Kees Cook
     [not found]     ` <CAGXu5jKWjHKxtPSGuogjR+XE+SbNWiUp2bOxugpD+hUex9QeZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-21 14:49       ` Eric W. Biederman
2016-07-21 16:58       ` Eric W. Biederman
     [not found]         ` <871t2n53o5.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-21 18:46           ` Kees Cook
2016-07-21 16:39   ` [PATCH v2 00/10] " Eric W. Biederman
2016-07-21 16:39     ` Eric W. Biederman
2016-07-22 13:33     ` Colin Walters
     [not found]       ` <1469194399.3817016.673814953.7581706C-2RFepEojUI2N1INw9kWLP6GC3tUn3ZHUQQ4Iyu8u01E@public.gmane.org>
2016-07-22 18:45         ` Eric W. Biederman
2016-07-22 18:45           ` Eric W. Biederman
2016-07-22 21:46           ` Kees Cook
2016-07-23  2:11             ` Eric W. Biederman
2016-07-23  2:11               ` Eric W. Biederman
2016-07-23  2:11               ` Eric W. Biederman
     [not found]             ` <CAGXu5j+X7eWggkwpBpABsFe4hqK5LN1mYJ2TH91qj3iSe6rtcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-23  2:11               ` Eric W. Biederman
     [not found]           ` <87poq5y0jw.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-22 21:46             ` Kees Cook
     [not found]     ` <87d1m754jc.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-21 16:40       ` [PATCH v2 01/10] sysctl: Stop implicitly passing current into sysctl_table_root.lookup Eric W. Biederman
2016-07-21 16:40         ` Eric W. Biederman
2016-07-21 16:40         ` [PATCH v2 04/10] userns: Generalize the user namespace count into ucount Eric W. Biederman
     [not found]           ` <20160721164014.17534-4-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:09             ` Serge E. Hallyn
2016-07-25 23:09           ` Serge E. Hallyn
2016-07-25 23:09             ` Serge E. Hallyn
2016-07-21 16:40         ` [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
     [not found]           ` <20160721164014.17534-6-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:09             ` Serge E. Hallyn
2016-07-25 23:09               ` Serge E. Hallyn
     [not found]         ` <20160721164014.17534-1-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-21 16:40           ` [PATCH v2 02/10] userns: Add per user namespace sysctls Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
     [not found]             ` <20160721164014.17534-2-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-26  0:02               ` Eric W. Biederman
2016-07-26  0:02                 ` Eric W. Biederman
     [not found]                 ` <878twp6zd2.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-26  0:24                   ` David Miller
2016-07-26  0:24                     ` David Miller
     [not found]                     ` <20160725.172406.352408511647766870.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2016-07-26  0:44                       ` Eric W. Biederman
2016-07-26  0:44                         ` Eric W. Biederman
     [not found]                         ` <87k2g95it9.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-26  2:58                           ` David Miller
2016-07-26  2:58                             ` David Miller
     [not found]                             ` <20160725.195836.1881375122073635224.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2016-07-26  4:00                               ` Eric W. Biederman
2016-07-26  4:00                             ` Eric W. Biederman
2016-07-26  4:00                               ` Eric W. Biederman
2016-07-21 16:40           ` [PATCH v2 03/10] userns: Add a limit on the number of user namespaces Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
     [not found]             ` <20160721164014.17534-3-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:05               ` Serge E. Hallyn
2016-07-25 23:05             ` Serge E. Hallyn
2016-07-21 16:40           ` [PATCH v2 04/10] userns: Generalize the user namespace count into ucount Eric W. Biederman
2016-07-21 16:40           ` [PATCH v2 05/10] pidns: Add a limit on the number of pid namespaces Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
     [not found]             ` <20160721164014.17534-5-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:09               ` Serge E. Hallyn
2016-07-25 23:09                 ` Serge E. Hallyn
2016-07-21 16:40           ` [PATCH v2 06/10] utsns: Add a limit on the number of uts namespaces Eric W. Biederman
2016-07-21 16:40           ` [PATCH v2 07/10] ipcns: Add a limit on the number of ipc namespaces Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
2016-07-25 23:10             ` Serge E. Hallyn
     [not found]             ` <20160721164014.17534-7-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:10               ` Serge E. Hallyn
2016-07-21 16:40           ` [PATCH v2 08/10] cgroupns: Add a limit on the number of cgroup namespaces Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
2016-07-25 23:12             ` Serge E. Hallyn
2016-07-25 23:12               ` Serge E. Hallyn
     [not found]             ` <20160721164014.17534-8-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:12               ` Serge E. Hallyn
2016-07-21 16:40           ` [PATCH v2 09/10] netns: Add a limit on the number of net namespaces Eric W. Biederman
2016-07-21 16:40             ` Eric W. Biederman
2016-07-25 23:13             ` Serge E. Hallyn
     [not found]             ` <20160721164014.17534-9-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:13               ` Serge E. Hallyn
2016-07-26  6:01               ` Andrei Vagin
2016-07-26  6:01                 ` Andrei Vagin
     [not found]                 ` <CANaxB-zfkvu9XYkuYB47tYFGLeVnWBhpgEUaSrGD4+-O8ThcNw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-26 20:00                   ` Eric W. Biederman
2016-07-26 20:00                     ` Eric W. Biederman
2016-07-21 16:40           ` [PATCH v2 10/10] mntns: Add a limit on the number of mount namespaces Eric W. Biederman
2016-07-21 16:40         ` Eric W. Biederman
     [not found]           ` <20160721164014.17534-10-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2016-07-25 23:15             ` Serge E. Hallyn
2016-07-25 23:15               ` Serge E. Hallyn
2016-07-22 13:33       ` [PATCH v2 00/10] userns: sysctl limits for namespaces Colin Walters
2016-07-26 10:27       ` Michael Kerrisk (man-pages)
2016-07-26 10:27         ` Michael Kerrisk (man-pages)
     [not found]         ` <94b608ae-1d06-5c41-cbd5-94e663a2163a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-07-26 15:14           ` Eric W. Biederman
2016-07-26 15:14             ` Eric W. Biederman
2016-07-26 15:14             ` Eric W. Biederman
2016-07-26 10:30       ` Michael Kerrisk (man-pages)
2016-07-26 10:30         ` Michael Kerrisk (man-pages)
     [not found]         ` <6be70177-a81d-7ed8-d2c9-a596d4d6a165-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-07-26 15:06           ` Eric W. Biederman
2016-07-26 15:06         ` Eric W. Biederman
2016-07-26 15:06           ` Eric W. Biederman
2016-07-26 15:06           ` Eric W. Biederman
2016-07-26 16:52           ` Kees Cook
2016-07-26 16:52             ` Kees Cook
     [not found]             ` <CAGXu5j+ZtZAFpW0ajnnBTRZk17ez6XYF=meEJY_rF7wwgyZB5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-26 17:29               ` Michael Kerrisk (man-pages)
2016-07-26 17:29             ` Michael Kerrisk (man-pages)
2016-07-26 17:29               ` Michael Kerrisk (man-pages)
     [not found]               ` <CAKgNAkhCvb-WkBbwebbBw6cfhg0FJ-SSbvK=aS1WK+BQ55eO5Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-26 20:44                 ` Kees Cook
2016-07-26 20:44               ` Kees Cook
     [not found]           ` <87shuwtp4c.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-07-26 16:52             ` Kees Cook
2016-08-08 21:16       ` Eric W. Biederman
2016-08-08 21:16         ` Eric W. Biederman
2016-08-08 21:16         ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.