All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Gladkov <gladkov.alexey@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>,
	Linux Containers <containers@lists.linux-foundation.org>,
	Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Alexey Gladkov <legion@kernel.org>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Christian Brauner <christian@brauner.io>,
	Kees Cook <keescook@chromium.org>
Subject: [RFC PATCH v1 2/4] Move the user's process counter to ucounts
Date: Mon,  2 Nov 2020 17:50:31 +0100	[thread overview]
Message-ID: <b6fb149b5884d341ededecc6f99bd1e40939b647.1604335819.git.gladkov.alexey@gmail.com> (raw)
In-Reply-To: <cover.1604335819.git.gladkov.alexey@gmail.com>

To count the number of user processes use the counter bound to the user
in the user namespace.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/exec.c                      |  7 ++++---
 fs/io-wq.c                     | 14 +++++++++-----
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h |  8 ++++++++
 kernel/cred.c                  |  8 ++++----
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  9 +++++----
 kernel/sys.c                   |  6 ++++--
 kernel/ucount.c                | 34 ++++++++++++++++++++++++++++++++++
 kernel/user.c                  |  3 ++-
 10 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..3f2071f7b9c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1826,19 +1826,20 @@ static int __do_execve_file(int fd, struct filename *filename,
 	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
 	struct files_struct *displaced;
-	int retval;
+	int retval, processes;
 
 	if (IS_ERR(filename))
 		return PTR_ERR(filename);
 
+	processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC);
+
 	/*
 	 * We move the actual failure in case of RLIMIT_NPROC excess from
 	 * set*uid() to execve() because too many poorly written programs
 	 * don't check setuid() return code.  Here we additionally recheck
 	 * whether NPROC limit is still exceeded.
 	 */
-	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+	if ((current->flags & PF_NPROC_EXCEEDED) && processes > rlimit(RLIMIT_NPROC)) {
 		retval = -EAGAIN;
 		goto out_ret;
 	}
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 47c5f3aeb460..6170aee986db 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -17,6 +17,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/fs_struct.h>
 #include <linux/task_work.h>
+#include <linux/user_namespace.h>
 
 #include "io-wq.h"
 
@@ -216,7 +217,7 @@ static void io_worker_exit(struct io_worker *worker)
 	if (worker->flags & IO_WORKER_F_RUNNING)
 		atomic_dec(&acct->nr_running);
 	if (!(worker->flags & IO_WORKER_F_BOUND))
-		atomic_dec(&wqe->wq->user->processes);
+		dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 	worker->flags = 0;
 	preempt_enable();
 
@@ -349,12 +350,12 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 			worker->flags |= IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
-			atomic_dec(&wqe->wq->user->processes);
+			dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 		} else {
 			worker->flags &= ~IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
-			atomic_inc(&wqe->wq->user->processes);
+			inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 		}
 		io_wqe_inc_running(wqe, worker);
 	 }
@@ -671,7 +672,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	spin_unlock_irq(&wqe->lock);
 
 	if (index == IO_WQ_ACCT_UNBOUND)
-		atomic_inc(&wq->user->processes);
+		inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC);
 
 	wake_up_process(worker->task);
 	return true;
@@ -754,6 +755,7 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 			    struct io_wq_work *work)
 {
 	bool free_worker;
+	int processes;
 
 	if (!(work->flags & IO_WQ_WORK_UNBOUND))
 		return true;
@@ -766,7 +768,9 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 	if (free_worker)
 		return true;
 
-	if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
+	processes = get_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
+
+	if (processes >= acct->max_workers &&
 	    !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
 		return false;
 
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 917d88edb7b9..38e122bc3d07 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index fc75af812d73..6d9d180b2c9d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,13 @@ enum ucount_type {
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
 #endif
+	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_COUNTS,
 };
 
+#define UCOUNT_MIN_RLIMIT UCOUNT_RLIMIT_NPROC
+#define UCOUNT_MAX_RLIMIT UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -104,6 +108,10 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..b6694700e760 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -351,7 +351,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		kdebug("share_creds(%p{%d,%d})",
 		       p->cred, atomic_read(&p->cred->usage),
 		       read_cred_subscribers(p->cred));
-		atomic_inc(&p->cred->user->processes);
+		inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
 		return 0;
 	}
 
@@ -384,7 +384,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	}
 #endif
 
-	atomic_inc(&new->user->processes);
+	inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
 	p->cred = p->real_cred = get_cred(new);
 	alter_cred_subscribers(new, 2);
 	validate_creds(new);
@@ -486,11 +486,11 @@ int commit_creds(struct cred *new)
 	 */
 	alter_cred_subscribers(new, 2);
 	if (new->user != old->user)
-		atomic_inc(&new->user->processes);
+		inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
-		atomic_dec(&old->user->processes);
+		dec_rlimit_counter(&init_user_ns, old->euid, UCOUNT_RLIMIT_NPROC);
 	alter_cred_subscribers(old, -2);
 
 	/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..5a0d7dd1ad64 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -187,7 +187,7 @@ void release_task(struct task_struct *p)
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
-	atomic_dec(&__task_cred(p)->user->processes);
+	dec_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
 	rcu_read_unlock();
 
 	cgroup_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..2bc8bd45179f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1844,7 +1844,7 @@ static __latent_entropy struct task_struct *copy_process(
 					int node,
 					struct kernel_clone_args *args)
 {
-	int pidfd = -1, retval;
+	int pidfd = -1, retval, processes;
 	struct task_struct *p;
 	struct multiprocess_signals delayed;
 	struct file *pidfile = NULL;
@@ -1958,9 +1958,10 @@ static __latent_entropy struct task_struct *copy_process(
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
+	processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid,
+			UCOUNT_RLIMIT_NPROC);
 	retval = -EAGAIN;
-	if (atomic_read(&p->real_cred->user->processes) >=
-			task_rlimit(p, RLIMIT_NPROC)) {
+	if (processes >= task_rlimit(p, RLIMIT_NPROC)) {
 		if (p->real_cred->user != INIT_USER &&
 		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
 			goto bad_fork_free;
@@ -2361,7 +2362,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
-	atomic_dec(&p->cred->user->processes);
+	dec_rlimit_counter(&init_user_ns, p->cred->euid, UCOUNT_RLIMIT_NPROC);
 	exit_creds(p);
 bad_fork_free:
 	p->state = TASK_DEAD;
diff --git a/kernel/sys.c b/kernel/sys.c
index 00a96746e28a..db780ec32d86 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -461,11 +461,14 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
 static int set_user(struct cred *new)
 {
 	struct user_struct *new_user;
+	int processes;
 
 	new_user = alloc_uid(new->uid);
 	if (!new_user)
 		return -EAGAIN;
 
+	processes = get_rlimit_counter(&init_user_ns, new_user->uid, UCOUNT_RLIMIT_NPROC);
+
 	/*
 	 * We don't fail in case of NPROC limit excess here because too many
 	 * poorly written programs don't check set*uid() return code, assuming
@@ -473,8 +476,7 @@ static int set_user(struct cred *new)
 	 * for programs doing set*uid()+execve() by harmlessly deferring the
 	 * failure to the execve() stage.
 	 */
-	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
-			new_user != INIT_USER)
+	if (processes >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER)
 		current->flags |= PF_NPROC_EXCEEDED;
 	else
 		current->flags &= ~PF_NPROC_EXCEEDED;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 7b2bca8582ef..e00d644e4ca5 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -74,6 +74,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -222,6 +223,39 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 	put_ucounts(ucounts);
 }
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type)
+{
+	long v;
+	struct ucounts *ucounts = get_ucounts(ns, uid);
+	if (!ucounts)
+		return LONG_MAX;
+	v = atomic_long_read(&ucounts->ucount[type]);
+	put_ucounts(ucounts);
+	return v;
+}
+
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid,
+		enum ucount_type type)
+{
+	if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+		return NULL;
+
+	return inc_ucount(ns, uid, type);
+}
+
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type)
+{
+	struct ucounts *ucounts;
+
+	if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+		return;
+
+	ucounts = get_ucounts(ns, uid);
+
+	if (ucounts)
+		dec_ucount(ucounts, type);
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
diff --git a/kernel/user.c b/kernel/user.c
index b1635d94a1f2..5bb75ebdef4f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
@@ -224,6 +223,8 @@ static int __init uid_cache_init(void)
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
 	spin_unlock_irq(&uidhash_lock);
 
+	inc_rlimit_counter(&init_user_ns, GLOBAL_ROOT_UID, UCOUNT_RLIMIT_NPROC);
+
 	return 0;
 }
 subsys_initcall(uid_cache_init);
-- 
2.25.4

_______________________________________________
Containers mailing list
Containers@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/containers

WARNING: multiple messages have this Message-ID (diff)
From: Alexey Gladkov <gladkov.alexey@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>,
	Linux Containers <containers@lists.linux-foundation.org>,
	Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Alexey Gladkov <legion@kernel.org>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Kees Cook <keescook@chromium.org>,
	Christian Brauner <christian@brauner.io>
Subject: [RFC PATCH v1 2/4] Move the user's process counter to ucounts
Date: Mon,  2 Nov 2020 17:50:31 +0100	[thread overview]
Message-ID: <b6fb149b5884d341ededecc6f99bd1e40939b647.1604335819.git.gladkov.alexey@gmail.com> (raw)
In-Reply-To: <cover.1604335819.git.gladkov.alexey@gmail.com>

To count the number of user processes use the counter bound to the user
in the user namespace.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/exec.c                      |  7 ++++---
 fs/io-wq.c                     | 14 +++++++++-----
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h |  8 ++++++++
 kernel/cred.c                  |  8 ++++----
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  9 +++++----
 kernel/sys.c                   |  6 ++++--
 kernel/ucount.c                | 34 ++++++++++++++++++++++++++++++++++
 kernel/user.c                  |  3 ++-
 10 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..3f2071f7b9c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1826,19 +1826,20 @@ static int __do_execve_file(int fd, struct filename *filename,
 	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
 	struct files_struct *displaced;
-	int retval;
+	int retval, processes;
 
 	if (IS_ERR(filename))
 		return PTR_ERR(filename);
 
+	processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC);
+
 	/*
 	 * We move the actual failure in case of RLIMIT_NPROC excess from
 	 * set*uid() to execve() because too many poorly written programs
 	 * don't check setuid() return code.  Here we additionally recheck
 	 * whether NPROC limit is still exceeded.
 	 */
-	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+	if ((current->flags & PF_NPROC_EXCEEDED) && processes > rlimit(RLIMIT_NPROC)) {
 		retval = -EAGAIN;
 		goto out_ret;
 	}
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 47c5f3aeb460..6170aee986db 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -17,6 +17,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/fs_struct.h>
 #include <linux/task_work.h>
+#include <linux/user_namespace.h>
 
 #include "io-wq.h"
 
@@ -216,7 +217,7 @@ static void io_worker_exit(struct io_worker *worker)
 	if (worker->flags & IO_WORKER_F_RUNNING)
 		atomic_dec(&acct->nr_running);
 	if (!(worker->flags & IO_WORKER_F_BOUND))
-		atomic_dec(&wqe->wq->user->processes);
+		dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 	worker->flags = 0;
 	preempt_enable();
 
@@ -349,12 +350,12 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 			worker->flags |= IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
-			atomic_dec(&wqe->wq->user->processes);
+			dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 		} else {
 			worker->flags &= ~IO_WORKER_F_BOUND;
 			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
 			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
-			atomic_inc(&wqe->wq->user->processes);
+			inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
 		}
 		io_wqe_inc_running(wqe, worker);
 	 }
@@ -671,7 +672,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	spin_unlock_irq(&wqe->lock);
 
 	if (index == IO_WQ_ACCT_UNBOUND)
-		atomic_inc(&wq->user->processes);
+		inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC);
 
 	wake_up_process(worker->task);
 	return true;
@@ -754,6 +755,7 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 			    struct io_wq_work *work)
 {
 	bool free_worker;
+	int processes;
 
 	if (!(work->flags & IO_WQ_WORK_UNBOUND))
 		return true;
@@ -766,7 +768,9 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 	if (free_worker)
 		return true;
 
-	if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
+	processes = get_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
+
+	if (processes >= acct->max_workers &&
 	    !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
 		return false;
 
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 917d88edb7b9..38e122bc3d07 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index fc75af812d73..6d9d180b2c9d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,13 @@ enum ucount_type {
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
 #endif
+	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_COUNTS,
 };
 
+#define UCOUNT_MIN_RLIMIT UCOUNT_RLIMIT_NPROC
+#define UCOUNT_MAX_RLIMIT UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -104,6 +108,10 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..b6694700e760 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -351,7 +351,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		kdebug("share_creds(%p{%d,%d})",
 		       p->cred, atomic_read(&p->cred->usage),
 		       read_cred_subscribers(p->cred));
-		atomic_inc(&p->cred->user->processes);
+		inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
 		return 0;
 	}
 
@@ -384,7 +384,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	}
 #endif
 
-	atomic_inc(&new->user->processes);
+	inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
 	p->cred = p->real_cred = get_cred(new);
 	alter_cred_subscribers(new, 2);
 	validate_creds(new);
@@ -486,11 +486,11 @@ int commit_creds(struct cred *new)
 	 */
 	alter_cred_subscribers(new, 2);
 	if (new->user != old->user)
-		atomic_inc(&new->user->processes);
+		inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
-		atomic_dec(&old->user->processes);
+		dec_rlimit_counter(&init_user_ns, old->euid, UCOUNT_RLIMIT_NPROC);
 	alter_cred_subscribers(old, -2);
 
 	/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..5a0d7dd1ad64 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -187,7 +187,7 @@ void release_task(struct task_struct *p)
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
-	atomic_dec(&__task_cred(p)->user->processes);
+	dec_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
 	rcu_read_unlock();
 
 	cgroup_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..2bc8bd45179f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1844,7 +1844,7 @@ static __latent_entropy struct task_struct *copy_process(
 					int node,
 					struct kernel_clone_args *args)
 {
-	int pidfd = -1, retval;
+	int pidfd = -1, retval, processes;
 	struct task_struct *p;
 	struct multiprocess_signals delayed;
 	struct file *pidfile = NULL;
@@ -1958,9 +1958,10 @@ static __latent_entropy struct task_struct *copy_process(
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
+	processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid,
+			UCOUNT_RLIMIT_NPROC);
 	retval = -EAGAIN;
-	if (atomic_read(&p->real_cred->user->processes) >=
-			task_rlimit(p, RLIMIT_NPROC)) {
+	if (processes >= task_rlimit(p, RLIMIT_NPROC)) {
 		if (p->real_cred->user != INIT_USER &&
 		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
 			goto bad_fork_free;
@@ -2361,7 +2362,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
-	atomic_dec(&p->cred->user->processes);
+	dec_rlimit_counter(&init_user_ns, p->cred->euid, UCOUNT_RLIMIT_NPROC);
 	exit_creds(p);
 bad_fork_free:
 	p->state = TASK_DEAD;
diff --git a/kernel/sys.c b/kernel/sys.c
index 00a96746e28a..db780ec32d86 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -461,11 +461,14 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
 static int set_user(struct cred *new)
 {
 	struct user_struct *new_user;
+	int processes;
 
 	new_user = alloc_uid(new->uid);
 	if (!new_user)
 		return -EAGAIN;
 
+	processes = get_rlimit_counter(&init_user_ns, new_user->uid, UCOUNT_RLIMIT_NPROC);
+
 	/*
 	 * We don't fail in case of NPROC limit excess here because too many
 	 * poorly written programs don't check set*uid() return code, assuming
@@ -473,8 +476,7 @@ static int set_user(struct cred *new)
 	 * for programs doing set*uid()+execve() by harmlessly deferring the
 	 * failure to the execve() stage.
 	 */
-	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
-			new_user != INIT_USER)
+	if (processes >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER)
 		current->flags |= PF_NPROC_EXCEEDED;
 	else
 		current->flags &= ~PF_NPROC_EXCEEDED;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 7b2bca8582ef..e00d644e4ca5 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -74,6 +74,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -222,6 +223,39 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 	put_ucounts(ucounts);
 }
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type)
+{
+	long v;
+	struct ucounts *ucounts = get_ucounts(ns, uid);
+	if (!ucounts)
+		return LONG_MAX;
+	v = atomic_long_read(&ucounts->ucount[type]);
+	put_ucounts(ucounts);
+	return v;
+}
+
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid,
+		enum ucount_type type)
+{
+	if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+		return NULL;
+
+	return inc_ucount(ns, uid, type);
+}
+
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum ucount_type type)
+{
+	struct ucounts *ucounts;
+
+	if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+		return;
+
+	ucounts = get_ucounts(ns, uid);
+
+	if (ucounts)
+		dec_ucount(ucounts, type);
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
diff --git a/kernel/user.c b/kernel/user.c
index b1635d94a1f2..5bb75ebdef4f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
@@ -224,6 +223,8 @@ static int __init uid_cache_init(void)
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
 	spin_unlock_irq(&uidhash_lock);
 
+	inc_rlimit_counter(&init_user_ns, GLOBAL_ROOT_UID, UCOUNT_RLIMIT_NPROC);
+
 	return 0;
 }
 subsys_initcall(uid_cache_init);
-- 
2.25.4


  parent reply	other threads:[~2020-11-02 17:01 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-02 16:50 [RFC PATCH v1 0/4] Per user namespace rlimits Alexey Gladkov
2020-11-02 16:50 ` Alexey Gladkov
2020-11-02 16:50 ` [RFC PATCH v1 1/4] Increase size of ucounts to atomic_long_t Alexey Gladkov
2020-11-02 16:50   ` Alexey Gladkov
2020-11-02 18:03   ` Christian Brauner
2020-11-02 18:03     ` Christian Brauner
2020-11-02 21:23     ` Alexey Gladkov
2020-11-02 21:23       ` Alexey Gladkov
2020-11-02 16:50 ` Alexey Gladkov [this message]
2020-11-02 16:50   ` [RFC PATCH v1 2/4] Move the user's process counter to ucounts Alexey Gladkov
2020-11-02 16:50 ` [RFC PATCH v1 3/4] Do not allow fork if RLIMIT_NPROC is exceeded in the user namespace tree Alexey Gladkov
2020-11-02 16:50   ` Alexey Gladkov
2020-11-02 16:50 ` [RFC PATCH v1 4/4] Allow to change the user namespace in which user rlimits are counted Alexey Gladkov
2020-11-02 16:50   ` Alexey Gladkov
2020-11-02 17:10   ` Jann Horn via Containers
2020-11-02 17:10     ` Jann Horn
2020-11-02 17:10     ` Jann Horn
2020-11-02 17:30     ` Alexey Gladkov
2020-11-02 17:30       ` Alexey Gladkov
2020-11-04 10:03   ` Sargun Dhillon
2020-11-04 10:03     ` Sargun Dhillon
2020-11-04 16:21     ` Alexey Gladkov
2020-11-04 16:21       ` Alexey Gladkov
2020-11-02 17:55 ` [RFC PATCH v1 0/4] Per user namespace rlimits Christian Brauner
2020-11-02 17:55   ` Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b6fb149b5884d341ededecc6f99bd1e40939b647.1604335819.git.gladkov.alexey@gmail.com \
    --to=gladkov.alexey@gmail.com \
    --cc=christian@brauner.io \
    --cc=containers@lists.linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=keescook@chromium.org \
    --cc=kernel-hardening@lists.openwall.com \
    --cc=legion@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.