From: Alexey Gladkov <gladkov.alexey@gmail.com> To: LKML <linux-kernel@vger.kernel.org>, Linux Containers <containers@lists.linux-foundation.org>, Kernel Hardening <kernel-hardening@lists.openwall.com> Cc: Alexey Gladkov <legion@kernel.org>, "Eric W . Biederman" <ebiederm@xmission.com>, Christian Brauner <christian@brauner.io>, Kees Cook <keescook@chromium.org> Subject: [RFC PATCH v1 3/4] Do not allow fork if RLIMIT_NPROC is exceeded in the user namespace tree Date: Mon, 2 Nov 2020 17:50:32 +0100 [thread overview] Message-ID: <a6a6b015b18b83eeaa5b237b4377f178015847c9.1604335819.git.gladkov.alexey@gmail.com> (raw) In-Reply-To: <cover.1604335819.git.gladkov.alexey@gmail.com> Since RLIMIT_NPROC is counted per user namespace, the existing over-limit check in the current user namespace is not sufficient. We must consider exceeding this limit in parent user namespaces. Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com> --- fs/exec.c | 6 ++++++ fs/io-wq.c | 12 ++++++++---- include/linux/sched.h | 3 +++ kernel/cred.c | 17 ++++++++++------- kernel/fork.c | 6 +++++- 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 3f2071f7b9c7..c45dfc716394 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1831,6 +1831,12 @@ static int __do_execve_file(int fd, struct filename *filename, if (IS_ERR(filename)) return PTR_ERR(filename); + if (current->flags & PF_NPROC_UNS_EXCEEDED) { + current->flags &= ~PF_NPROC_UNS_EXCEEDED; + retval = -EAGAIN; + goto out_ret; + } + processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC); /* diff --git a/fs/io-wq.c b/fs/io-wq.c index 6170aee986db..c3b0843abc9b 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -352,10 +352,11 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++; dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } else { + if (!inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC)) + return; worker->flags &= ~IO_WORKER_F_BOUND; wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--; - inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } io_wqe_inc_running(wqe, worker); } @@ -660,6 +661,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) return false; } + if (index == IO_WQ_ACCT_UNBOUND && + !inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC)) { + kfree(worker); + return false; + } + spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); list_add_tail_rcu(&worker->all_list, &wqe->all_list); @@ -671,9 +678,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) acct->nr_workers++; spin_unlock_irq(&wqe->lock); - if (index == IO_WQ_ACCT_UNBOUND) - inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC); - wake_up_process(worker->task); return true; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 683372943093..c3cf034b4aa7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1506,6 +1506,9 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NPROC_UNS_EXCEEDED 0x01000000 /* It means that we have reached the RLIMIT_NPROC + * in the current user namespace or in one of + * the parent's and we can't fork */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/kernel/cred.c b/kernel/cred.c index b6694700e760..748704db1f6b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -345,13 +345,14 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif 
clone_flags & CLONE_THREAD ) { + if (!inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC)) + return -EACCES; p->real_cred = get_cred(p->cred); get_cred(p->cred); alter_cred_subscribers(p->cred, 2); kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); - inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC); return 0; } @@ -384,7 +385,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) } #endif - inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); + if (!inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC)) + return -EACCES; p->cred = p->real_cred = get_cred(new); alter_cred_subscribers(new, 2); validate_creds(new); @@ -480,13 +482,14 @@ int commit_creds(struct cred *new) if (!gid_eq(new->fsgid, old->fsgid)) key_fsgid_changed(new); - /* do it - * RLIMIT_NPROC limits on user->processes have already been checked - * in set_user(). + /* + * The RLIMIT_NPROC limits have already been checked in set_user(), but + * perhaps this limit is exceeded in the parent user namespace. 
*/ alter_cred_subscribers(new, 2); - if (new->user != old->user) - inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); + if (new->user != old->user && + !inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC)) + task->flags |= PF_NPROC_UNS_EXCEEDED; rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) diff --git a/kernel/fork.c b/kernel/fork.c index 2bc8bd45179f..d2b28634dc8f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1958,9 +1958,13 @@ static __latent_entropy struct task_struct *copy_process( DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + retval = -EAGAIN; + if (current->flags & PF_NPROC_UNS_EXCEEDED) { + current->flags &= ~PF_NPROC_UNS_EXCEEDED; + goto bad_fork_free; + } processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid, UCOUNT_RLIMIT_NPROC); - retval = -EAGAIN; if (processes >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) -- 2.25.4 _______________________________________________ Containers mailing list Containers@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/containers
WARNING: multiple messages have this Message-ID (diff)
From: Alexey Gladkov <gladkov.alexey@gmail.com> To: LKML <linux-kernel@vger.kernel.org>, Linux Containers <containers@lists.linux-foundation.org>, Kernel Hardening <kernel-hardening@lists.openwall.com> Cc: Alexey Gladkov <legion@kernel.org>, "Eric W . Biederman" <ebiederm@xmission.com>, Kees Cook <keescook@chromium.org>, Christian Brauner <christian@brauner.io> Subject: [RFC PATCH v1 3/4] Do not allow fork if RLIMIT_NPROC is exceeded in the user namespace tree Date: Mon, 2 Nov 2020 17:50:32 +0100 [thread overview] Message-ID: <a6a6b015b18b83eeaa5b237b4377f178015847c9.1604335819.git.gladkov.alexey@gmail.com> (raw) In-Reply-To: <cover.1604335819.git.gladkov.alexey@gmail.com> Since RLIMIT_NPROC is counted per user namespace, the existing over-limit check in the current user namespace is not sufficient. We must consider exceeding this limit in parent user namespaces. Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com> --- fs/exec.c | 6 ++++++ fs/io-wq.c | 12 ++++++++---- include/linux/sched.h | 3 +++ kernel/cred.c | 17 ++++++++++------- kernel/fork.c | 6 +++++- 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 3f2071f7b9c7..c45dfc716394 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1831,6 +1831,12 @@ static int __do_execve_file(int fd, struct filename *filename, if (IS_ERR(filename)) return PTR_ERR(filename); + if (current->flags & PF_NPROC_UNS_EXCEEDED) { + current->flags &= ~PF_NPROC_UNS_EXCEEDED; + retval = -EAGAIN; + goto out_ret; + } + processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC); /* diff --git a/fs/io-wq.c b/fs/io-wq.c index 6170aee986db..c3b0843abc9b 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -352,10 +352,11 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++; dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } else { + if (!inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC)) + return; worker->flags &= ~IO_WORKER_F_BOUND; wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--; - inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC); } io_wqe_inc_running(wqe, worker); } @@ -660,6 +661,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) return false; } + if (index == IO_WQ_ACCT_UNBOUND && + !inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC)) { + kfree(worker); + return false; + } + spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); list_add_tail_rcu(&worker->all_list, &wqe->all_list); @@ -671,9 +678,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) acct->nr_workers++; spin_unlock_irq(&wqe->lock); - if (index == IO_WQ_ACCT_UNBOUND) - inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC); - wake_up_process(worker->task); return true; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 683372943093..c3cf034b4aa7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1506,6 +1506,9 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NPROC_UNS_EXCEEDED 0x01000000 /* It means that we have reached the RLIMIT_NPROC + * in the current user namespace or in one of + * the parent's and we can't fork */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/kernel/cred.c b/kernel/cred.c index b6694700e760..748704db1f6b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -345,13 +345,14 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif 
clone_flags & CLONE_THREAD ) { + if (!inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC)) + return -EACCES; p->real_cred = get_cred(p->cred); get_cred(p->cred); alter_cred_subscribers(p->cred, 2); kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); - inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC); return 0; } @@ -384,7 +385,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) } #endif - inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); + if (!inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC)) + return -EACCES; p->cred = p->real_cred = get_cred(new); alter_cred_subscribers(new, 2); validate_creds(new); @@ -480,13 +482,14 @@ int commit_creds(struct cred *new) if (!gid_eq(new->fsgid, old->fsgid)) key_fsgid_changed(new); - /* do it - * RLIMIT_NPROC limits on user->processes have already been checked - * in set_user(). + /* + * The RLIMIT_NPROC limits have already been checked in set_user(), but + * perhaps this limit is exceeded in the parent user namespace. 
*/ alter_cred_subscribers(new, 2); - if (new->user != old->user) - inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC); + if (new->user != old->user && + !inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC)) + task->flags |= PF_NPROC_UNS_EXCEEDED; rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) diff --git a/kernel/fork.c b/kernel/fork.c index 2bc8bd45179f..d2b28634dc8f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1958,9 +1958,13 @@ static __latent_entropy struct task_struct *copy_process( DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + retval = -EAGAIN; + if (current->flags & PF_NPROC_UNS_EXCEEDED) { + current->flags &= ~PF_NPROC_UNS_EXCEEDED; + goto bad_fork_free; + } processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid, UCOUNT_RLIMIT_NPROC); - retval = -EAGAIN; if (processes >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) -- 2.25.4
next prev parent reply other threads:[~2020-11-02 17:01 UTC|newest] Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-11-02 16:50 [RFC PATCH v1 0/4] Per user namespace rlimits Alexey Gladkov 2020-11-02 16:50 ` Alexey Gladkov 2020-11-02 16:50 ` [RFC PATCH v1 1/4] Increase size of ucounts to atomic_long_t Alexey Gladkov 2020-11-02 16:50 ` Alexey Gladkov 2020-11-02 18:03 ` Christian Brauner 2020-11-02 18:03 ` Christian Brauner 2020-11-02 21:23 ` Alexey Gladkov 2020-11-02 21:23 ` Alexey Gladkov 2020-11-02 16:50 ` [RFC PATCH v1 2/4] Move the user's process counter to ucounts Alexey Gladkov 2020-11-02 16:50 ` Alexey Gladkov 2020-11-02 16:50 ` Alexey Gladkov [this message] 2020-11-02 16:50 ` [RFC PATCH v1 3/4] Do not allow fork if RLIMIT_NPROC is exceeded in the user namespace tree Alexey Gladkov 2020-11-02 16:50 ` [RFC PATCH v1 4/4] Allow to change the user namespace in which user rlimits are counted Alexey Gladkov 2020-11-02 16:50 ` Alexey Gladkov 2020-11-02 17:10 ` Jann Horn via Containers 2020-11-02 17:10 ` Jann Horn 2020-11-02 17:10 ` Jann Horn 2020-11-02 17:30 ` Alexey Gladkov 2020-11-02 17:30 ` Alexey Gladkov 2020-11-04 10:03 ` Sargun Dhillon 2020-11-04 10:03 ` Sargun Dhillon 2020-11-04 16:21 ` Alexey Gladkov 2020-11-04 16:21 ` Alexey Gladkov 2020-11-02 17:55 ` [RFC PATCH v1 0/4] Per user namespace rlimits Christian Brauner 2020-11-02 17:55 ` Christian Brauner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=a6a6b015b18b83eeaa5b237b4377f178015847c9.1604335819.git.gladkov.alexey@gmail.com \ --to=gladkov.alexey@gmail.com \ --cc=christian@brauner.io \ --cc=containers@lists.linux-foundation.org \ --cc=ebiederm@xmission.com \ --cc=keescook@chromium.org \ --cc=kernel-hardening@lists.openwall.com \ --cc=legion@kernel.org \ --cc=linux-kernel@vger.kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.