From: Alexey Gladkov <gladkov.alexey@gmail.com> To: LKML <linux-kernel@vger.kernel.org>, Kernel Hardening <kernel-hardening@lists.openwall.com>, Linux Containers <containers@lists.linux-foundation.org>, linux-mm@kvack.org Cc: Jens Axboe <axboe@kernel.dk>, Kees Cook <keescook@chromium.org>, Jann Horn <jannh@google.com>, Linus Torvalds <torvalds@linux-foundation.org>, Oleg Nesterov <oleg@redhat.com>, "Eric W . Biederman" <ebiederm@xmission.com>, Andrew Morton <akpm@linux-foundation.org>, Alexey Gladkov <legion@kernel.org> Subject: [PATCH v10 6/9] Reimplement RLIMIT_SIGPENDING on top of ucounts Date: Wed, 7 Apr 2021 19:08:11 +0200 [thread overview] Message-ID: <7abe5ab608c61fc2363ba458bea21cf9a4a64588.1617814298.git.gladkov.alexey@gmail.com> (raw) In-Reply-To: <cover.1617814298.git.gladkov.alexey@gmail.com> The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. v10: * Fix memory leak on get_ucounts failure. Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com> --- fs/proc/array.c | 2 +- include/linux/sched/user.h | 1 - include/linux/signal_types.h | 4 ++- include/linux/user_namespace.h | 1 + kernel/fork.c | 1 + kernel/signal.c | 58 ++++++++++++++++------------------ kernel/ucount.c | 1 + kernel/user.c | 1 - kernel/user_namespace.c | 1 + 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index bb87e4d89cd8..74b0ea4b7e38 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) collect_sigign_sigcatch(p, &ignored, &caught); num_threads = get_nr_threads(p); rcu_read_lock(); /* FIXME: is this correct? */ - qsize = atomic_read(&__task_cred(p)->user->sigpending); + qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); qlim = task_rlimit(p, RLIMIT_SIGPENDING); unlock_task_sighand(p, &flags); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 8a34446681aa..8ba9cec4fb99 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -12,7 +12,6 @@ */ struct user_struct { refcount_t __count; /* reference count */ - atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 68e06c75c5b2..34cb28b8f16c 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -13,6 +13,8 @@ typedef struct kernel_siginfo { __SIGINFO; } kernel_siginfo_t; +struct ucounts; + /* * Real Time signals may be queued. */ @@ -21,7 +23,7 @@ struct sigqueue { struct list_head list; int flags; kernel_siginfo_t info; - struct user_struct *user; + struct ucounts *ucounts; }; /* flags values. */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index d0fea0306394..6e8736c7aa29 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -52,6 +52,7 @@ enum ucount_type { #endif UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, + UCOUNT_RLIMIT_SIGPENDING, UCOUNT_COUNTS, }; diff --git a/kernel/fork.c b/kernel/fork.c index 85c6094f5a48..741f896c156e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -824,6 +824,7 @@ void __init fork_init(void) init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); + init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/signal.c b/kernel/signal.c index f2a1b898da29..4e80386acec7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -413,49 +413,45 @@ void task_join_group_stop(struct task_struct *task) static struct sigqueue * __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { - struct sigqueue *q = NULL; - struct user_struct *user; - int sigpending; + struct sigqueue *q = kmem_cache_alloc(sigqueue_cachep, flags); - /* - * Protect access to @t credentials. This can go away when all - * callers hold rcu read lock. - * - * NOTE! A pending signal will hold on to the user refcount, - * and we get/put the refcount only when the sigpending count - * changes from/to zero. - */ - rcu_read_lock(); - user = __task_cred(t)->user; - sigpending = atomic_inc_return(&user->sigpending); - if (sigpending == 1) - get_uid(user); - rcu_read_unlock(); + if (likely(q != NULL)) { + bool overlimit; - if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { - q = kmem_cache_alloc(sigqueue_cachep, flags); - } else { - print_dropped_signal(sig); - } - - if (unlikely(q == NULL)) { - if (atomic_dec_and_test(&user->sigpending)) - free_uid(user); - } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = user; + + /* + * Protect access to @t credentials. This can go away when all + * callers hold rcu read lock. + */ + rcu_read_lock(); + q->ucounts = get_ucounts(task_ucounts(t)); + if (q->ucounts) { + overlimit = inc_rlimit_ucounts_and_test(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, + 1, task_rlimit(t, RLIMIT_SIGPENDING)); + + if (override_rlimit || likely(!overlimit)) { + rcu_read_unlock(); + return q; + } + } + rcu_read_unlock(); + kmem_cache_free(sigqueue_cachep, q); } - return q; + print_dropped_signal(sig); + return NULL; } static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - if (atomic_dec_and_test(&q->user->sigpending)) - free_uid(q->user); + if (q->ucounts) { + dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); + put_ucounts(q->ucounts); + } kmem_cache_free(sigqueue_cachep, q); } diff --git a/kernel/ucount.c b/kernel/ucount.c index 3f1682b690e6..5c1381ff388a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -80,6 +80,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { }, { }, { } diff --git a/kernel/user.c b/kernel/user.c index 7f5ff498207a..6737327f83be 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index cc90d5203acf..df1bed32dd48 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -123,6 +123,7 @@ int create_user_ns(struct cred *new) } ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); + ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- 2.29.3 _______________________________________________ Containers mailing list Containers@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/containers
WARNING: multiple messages have this Message-ID (diff)
From: Alexey Gladkov <gladkov.alexey@gmail.com> To: LKML <linux-kernel@vger.kernel.org>, Kernel Hardening <kernel-hardening@lists.openwall.com>, Linux Containers <containers@lists.linux-foundation.org>, linux-mm@kvack.org Cc: Alexey Gladkov <legion@kernel.org>, Andrew Morton <akpm@linux-foundation.org>, Christian Brauner <christian.brauner@ubuntu.com>, "Eric W . Biederman" <ebiederm@xmission.com>, Jann Horn <jannh@google.com>, Jens Axboe <axboe@kernel.dk>, Kees Cook <keescook@chromium.org>, Linus Torvalds <torvalds@linux-foundation.org>, Oleg Nesterov <oleg@redhat.com> Subject: [PATCH v10 6/9] Reimplement RLIMIT_SIGPENDING on top of ucounts Date: Wed, 7 Apr 2021 19:08:11 +0200 [thread overview] Message-ID: <7abe5ab608c61fc2363ba458bea21cf9a4a64588.1617814298.git.gladkov.alexey@gmail.com> (raw) In-Reply-To: <cover.1617814298.git.gladkov.alexey@gmail.com> The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. v10: * Fix memory leak on get_ucounts failure. Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com> --- fs/proc/array.c | 2 +- include/linux/sched/user.h | 1 - include/linux/signal_types.h | 4 ++- include/linux/user_namespace.h | 1 + kernel/fork.c | 1 + kernel/signal.c | 58 ++++++++++++++++------------------ kernel/ucount.c | 1 + kernel/user.c | 1 - kernel/user_namespace.c | 1 + 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index bb87e4d89cd8..74b0ea4b7e38 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) collect_sigign_sigcatch(p, &ignored, &caught); num_threads = get_nr_threads(p); rcu_read_lock(); /* FIXME: is this correct? */ - qsize = atomic_read(&__task_cred(p)->user->sigpending); + qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); qlim = task_rlimit(p, RLIMIT_SIGPENDING); unlock_task_sighand(p, &flags); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 8a34446681aa..8ba9cec4fb99 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -12,7 +12,6 @@ */ struct user_struct { refcount_t __count; /* reference count */ - atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 68e06c75c5b2..34cb28b8f16c 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -13,6 +13,8 @@ typedef struct kernel_siginfo { __SIGINFO; } kernel_siginfo_t; +struct ucounts; + /* * Real Time signals may be queued. */ @@ -21,7 +23,7 @@ struct sigqueue { struct list_head list; int flags; kernel_siginfo_t info; - struct user_struct *user; + struct ucounts *ucounts; }; /* flags values. */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index d0fea0306394..6e8736c7aa29 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -52,6 +52,7 @@ enum ucount_type { #endif UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, + UCOUNT_RLIMIT_SIGPENDING, UCOUNT_COUNTS, }; diff --git a/kernel/fork.c b/kernel/fork.c index 85c6094f5a48..741f896c156e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -824,6 +824,7 @@ void __init fork_init(void) init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); + init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/signal.c b/kernel/signal.c index f2a1b898da29..4e80386acec7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -413,49 +413,45 @@ void task_join_group_stop(struct task_struct *task) static struct sigqueue * __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { - struct sigqueue *q = NULL; - struct user_struct *user; - int sigpending; + struct sigqueue *q = kmem_cache_alloc(sigqueue_cachep, flags); - /* - * Protect access to @t credentials. This can go away when all - * callers hold rcu read lock. - * - * NOTE! A pending signal will hold on to the user refcount, - * and we get/put the refcount only when the sigpending count - * changes from/to zero. - */ - rcu_read_lock(); - user = __task_cred(t)->user; - sigpending = atomic_inc_return(&user->sigpending); - if (sigpending == 1) - get_uid(user); - rcu_read_unlock(); + if (likely(q != NULL)) { + bool overlimit; - if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { - q = kmem_cache_alloc(sigqueue_cachep, flags); - } else { - print_dropped_signal(sig); - } - - if (unlikely(q == NULL)) { - if (atomic_dec_and_test(&user->sigpending)) - free_uid(user); - } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = user; + + /* + * Protect access to @t credentials. This can go away when all + * callers hold rcu read lock. + */ + rcu_read_lock(); + q->ucounts = get_ucounts(task_ucounts(t)); + if (q->ucounts) { + overlimit = inc_rlimit_ucounts_and_test(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, + 1, task_rlimit(t, RLIMIT_SIGPENDING)); + + if (override_rlimit || likely(!overlimit)) { + rcu_read_unlock(); + return q; + } + } + rcu_read_unlock(); + kmem_cache_free(sigqueue_cachep, q); } - return q; + print_dropped_signal(sig); + return NULL; } static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - if (atomic_dec_and_test(&q->user->sigpending)) - free_uid(q->user); + if (q->ucounts) { + dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); + put_ucounts(q->ucounts); + } kmem_cache_free(sigqueue_cachep, q); } diff --git a/kernel/ucount.c b/kernel/ucount.c index 3f1682b690e6..5c1381ff388a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -80,6 +80,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { }, { }, { } diff --git a/kernel/user.c b/kernel/user.c index 7f5ff498207a..6737327f83be 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index cc90d5203acf..df1bed32dd48 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -123,6 +123,7 @@ int create_user_ns(struct cred *new) } ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); + ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- 2.29.3
next prev parent reply other threads:[~2021-04-07 17:08 UTC|newest] Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-04-07 17:08 [PATCH v10 0/9] Count rlimits in each user namespace Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 1/9] Increase size of ucounts to atomic_long_t Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 2/9] Add a reference to ucounts for each cred Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 3/9] Use atomic_t for ucounts reference counting Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 4/9] Reimplement RLIMIT_NPROC on top of ucounts Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 5/9] Reimplement RLIMIT_MSGQUEUE " Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov [this message] 2021-04-07 17:08 ` [PATCH v10 6/9] Reimplement RLIMIT_SIGPENDING " Alexey Gladkov 2021-04-08 8:30 ` 08ed4efad6: stress-ng.sigsegv.ops_per_sec -41.9% regression kernel test robot 2021-04-08 8:30 ` kernel test robot 2021-04-08 8:30 ` kernel test robot 2021-04-08 16:22 ` Linus Torvalds 2021-04-08 16:22 ` Linus Torvalds 2021-04-08 16:22 ` Linus Torvalds 2021-04-08 16:22 ` Linus Torvalds 2021-04-08 16:44 ` Alexey Gladkov 2021-04-08 16:44 ` Alexey Gladkov 2021-04-08 16:44 ` Alexey Gladkov 2021-04-08 18:44 ` Eric W. Biederman 2021-04-08 18:44 ` Eric W. Biederman 2021-04-08 18:44 ` Eric W. Biederman 2021-04-08 18:44 ` Eric W. Biederman 2021-04-16 11:33 ` Alexey Gladkov 2021-04-16 11:33 ` Alexey Gladkov 2021-04-16 11:33 ` Alexey Gladkov 2021-04-23 2:47 ` Oliver Sang 2021-04-23 2:47 ` Oliver Sang 2021-04-23 2:47 ` Oliver Sang 2021-04-23 7:44 ` Alexey Gladkov 2021-04-23 7:44 ` Alexey Gladkov 2021-04-23 7:44 ` Alexey Gladkov 2021-04-28 14:36 ` Oliver Sang 2021-04-28 14:36 ` Oliver Sang 2021-04-28 14:36 ` Oliver Sang 2021-04-28 15:09 ` Alexey Gladkov 2021-04-28 15:09 ` Alexey Gladkov 2021-04-28 15:09 ` Alexey Gladkov 2021-05-07 7:14 ` Oliver Sang 2021-05-07 7:14 ` Oliver Sang 2021-04-07 17:08 ` [PATCH v10 7/9] Reimplement RLIMIT_MEMLOCK on top of ucounts Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 21:37 ` kernel test robot 2021-04-07 21:37 ` kernel test robot 2021-04-07 21:37 ` kernel test robot 2021-04-07 17:08 ` [PATCH v10 8/9] kselftests: Add test to check for rlimit changes in different user namespaces Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov 2021-04-07 17:08 ` [PATCH v10 9/9] ucounts: Set ucount_max to the largest positive value the type can hold Alexey Gladkov 2021-04-07 17:08 ` Alexey Gladkov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=7abe5ab608c61fc2363ba458bea21cf9a4a64588.1617814298.git.gladkov.alexey@gmail.com \ --to=gladkov.alexey@gmail.com \ --cc=akpm@linux-foundation.org \ --cc=axboe@kernel.dk \ --cc=containers@lists.linux-foundation.org \ --cc=ebiederm@xmission.com \ --cc=jannh@google.com \ --cc=keescook@chromium.org \ --cc=kernel-hardening@lists.openwall.com \ --cc=legion@kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=oleg@redhat.com \ --cc=torvalds@linux-foundation.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.