Subject: + fs-epoll-use-a-per-cpu-counter-for-users-watches-count.patch added to -mm tree
From: akpm @ 2021-08-02 21:09 UTC
To: mm-commits, viro, anton, npiggin
The patch titled
Subject: fs/epoll: use a per-cpu counter for user's watches count
has been added to the -mm tree. Its filename is
fs-epoll-use-a-per-cpu-counter-for-users-watches-count.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/fs-epoll-use-a-per-cpu-counter-for-users-watches-count.patch
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/fs-epoll-use-a-per-cpu-counter-for-users-watches-count.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included in linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Nicholas Piggin <npiggin@gmail.com>
Subject: fs/epoll: use a per-cpu counter for user's watches count
This counter tracks the number of epoll watches a user has, to compare
against the 'max_user_watches' limit. The single shared atomic counter
becomes a scalability bottleneck on SPECjbb2015 on large systems, since
the whole benchmark runs as one user. Changing to a per-cpu counter
increases throughput of the benchmark by about 30% on a 16-socket,
> 1000 thread system.
Link: https://lkml.kernel.org/r/20210802032013.2751916-1-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reported-by: Anton Blanchard <anton@ozlabs.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/eventpoll.c | 18 ++++++++++--------
include/linux/sched/user.h | 3 ++-
kernel/user.c | 9 +++++++++
3 files changed, 21 insertions(+), 9 deletions(-)
--- a/fs/eventpoll.c~fs-epoll-use-a-per-cpu-counter-for-users-watches-count
+++ a/fs/eventpoll.c
@@ -723,7 +723,7 @@ static int ep_remove(struct eventpoll *e
*/
call_rcu(&epi->rcu, epi_rcu_free);
- atomic_long_dec(&ep->user->epoll_watches);
+ percpu_counter_dec(&ep->user->epoll_watches);
return 0;
}
@@ -1439,7 +1439,6 @@ static int ep_insert(struct eventpoll *e
{
int error, pwake = 0;
__poll_t revents;
- long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
struct eventpoll *tep = NULL;
@@ -1449,11 +1448,15 @@ static int ep_insert(struct eventpoll *e
lockdep_assert_irqs_enabled();
- user_watches = atomic_long_read(&ep->user->epoll_watches);
- if (unlikely(user_watches >= max_user_watches))
+ if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
+ max_user_watches) >= 0))
return -ENOSPC;
- if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL)))
+ percpu_counter_inc(&ep->user->epoll_watches);
+
+ if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) {
+ percpu_counter_dec(&ep->user->epoll_watches);
return -ENOMEM;
+ }
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
@@ -1466,17 +1469,16 @@ static int ep_insert(struct eventpoll *e
mutex_lock_nested(&tep->mtx, 1);
/* Add the current item to the list of active epoll hook for this file */
if (unlikely(attach_epitem(tfile, epi) < 0)) {
- kmem_cache_free(epi_cache, epi);
if (tep)
mutex_unlock(&tep->mtx);
+ kmem_cache_free(epi_cache, epi);
+ percpu_counter_dec(&ep->user->epoll_watches);
return -ENOMEM;
}
if (full_check && !tep)
list_file(tfile);
- atomic_long_inc(&ep->user->epoll_watches);
-
/*
* Add the current item to the RB tree. All RB tree operations are
* protected by "mtx", and ep_insert() is called with "mtx" held.
--- a/include/linux/sched/user.h~fs-epoll-use-a-per-cpu-counter-for-users-watches-count
+++ a/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/percpu_counter.h>
#include <linux/refcount.h>
#include <linux/ratelimit.h>
@@ -13,7 +14,7 @@
struct user_struct {
refcount_t __count; /* reference count */
#ifdef CONFIG_EPOLL
- atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+ struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */
#endif
unsigned long unix_inflight; /* How many files in flight in unix sockets */
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
--- a/kernel/user.c~fs-epoll-use-a-per-cpu-counter-for-users-watches-count
+++ a/kernel/user.c
@@ -138,6 +138,7 @@ static void free_user(struct user_struct
{
uid_hash_remove(up);
spin_unlock_irqrestore(&uidhash_lock, flags);
+ percpu_counter_destroy(&up->epoll_watches);
kmem_cache_free(uid_cachep, up);
}
@@ -185,6 +186,10 @@ struct user_struct *alloc_uid(kuid_t uid
new->uid = uid;
refcount_set(&new->__count, 1);
+ if (percpu_counter_init(&new->epoll_watches, 0, GFP_KERNEL)) {
+ kmem_cache_free(uid_cachep, new);
+ return NULL;
+ }
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
@@ -195,6 +200,7 @@ struct user_struct *alloc_uid(kuid_t uid
spin_lock_irq(&uidhash_lock);
up = uid_hash_find(uid, hashent);
if (up) {
+ percpu_counter_destroy(&new->epoll_watches);
kmem_cache_free(uid_cachep, new);
} else {
uid_hash_insert(new, hashent);
@@ -216,6 +222,9 @@ static int __init uid_cache_init(void)
for(n = 0; n < UIDHASH_SZ; ++n)
INIT_HLIST_HEAD(uidhash_table + n);
+ if (percpu_counter_init(&root_user.epoll_watches, 0, GFP_KERNEL))
+ panic("percpu cpunter alloc failed");
+
/* Insert the root user immediately (init already runs as root) */
spin_lock_irq(&uidhash_lock);
uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
_
Patches currently in -mm which might be from npiggin@gmail.com are
lazy-tlb-introduce-lazy-mm-refcount-helper-functions.patch
lazy-tlb-introduce-lazy-mm-refcount-helper-functions-fix.patch
lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable.patch
lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable-fix-2.patch
lazy-tlb-shoot-lazies-a-non-refcounting-lazy-tlb-option.patch
lazy-tlb-shoot-lazies-a-non-refcounting-lazy-tlb-option-fix.patch
powerpc-64s-enable-mmu_lazy_tlb_shootdown.patch
fs-epoll-use-a-per-cpu-counter-for-users-watches-count.patch