* [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries @ 2020-01-14 16:57 Trond Myklebust 2020-02-06 16:33 ` J. Bruce Fields 0 siblings, 1 reply; 11+ messages in thread From: Trond Myklebust @ 2020-01-14 16:57 UTC (permalink / raw) To: J. Bruce Fields; +Cc: linux-nfs If the cache entry never gets initialised, we want the garbage collector to be able to evict it. Otherwise if the upcall daemon fails to initialise the entry, we end up never expiring it. Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache as unexpired") Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- include/linux/sunrpc/cache.h | 3 --- net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f8603724fbee..0428dd23fd79 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -202,9 +202,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - if (!test_bit(CACHE_VALID, &h->flags)) - return false; - return (h->expiry_time < seconds_since_boot()) || (detail->flush_time >= h->last_refresh); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 52d927210d32..99d630150af6 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -65,13 +65,14 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, rcu_read_lock(); hlist_for_each_entry_rcu(tmp, head, cache_list) { - if (detail->match(tmp, key)) { - if (cache_is_expired(detail, tmp)) - continue; - tmp = cache_get_rcu(tmp); - rcu_read_unlock(); - return tmp; - } + if (!detail->match(tmp, key)) + continue; + if (test_bit(CACHE_VALID, &tmp->flags) && + cache_is_expired(detail, tmp)) + continue; + tmp = cache_get_rcu(tmp); + rcu_read_unlock(); + return tmp; } rcu_read_unlock(); return NULL; @@ -114,17 +115,18 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, /* check if entry appeared while we slept */ hlist_for_each_entry_rcu(tmp, head, cache_list) { - if (detail->match(tmp, key)) { - if (cache_is_expired(detail, tmp)) { - sunrpc_begin_cache_remove_entry(tmp, detail); - freeme = tmp; - break; - } - cache_get(tmp); - spin_unlock(&detail->hash_lock); - cache_put(new, detail); - return tmp; + if (!detail->match(tmp, key)) + continue; + if (test_bit(CACHE_VALID, &tmp->flags) && + cache_is_expired(detail, tmp)) { + sunrpc_begin_cache_remove_entry(tmp, detail); + freeme = tmp; + break; } + cache_get(tmp); + spin_unlock(&detail->hash_lock); + cache_put(new, detail); + return tmp; } hlist_add_head_rcu(&new->cache_list, head); -- 2.24.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-01-14 16:57 [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries Trond Myklebust @ 2020-02-06 16:33 ` J. Bruce Fields 2020-02-07 14:25 ` Trond Myklebust 0 siblings, 1 reply; 11+ messages in thread From: J. Bruce Fields @ 2020-02-06 16:33 UTC (permalink / raw) To: Trond Myklebust; +Cc: J. Bruce Fields, linux-nfs On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote: > If the cache entry never gets initialised, we want the garbage > collector to be able to evict it. Otherwise if the upcall daemon > fails to initialise the entry, we end up never expiring it. Could you tell us more about what motivated this? It's causing failures on pynfs server-reboot tests. I haven't pinned down the cause yet, but it looks like it could be a regression to the behavior Kinglong Mee describes in detail in his original patch. Dropping for the time being. --b. > > Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache as unexpired") > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > --- > include/linux/sunrpc/cache.h | 3 --- > net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- > 2 files changed, 19 insertions(+), 20 deletions(-) > > diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h > index f8603724fbee..0428dd23fd79 100644 > --- a/include/linux/sunrpc/cache.h > +++ b/include/linux/sunrpc/cache.h > @@ -202,9 +202,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) > > static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) > { > - if (!test_bit(CACHE_VALID, &h->flags)) > - return false; > - > return (h->expiry_time < seconds_since_boot()) || > (detail->flush_time >= h->last_refresh); > } > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c > index 52d927210d32..99d630150af6 100644 > --- a/net/sunrpc/cache.c > +++ b/net/sunrpc/cache.c > @@ -65,13 +65,14 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, > > rcu_read_lock(); > hlist_for_each_entry_rcu(tmp, head, cache_list) { > - if (detail->match(tmp, key)) { > - if (cache_is_expired(detail, tmp)) > - continue; > - tmp = cache_get_rcu(tmp); > - rcu_read_unlock(); > - return tmp; > - } > + if (!detail->match(tmp, key)) > + continue; > + if (test_bit(CACHE_VALID, &tmp->flags) && > + cache_is_expired(detail, tmp)) > + continue; > + tmp = cache_get_rcu(tmp); > + rcu_read_unlock(); > + return tmp; > } > rcu_read_unlock(); > return NULL; > @@ -114,17 +115,18 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, > > /* check if entry appeared while we slept */ > hlist_for_each_entry_rcu(tmp, head, cache_list) { > - if (detail->match(tmp, key)) { > - if (cache_is_expired(detail, tmp)) { > - sunrpc_begin_cache_remove_entry(tmp, detail); > - freeme = tmp; > - break; > - } > - cache_get(tmp); > - spin_unlock(&detail->hash_lock); > - cache_put(new, detail); > - return tmp; > + if (!detail->match(tmp, key)) > + continue; > + if (test_bit(CACHE_VALID, &tmp->flags) && > + cache_is_expired(detail, tmp)) { > + sunrpc_begin_cache_remove_entry(tmp, detail); > + freeme = tmp; > + break; > } > + cache_get(tmp); > + spin_unlock(&detail->hash_lock); > + cache_put(new, detail); > + return tmp; > } > > hlist_add_head_rcu(&new->cache_list, head); > -- > 2.24.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-02-06 16:33 ` J. Bruce Fields @ 2020-02-07 14:25 ` Trond Myklebust 2020-02-07 18:18 ` bfields 0 siblings, 1 reply; 11+ messages in thread From: Trond Myklebust @ 2020-02-07 14:25 UTC (permalink / raw) To: bfields; +Cc: linux-nfs, bfields On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote: > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote: > > If the cache entry never gets initialised, we want the garbage > > collector to be able to evict it. Otherwise if the upcall daemon > > fails to initialise the entry, we end up never expiring it. > > Could you tell us more about what motivated this? > > It's causing failures on pynfs server-reboot tests. I haven't pinned > down the cause yet, but it looks like it could be a regression to the > behavior Kinglong Mee describes in detail in his original patch. > Can you point me to the tests that are failing? The motivation here is to allow the garbage collector to do its job of evicting cache entries after they are supposed to have timed out. The fact that uninitialised cache entries are given an infinite lifetime, and are never evicted is a de facto memory leak if, for instance, the mountd daemon ignores the cache request, or the downcall in expkey_parse() or svc_export_parse() fails without being able to update the request. The threads that are waiting for the cache replies already have a mechanism for dealing with timeouts (with cache_wait_req() and deferred requests), so the question is what is so special about uninitialised requests that we have to leak them in order to avoid a problem with reboot? > Dropping for the time being. > > --b. > > > Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache > > as unexpired") > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > > --- > > include/linux/sunrpc/cache.h | 3 --- > > net/sunrpc/cache.c | 36 +++++++++++++++++++------------- > > ---- > > 2 files changed, 19 insertions(+), 20 deletions(-) > > > > diff --git a/include/linux/sunrpc/cache.h > > b/include/linux/sunrpc/cache.h > > index f8603724fbee..0428dd23fd79 100644 > > --- a/include/linux/sunrpc/cache.h > > +++ b/include/linux/sunrpc/cache.h > > @@ -202,9 +202,6 @@ static inline void cache_put(struct cache_head > > *h, struct cache_detail *cd) > > > > static inline bool cache_is_expired(struct cache_detail *detail, > > struct cache_head *h) > > { > > - if (!test_bit(CACHE_VALID, &h->flags)) > > - return false; > > - > > return (h->expiry_time < seconds_since_boot()) || > > (detail->flush_time >= h->last_refresh); > > } > > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c > > index 52d927210d32..99d630150af6 100644 > > --- a/net/sunrpc/cache.c > > +++ b/net/sunrpc/cache.c > > @@ -65,13 +65,14 @@ static struct cache_head > > *sunrpc_cache_find_rcu(struct cache_detail *detail, > > > > rcu_read_lock(); > > hlist_for_each_entry_rcu(tmp, head, cache_list) { > > - if (detail->match(tmp, key)) { > > - if (cache_is_expired(detail, tmp)) > > - continue; > > - tmp = cache_get_rcu(tmp); > > - rcu_read_unlock(); > > - return tmp; > > - } > > + if (!detail->match(tmp, key)) > > + continue; > > + if (test_bit(CACHE_VALID, &tmp->flags) && > > + cache_is_expired(detail, tmp)) > > + continue; > > + tmp = cache_get_rcu(tmp); > > + rcu_read_unlock(); > > + return tmp; > > } > > rcu_read_unlock(); > > return NULL; > > @@ -114,17 +115,18 @@ static struct cache_head > > *sunrpc_cache_add_entry(struct cache_detail *detail, > > > > /* check if entry appeared while we slept */ > > hlist_for_each_entry_rcu(tmp, head, cache_list) { > > - if (detail->match(tmp, key)) { > > - if (cache_is_expired(detail, tmp)) { > > - sunrpc_begin_cache_remove_entry(tmp, > > detail); > > - freeme = tmp; > > - break; > > - } > > - cache_get(tmp); > > - spin_unlock(&detail->hash_lock); > > - cache_put(new, detail); > > - return tmp; > > + if (!detail->match(tmp, key)) > > + continue; > > + if (test_bit(CACHE_VALID, &tmp->flags) && > > + cache_is_expired(detail, tmp)) { > > + sunrpc_begin_cache_remove_entry(tmp, detail); > > + freeme = tmp; > > + break; > > } > > + cache_get(tmp); > > + spin_unlock(&detail->hash_lock); > > + cache_put(new, detail); > > + return tmp; > > } > > > > hlist_add_head_rcu(&new->cache_list, head); > > -- > > 2.24.1 -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@hammerspace.com ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-02-07 14:25 ` Trond Myklebust @ 2020-02-07 18:18 ` bfields 2020-02-10 18:47 ` Trond Myklebust 2020-03-26 20:40 ` bfields 0 siblings, 2 replies; 11+ messages in thread From: bfields @ 2020-02-07 18:18 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-nfs, bfields On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote: > On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote: > > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote: > > > If the cache entry never gets initialised, we want the garbage > > > collector to be able to evict it. Otherwise if the upcall daemon > > > fails to initialise the entry, we end up never expiring it. > > > > Could you tell us more about what motivated this? > > > > It's causing failures on pynfs server-reboot tests. I haven't pinned > > down the cause yet, but it looks like it could be a regression to the > > behavior Kinglong Mee describes in detail in his original patch. > > > > Can you point me to the tests that are failing? I'm basically doing ./nfs4.1/testserver.py myserver:/path reboot --serverhelper=examples/server_helper.sh --serverhelperarg=myserver For all I know at this point, the change could be exposing a pynfs-side bug. > The motivation here is to allow the garbage collector to do its job of > evicting cache entries after they are supposed to have timed out. Understood. I was curious whether this was found by code inspection or because you'd run across a case where the leak was causing a practical problem. --b. > The fact that uninitialised cache entries are given an infinite > lifetime, and are never evicted is a de facto memory leak if, for > instance, the mountd daemon ignores the cache request, or the downcall > in expkey_parse() or svc_export_parse() fails without being able to > update the request. > > The threads that are waiting for the cache replies already have a > mechanism for dealing with timeouts (with cache_wait_req() and > deferred requests), so the question is what is so special about > uninitialised requests that we have to leak them in order to avoid a > problem with reboot? ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-02-07 18:18 ` bfields @ 2020-02-10 18:47 ` Trond Myklebust 2020-03-26 20:40 ` bfields 1 sibling, 0 replies; 11+ messages in thread From: Trond Myklebust @ 2020-02-10 18:47 UTC (permalink / raw) To: bfields; +Cc: linux-nfs, bfields On Fri, 2020-02-07 at 13:18 -0500, bfields@fieldses.org wrote: > On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote: > > On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote: > > > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote: > > > > If the cache entry never gets initialised, we want the garbage > > > > collector to be able to evict it. Otherwise if the upcall > > > > daemon > > > > fails to initialise the entry, we end up never expiring it. > > > > > > Could you tell us more about what motivated this? > > > > > > It's causing failures on pynfs server-reboot tests. I haven't > > > pinned > > > down the cause yet, but it looks like it could be a regression to > > > the > > > behavior Kinglong Mee describes in detail in his original patch. > > > > > > > Can you point me to the tests that are failing? > > I'm basically doing > > ./nfs4.1/testserver.py myserver:/path reboot > --serverhelper=examples/server_helper.sh > --serverhelperarg=myserver > > For all I know at this point, the change could be exposing a pynfs- > side > bug. > > > The motivation here is to allow the garbage collector to do its job > > of > > evicting cache entries after they are supposed to have timed out. > > Understood. I was curious whether this was found by code inspection > or > because you'd run across a case where the leak was causing a > practical > problem. I was seeing hangs in some cases where the path lookup was temporarily failing in the export path downcall. In that case, the mount daemon just ignores the returned error, and leaves it up to the kernel to fix what is happening. The problem is that since the garbage collector is currently told to ignore stuff if CACHE_VALID is not set, and since cache_check() won't do a second upcall, then any RPC call trying to access data on the path that was affected just hangs and eventually times out. Nothing prompts for this condition to be fixed. > > --b. > > > The fact that uninitialised cache entries are given an infinite > > lifetime, and are never evicted is a de facto memory leak if, for > > instance, the mountd daemon ignores the cache request, or the > > downcall > > in expkey_parse() or svc_export_parse() fails without being able to > > update the request. > > > > The threads that are waiting for the cache replies already have a > > mechanism for dealing with timeouts (with cache_wait_req() and > > deferred requests), so the question is what is so special about > > uninitialised requests that we have to leak them in order to avoid > > a > > problem with reboot? Trond Myklebust CTO, Hammerspace Inc 4300 El Camino Real, Suite 105 Los Altos, CA 94022 www.hammer.space -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@hammerspace.com ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-02-07 18:18 ` bfields 2020-02-10 18:47 ` Trond Myklebust @ 2020-03-26 20:40 ` bfields 2020-03-26 21:42 ` Trond Myklebust 1 sibling, 1 reply; 11+ messages in thread From: bfields @ 2020-03-26 20:40 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-nfs, bfields, Kinglong Mee Sorry, just getting back to this: On Fri, Feb 07, 2020 at 01:18:17PM -0500, bfields@fieldses.org wrote: > On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote: > > On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote: > > > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote: > > > > If the cache entry never gets initialised, we want the garbage > > > > collector to be able to evict it. Otherwise if the upcall daemon > > > > fails to initialise the entry, we end up never expiring it. > > > > > > Could you tell us more about what motivated this? > > > > > > It's causing failures on pynfs server-reboot tests. I haven't pinned > > > down the cause yet, but it looks like it could be a regression to the > > > behavior Kinglong Mee describes in detail in his original patch. > > > > > > > Can you point me to the tests that are failing? > > I'm basically doing > > ./nfs4.1/testserver.py myserver:/path reboot > --serverhelper=examples/server_helper.sh > --serverhelperarg=myserver > > For all I know at this point, the change could be exposing a pynfs-side > bug. From a trace, it's clear that the server is actually becoming unresponsive, so it's not a pynfs bug. > > The motivation here is to allow the garbage collector to do its job of > > evicting cache entries after they are supposed to have timed out. > > Understood. I was curious whether this was found by code inspection or > because you'd run across a case where the leak was causing a practical > problem. I'm still curious. > > The fact that uninitialised cache entries are given an infinite > > lifetime, and are never evicted is a de facto memory leak if, for > > instance, the mountd daemon ignores the cache request, or the downcall > > in expkey_parse() or svc_export_parse() fails without being able to > > update the request. If mountd ignores cache requests, or downcalls fail, then the server's broken anyway. The server can't do anything without mountd. > > The threads that are waiting for the cache replies already have a > > mechanism for dealing with timeouts (with cache_wait_req() and > > deferred requests), so the question is what is so special about > > uninitialised requests that we have to leak them in order to avoid a > > problem with reboot? I'm not sure I have this right yet. I'm just staring at the code and at Kinglong Mee's description on d6fc8821c2d2. I think the way it works is that a cash flush from mountd results in all cache entries (including invalid entries that nfsd threads are waiting on) being considered expired. So cache_check() returns an immediate ETIMEDOUT without waiting. Maybe the cache_is_expired() logic should be something more like: if (h->expiry_time < seconds_since_boot()) return true; if (!test_bit(CACHE_VALID, &h->flags)) return false; return h->expiry_time < seconds_since_boot(); So invalid cache entries (which are waiting for a reply from mountd) can expire, but they can't be flushed. If that makes sense. As a stopgap we may want to revert or drop the "Allow garbage collection" patch, as the (preexisting) memory leak seems lower impact than the server hang. --b. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-03-26 20:40 ` bfields @ 2020-03-26 21:42 ` Trond Myklebust 2020-03-27 1:50 ` J. Bruce Fields 0 siblings, 1 reply; 11+ messages in thread From: Trond Myklebust @ 2020-03-26 21:42 UTC (permalink / raw) To: bfields; +Cc: kinglongmee, linux-nfs, bfields Hi Bruce, On Thu, 2020-03-26 at 16:40 -0400, bfields@fieldses.org wrote: > Sorry, just getting back to this: > > On Fri, Feb 07, 2020 at 01:18:17PM -0500, bfields@fieldses.org wrote: > > On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote: > > > On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote: > > > > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust > > > > wrote: > > > > > If the cache entry never gets initialised, we want the > > > > > garbage > > > > > collector to be able to evict it. Otherwise if the upcall > > > > > daemon > > > > > fails to initialise the entry, we end up never expiring it. > > > > > > > > Could you tell us more about what motivated this? > > > > > > > > It's causing failures on pynfs server-reboot tests. I haven't > > > > pinned > > > > down the cause yet, but it looks like it could be a regression > > > > to the > > > > behavior Kinglong Mee describes in detail in his original > > > > patch. > > > > > > > > > > Can you point me to the tests that are failing? > > > > I'm basically doing > > > > ./nfs4.1/testserver.py myserver:/path reboot > > --serverhelper=examples/server_helper.sh > > --serverhelperarg=myserver > > > > For all I know at this point, the change could be exposing a pynfs- > > side > > bug. > > From a trace, it's clear that the server is actually becoming > unresponsive, so it's not a pynfs bug. > > > > The motivation here is to allow the garbage collector to do its > > > job of > > > evicting cache entries after they are supposed to have timed out. > > > > Understood. I was curious whether this was found by code > > inspection or > > because you'd run across a case where the leak was causing a > > practical > > problem. > > I'm still curious. > > > > The fact that uninitialised cache entries are given an infinite > > > lifetime, and are never evicted is a de facto memory leak if, for > > > instance, the mountd daemon ignores the cache request, or the > > > downcall > > > in expkey_parse() or svc_export_parse() fails without being able > > > to > > > update the request. > > If mountd ignores cache requests, or downcalls fail, then the > server's > broken anyway. The server can't do anything without mountd. > > > > The threads that are waiting for the cache replies already have a > > > mechanism for dealing with timeouts (with cache_wait_req() and > > > deferred requests), so the question is what is so special about > > > uninitialised requests that we have to leak them in order to > > > avoid a > > > problem with reboot? > > I'm not sure I have this right yet. I'm just staring at the code and > at > Kinglong Mee's description on d6fc8821c2d2. I think the way it works > is > that a cash flush from mountd results in all cache entries (including > invalid entries that nfsd threads are waiting on) being considered > expired. So cache_check() returns an immediate ETIMEDOUT without > waiting. > > Maybe the cache_is_expired() logic should be something more like: > > if (h->expiry_time < seconds_since_boot()) > return true; > if (!test_bit(CACHE_VALID, &h->flags)) > return false; > return h->expiry_time < seconds_since_boot(); > > So invalid cache entries (which are waiting for a reply from mountd) > can > expire, but they can't be flushed. If that makes sense. > > As a stopgap we may want to revert or drop the "Allow garbage > collection" patch, as the (preexisting) memory leak seems lower > impact > than the server hang. > I believe you were probably seeing the effect of the cache_listeners_exist() test, which is just wrong for all cache upcall users except idmapper and svcauth_gss. We should not be creating negative cache entries just because the rpc.mountd daemon happens to be slow to connect to the upcall pipes when starting up, or because it crashes and fails to restart correctly. That's why, when I resubmitted this patch, I included https://git.linux-nfs.org/?p=cel/cel-2.6.git;a=commitdiff;h=b840228cd6096bebe16b3e4eb5d93597d0e02c6d which turns off that particular test for all the upcalls to rpc.mountd. Note that the patch series I sent includes a bunch of kernel tracepoints that we can enable to help debugging if we see anything similar happening in the future. Cheers Trond -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@hammerspace.com ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-03-26 21:42 ` Trond Myklebust @ 2020-03-27 1:50 ` J. Bruce Fields 2020-03-27 12:33 ` Trond Myklebust 0 siblings, 1 reply; 11+ messages in thread From: J. Bruce Fields @ 2020-03-27 1:50 UTC (permalink / raw) To: Trond Myklebust; +Cc: bfields, kinglongmee, linux-nfs On Thu, Mar 26, 2020 at 09:42:19PM +0000, Trond Myklebust wrote: > On Thu, 2020-03-26 at 16:40 -0400, bfields@fieldses.org wrote: > > Maybe the cache_is_expired() logic should be something more like: > > > > if (h->expiry_time < seconds_since_boot()) > > return true; > > if (!test_bit(CACHE_VALID, &h->flags)) > > return false; > > return h->expiry_time < seconds_since_boot(); > > > > So invalid cache entries (which are waiting for a reply from mountd) > > can > > expire, but they can't be flushed. If that makes sense. > > > > As a stopgap we may want to revert or drop the "Allow garbage > > collection" patch, as the (preexisting) memory leak seems lower > > impact > > than the server hang. > > I believe you were probably seeing the effect of the > cache_listeners_exist() test, which is just wrong for all cache upcall > users except idmapper and svcauth_gss. We should not be creating > negative cache entries just because the rpc.mountd daemon happens to be > slow to connect to the upcall pipes when starting up, or because it > crashes and fails to restart correctly. > > That's why, when I resubmitted this patch, I included > https://git.linux-nfs.org/?p=cel/cel-2.6.git;a=commitdiff;h=b840228cd6096bebe16b3e4eb5d93597d0e02c6d > > which turns off that particular test for all the upcalls to rpc.mountd. The hangs persist with that patch, but go away with the change to the cache_is_expired() logic above. --b. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries 2020-03-27 1:50 ` J. Bruce Fields @ 2020-03-27 12:33 ` Trond Myklebust 2020-03-27 15:53 ` [PATCH] SUNRPC/cache: don't allow invalid entries to be flushed J. Bruce Fields 0 siblings, 1 reply; 11+ messages in thread From: Trond Myklebust @ 2020-03-27 12:33 UTC (permalink / raw) To: bfields; +Cc: bfields, kinglongmee, linux-nfs On Thu, 2020-03-26 at 21:50 -0400, J. Bruce Fields wrote: > On Thu, Mar 26, 2020 at 09:42:19PM +0000, Trond Myklebust wrote: > > On Thu, 2020-03-26 at 16:40 -0400, bfields@fieldses.org wrote: > > > Maybe the cache_is_expired() logic should be something more like: > > > > > > if (h->expiry_time < seconds_since_boot()) > > > return true; > > > if (!test_bit(CACHE_VALID, &h->flags)) > > > return false; > > > return h->expiry_time < seconds_since_boot(); Did you mean return detail->flush_time >= h->last_refresh; instead of repeating the h->expiry_time check? > > > > > > So invalid cache entries (which are waiting for a reply from > > > mountd) > > > can > > > expire, but they can't be flushed. If that makes sense. > > > > > > As a stopgap we may want to revert or drop the "Allow garbage > > > collection" patch, as the (preexisting) memory leak seems lower > > > impact > > > than the server hang. > > > > I believe you were probably seeing the effect of the > > cache_listeners_exist() test, which is just wrong for all cache > > upcall > > users except idmapper and svcauth_gss. We should not be creating > > negative cache entries just because the rpc.mountd daemon happens > > to be > > slow to connect to the upcall pipes when starting up, or because it > > crashes and fails to restart correctly. > > > > That's why, when I resubmitted this patch, I included > > https://git.linux-nfs.org/?p=cel/cel-2.6.git;a=commitdiff;h=b840228cd6096bebe16b3e4eb5d93597d0e02c6d > > > > which turns off that particular test for all the upcalls to > > rpc.mountd. > > The hangs persist with that patch, but go away with the change to the > cache_is_expired() logic above. Fair enough. Do you want to send Chuck a fix? -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@hammerspace.com ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] SUNRPC/cache: don't allow invalid entries to be flushed 2020-03-27 12:33 ` Trond Myklebust @ 2020-03-27 15:53 ` J. Bruce Fields 2020-03-27 16:15 ` Chuck Lever 0 siblings, 1 reply; 11+ messages in thread From: J. Bruce Fields @ 2020-03-27 15:53 UTC (permalink / raw) To: Chuck Lever; +Cc: Trond Myklebust, bfields, kinglongmee, linux-nfs From: "J. Bruce Fields" <bfields@redhat.com> Trond points out in 277f27e2f277 that we allow invalid cache entries to persist indefinitely. That fix, however, reintroduces the problem fixed by Kinglong Mee's d6fc8821c2d2 "SUNRPC/Cache: Always treat the invalid cache as unexpired", where an invalid cache entry is immediately removed by a flush before mountd responds to it. The result is that the server thread that should be waiting for mountd to fill in that entry instead gets an -ETIMEDOUT return from cache_check(). Symptoms are the server becoming unresponsive after a restart, reproduceable by running pynfs 4.1 test REBT5. Instead, take a compromise approach: allow invalid cache entries to be removed after they expire, but not to be removed by a cache flush. Fixes: 277f27e2f277 "SUNRPC/cache: Allow garbage collection..." Signed-off-by: J. Bruce Fields <bfields@redhat.com> --- include/linux/sunrpc/cache.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 532cdbda43da..10891b70fc7b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -209,8 +209,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time >= h->last_refresh); + if (h->expiry_time < seconds_since_boot()) + return true; + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return detail->flush_time >= h->last_refresh; } extern int cache_check(struct cache_detail *detail, -- 2.25.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] SUNRPC/cache: don't allow invalid entries to be flushed 2020-03-27 15:53 ` [PATCH] SUNRPC/cache: don't allow invalid entries to be flushed J. Bruce Fields @ 2020-03-27 16:15 ` Chuck Lever 0 siblings, 0 replies; 11+ messages in thread From: Chuck Lever @ 2020-03-27 16:15 UTC (permalink / raw) To: Bruce Fields Cc: Trond Myklebust, Bruce Fields, kinglongmee, Linux NFS Mailing List > On Mar 27, 2020, at 11:53 AM, J. Bruce Fields <bfields@redhat.com> wrote: > > From: "J. Bruce Fields" <bfields@redhat.com> > > Trond points out in 277f27e2f277 that we allow invalid cache entries to > persist indefinitely. That fix, however, reintroduces the problem fixed > by Kinglong Mee's d6fc8821c2d2 "SUNRPC/Cache: Always treat the invalid > cache as unexpired", where an invalid cache entry is immediately removed > by a flush before mountd responds to it. The result is that the server > thread that should be waiting for mountd to fill in that entry instead > gets an -ETIMEDOUT return from cache_check(). Symptoms are the server > becoming unresponsive after a restart, reproduceable by running pynfs > 4.1 test REBT5. > > Instead, take a compromise approach: allow invalid cache entries to be > removed after they expire, but not to be removed by a cache flush. > > Fixes: 277f27e2f277 "SUNRPC/cache: Allow garbage collection..." > Signed-off-by: J. Bruce Fields <bfields@redhat.com> Thanks, Bruce. checkpatch.pl is complaining loudly about the style of the short commit descriptions. I'll fix those up before applying it to nfsd-5.7. > --- > include/linux/sunrpc/cache.h | 7 +++++-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h > index 532cdbda43da..10891b70fc7b 100644 > --- a/include/linux/sunrpc/cache.h > +++ b/include/linux/sunrpc/cache.h > @@ -209,8 +209,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) > > static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) > { > - return (h->expiry_time < seconds_since_boot()) || > - (detail->flush_time >= h->last_refresh); > + if (h->expiry_time < seconds_since_boot()) > + return true; > + if (!test_bit(CACHE_VALID, &h->flags)) > + return false; > + return detail->flush_time >= h->last_refresh; > } > > extern int cache_check(struct cache_detail *detail, > -- > 2.25.1 > -- Chuck Lever ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2020-03-27 16:17 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-01-14 16:57 [PATCH] SUNRPC/cache: Allow garbage collection of invalid cache entries Trond Myklebust 2020-02-06 16:33 ` J. Bruce Fields 2020-02-07 14:25 ` Trond Myklebust 2020-02-07 18:18 ` bfields 2020-02-10 18:47 ` Trond Myklebust 2020-03-26 20:40 ` bfields 2020-03-26 21:42 ` Trond Myklebust 2020-03-27 1:50 ` J. Bruce Fields 2020-03-27 12:33 ` Trond Myklebust 2020-03-27 15:53 ` [PATCH] SUNRPC/cache: don't allow invalid entries to be flushed J. Bruce Fields 2020-03-27 16:15 ` Chuck Lever
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).