All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] SUNRPC: Cache timeout injection
@ 2022-03-31 20:38 Chuck Lever
  2022-04-01 20:36 ` Chuck Lever III
  0 siblings, 1 reply; 2+ messages in thread
From: Chuck Lever @ 2022-03-31 20:38 UTC (permalink / raw)
  To: linux-nfs

Cache timeout injection stress-tests the cache timeout logic as well
as the deferred request handlers of upper-layer protocols.

A file called /sys/kernel/debug/fail_sunrpc/ignore-cache-timeout
enables administrators to turn off cache timeout injection while
allowing other types of sunrpc errors to be injected. The default
setting is that cache timeout injection is enabled (ignore=false).

To enable cache timeout injection, CONFIG_FAULT_INJECTION,
CONFIG_FAULT_INJECTION_DEBUG_FS, and CONFIG_SUNRPC_DEBUG must all be
set to "Y".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/cache.c   |   16 ++++++++++++++++
 net/sunrpc/debugfs.c |    3 +++
 net/sunrpc/fail.h    |    2 +-
 3 files changed, 20 insertions(+), 1 deletion(-)


Proof of concept: compile-tested only. The idea is to inject timeout
failures in the cache code so we can see what happens when a request
actually has to be deferred.


diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index bb1177395b99..e5ec125afec9 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,7 +33,9 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <trace/events/sunrpc.h>
+
 #include "netns.h"
+#include "fail.h"
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
@@ -629,6 +631,19 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
 	complete(&dr->completion);
 }
 
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static inline bool cache_timeout_should_fail(void)
+{
+	return !fail_sunrpc.ignore_cache_timeout &&
+		should_fail(&fail_sunrpc.attr, 1);
+}
+#else
+static inline bool cache_timeout_should_fail(void)
+{
+	return false;
+}
+#endif
+
 static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 {
 	struct thread_deferred_req sleeper;
@@ -640,6 +655,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 	setup_deferral(dreq, item, 0);
 
 	if (!test_bit(CACHE_PENDING, &item->flags) ||
+	    cache_timeout_should_fail() ||
 	    wait_for_completion_interruptible_timeout(
 		    &sleeper.completion, req->thread_wait) <= 0) {
 		/* The completion wasn't completed, so we need
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 7dc9cc929bfd..68272885873a 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
 
 	debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
 			    &fail_sunrpc.ignore_server_disconnect);
+
+	debugfs_create_bool("ignore-cache-timeout", S_IFREG | 0600, dir,
+			    &fail_sunrpc.ignore_cache_timeout);
 }
 #else
 static void fail_sunrpc_init(void)
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
index 69dc30cc44b8..13b8436b5f15 100644
--- a/net/sunrpc/fail.h
+++ b/net/sunrpc/fail.h
@@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
 	struct fault_attr	attr;
 
 	bool			ignore_client_disconnect;
-
 	bool			ignore_server_disconnect;
+	bool			ignore_cache_timeout;
 };
 
 extern struct fail_sunrpc_attr fail_sunrpc;



^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH RFC] SUNRPC: Cache timeout injection
  2022-03-31 20:38 [PATCH RFC] SUNRPC: Cache timeout injection Chuck Lever
@ 2022-04-01 20:36 ` Chuck Lever III
  0 siblings, 0 replies; 2+ messages in thread
From: Chuck Lever III @ 2022-04-01 20:36 UTC (permalink / raw)
  To: Linux NFS Mailing List; +Cc: Trond Myklebust


> On Mar 31, 2022, at 4:38 PM, Chuck Lever <chuck.lever@oracle.com> wrote:
> 
> Cache timeout injection stress-tests the cache timeout logic as well
> as upper layer protocol deferred request handlers.
> 
> A file called /sys/kernel/debug/fail_sunrpc/ignore-cache-timeout
> enables administrators to turn off cache timeout injection while
> allowing other types of sunrpc errors to be injected. The default
> setting is that cache timeout injection is enabled (ignore=false).
> 
> To enable cache timeout injection, CONFIG_FAULT_INJECTION,
> CONFIG_FAULT_INJECTION_DEBUG_FS, and CONFIG_SUNRPC_DEBUG must all be
> set to "Y".
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> net/sunrpc/cache.c   |   16 ++++++++++++++++
> net/sunrpc/debugfs.c |    3 +++
> net/sunrpc/fail.h    |    2 +-
> 3 files changed, 20 insertions(+), 1 deletion(-)
> 
> 
> Proof of concept: compile-tested only. The idea is to inject timeout
> failures in the cache code so we can see what happens when a rqst
> actually has to be deferred.

Using v2 of this RFC patch, I am able to reproduce Trond's
crash exactly on the same nfsd thread that's handling a
deferred request.

I'll work on addressing it.


> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> index bb1177395b99..e5ec125afec9 100644
> --- a/net/sunrpc/cache.c
> +++ b/net/sunrpc/cache.c
> @@ -33,7 +33,9 @@
> #include <linux/sunrpc/stats.h>
> #include <linux/sunrpc/rpc_pipe_fs.h>
> #include <trace/events/sunrpc.h>
> +
> #include "netns.h"
> +#include "fail.h"
> 
> #define	 RPCDBG_FACILITY RPCDBG_CACHE
> 
> @@ -629,6 +631,19 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
> 	complete(&dr->completion);
> }
> 
> +#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
> +static inline bool cache_timeout_should_fail(void)
> +{
> +	return !fail_sunrpc.ignore_cache_timeout &&
> +		should_fail(&fail_sunrpc.attr, 1);
> +}
> +#else
> +static inline bool cache_timeout_should_fail(void)
> +{
> +	return false;
> +}
> +#endif
> +
> static void cache_wait_req(struct cache_req *req, struct cache_head *item)
> {
> 	struct thread_deferred_req sleeper;
> @@ -640,6 +655,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
> 	setup_deferral(dreq, item, 0);
> 
> 	if (!test_bit(CACHE_PENDING, &item->flags) ||
> +	    cache_timeout_should_fail() ||
> 	    wait_for_completion_interruptible_timeout(
> 		    &sleeper.completion, req->thread_wait) <= 0) {
> 		/* The completion wasn't completed, so we need
> diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
> index 7dc9cc929bfd..68272885873a 100644
> --- a/net/sunrpc/debugfs.c
> +++ b/net/sunrpc/debugfs.c
> @@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
> 
> 	debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
> 			    &fail_sunrpc.ignore_server_disconnect);
> +
> +	debugfs_create_bool("ignore-cache-timeout", S_IFREG | 0600, dir,
> +			    &fail_sunrpc.ignore_cache_timeout);
> }
> #else
> static void fail_sunrpc_init(void)
> diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
> index 69dc30cc44b8..13b8436b5f15 100644
> --- a/net/sunrpc/fail.h
> +++ b/net/sunrpc/fail.h
> @@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
> 	struct fault_attr	attr;
> 
> 	bool			ignore_client_disconnect;
> -
> 	bool			ignore_server_disconnect;
> +	bool			ignore_cache_timeout;
> };
> 
> extern struct fail_sunrpc_attr fail_sunrpc;
> 
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-04-01 20:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-31 20:38 [PATCH RFC] SUNRPC: Cache timeout injection Chuck Lever
2022-04-01 20:36 ` Chuck Lever III

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.