* [PATCH RFC] SUNRPC: Cache timeout injection
@ 2022-03-31 20:38 Chuck Lever
2022-04-01 20:36 ` Chuck Lever III
0 siblings, 1 reply; 2+ messages in thread
From: Chuck Lever @ 2022-03-31 20:38 UTC (permalink / raw)
To: linux-nfs
Cache timeout injection stress-tests the cache timeout logic as well
as upper layer protocol deferred request handlers.
A file called /sys/kernel/debug/fail_sunrpc/ignore-cache-timeout
lets administrators turn off cache timeout injection while still
allowing other types of sunrpc errors to be injected. By default,
cache timeout injection is enabled (ignore-cache-timeout is false).
To enable cache timeout injection, CONFIG_FAULT_INJECTION,
CONFIG_FAULT_INJECTION_DEBUG_FS, CONFIG_SUNRPC_DEBUG, and
CONFIG_FAIL_SUNRPC (which guards the injection code) must all be
set to "Y".
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/cache.c | 16 ++++++++++++++++
net/sunrpc/debugfs.c | 3 +++
net/sunrpc/fail.h | 2 +-
3 files changed, 20 insertions(+), 1 deletion(-)
Proof of concept: compile-tested only. The idea is to inject timeout
failures in the cache code so we can see what happens when a request
actually has to be deferred.
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index bb1177395b99..e5ec125afec9 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,7 +33,9 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <trace/events/sunrpc.h>
+
#include "netns.h"
+#include "fail.h"
#define RPCDBG_FACILITY RPCDBG_CACHE
@@ -629,6 +631,19 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
complete(&dr->completion);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static inline bool cache_timeout_should_fail(void)
+{
+ return !fail_sunrpc.ignore_cache_timeout &&
+ should_fail(&fail_sunrpc.attr, 1);
+}
+#else
+static inline bool cache_timeout_should_fail(void)
+{
+ return false;
+}
+#endif
+
static void cache_wait_req(struct cache_req *req, struct cache_head *item)
{
struct thread_deferred_req sleeper;
@@ -640,6 +655,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
setup_deferral(dreq, item, 0);
if (!test_bit(CACHE_PENDING, &item->flags) ||
+ cache_timeout_should_fail() ||
wait_for_completion_interruptible_timeout(
&sleeper.completion, req->thread_wait) <= 0) {
/* The completion wasn't completed, so we need
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 7dc9cc929bfd..68272885873a 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_server_disconnect);
+
+ debugfs_create_bool("ignore-cache-timeout", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_cache_timeout);
}
#else
static void fail_sunrpc_init(void)
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
index 69dc30cc44b8..13b8436b5f15 100644
--- a/net/sunrpc/fail.h
+++ b/net/sunrpc/fail.h
@@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
struct fault_attr attr;
bool ignore_client_disconnect;
-
bool ignore_server_disconnect;
+ bool ignore_cache_timeout;
};
extern struct fail_sunrpc_attr fail_sunrpc;
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH RFC] SUNRPC: Cache timeout injection
2022-03-31 20:38 [PATCH RFC] SUNRPC: Cache timeout injection Chuck Lever
@ 2022-04-01 20:36 ` Chuck Lever III
0 siblings, 0 replies; 2+ messages in thread
From: Chuck Lever III @ 2022-04-01 20:36 UTC (permalink / raw)
To: Linux NFS Mailing List; +Cc: Trond Myklebust
> On Mar 31, 2022, at 4:38 PM, Chuck Lever <chuck.lever@oracle.com> wrote:
>
> Cache timeout injection stress-tests the cache timeout logic as well
> as upper layer protocol deferred request handlers.
>
> A file called /sys/kernel/debug/fail_sunrpc/ignore-cache-timeout
> enables administrators to turn off cache timeout injection while
> allowing other types of sunrpc errors to be injected. The default
> setting is that cache timeout injection is enabled (ignore=false).
>
> To enable cache timeout injection, CONFIG_FAULT_INJECTION,
> CONFIG_FAULT_INJECTION_DEBUG_FS, and CONFIG_SUNRPC_DEBUG must all be
> set to "Y".
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> net/sunrpc/cache.c | 16 ++++++++++++++++
> net/sunrpc/debugfs.c | 3 +++
> net/sunrpc/fail.h | 2 +-
> 3 files changed, 20 insertions(+), 1 deletion(-)
>
>
> Proof of concept: compile-tested only. The idea is to inject timeout
> failures in the cache code so we can see what happens when a rqst
> actually has to be deferred.
Using v2 of this RFC patch, I am able to reproduce Trond's
crash exactly on the same nfsd thread that's handling a
deferred request.
I'll work on addressing it.
> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> index bb1177395b99..e5ec125afec9 100644
> --- a/net/sunrpc/cache.c
> +++ b/net/sunrpc/cache.c
> @@ -33,7 +33,9 @@
> #include <linux/sunrpc/stats.h>
> #include <linux/sunrpc/rpc_pipe_fs.h>
> #include <trace/events/sunrpc.h>
> +
> #include "netns.h"
> +#include "fail.h"
>
> #define RPCDBG_FACILITY RPCDBG_CACHE
>
> @@ -629,6 +631,19 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
> complete(&dr->completion);
> }
>
> +#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
> +static inline bool cache_timeout_should_fail(void)
> +{
> + return !fail_sunrpc.ignore_cache_timeout &&
> + should_fail(&fail_sunrpc.attr, 1);
> +}
> +#else
> +static inline bool cache_timeout_should_fail(void)
> +{
> + return false;
> +}
> +#endif
> +
> static void cache_wait_req(struct cache_req *req, struct cache_head *item)
> {
> struct thread_deferred_req sleeper;
> @@ -640,6 +655,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
> setup_deferral(dreq, item, 0);
>
> if (!test_bit(CACHE_PENDING, &item->flags) ||
> + cache_timeout_should_fail() ||
> wait_for_completion_interruptible_timeout(
> &sleeper.completion, req->thread_wait) <= 0) {
> /* The completion wasn't completed, so we need
> diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
> index 7dc9cc929bfd..68272885873a 100644
> --- a/net/sunrpc/debugfs.c
> +++ b/net/sunrpc/debugfs.c
> @@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
>
> debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
> &fail_sunrpc.ignore_server_disconnect);
> +
> + debugfs_create_bool("ignore-cache-timeout", S_IFREG | 0600, dir,
> + &fail_sunrpc.ignore_cache_timeout);
> }
> #else
> static void fail_sunrpc_init(void)
> diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
> index 69dc30cc44b8..13b8436b5f15 100644
> --- a/net/sunrpc/fail.h
> +++ b/net/sunrpc/fail.h
> @@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
> struct fault_attr attr;
>
> bool ignore_client_disconnect;
> -
> bool ignore_server_disconnect;
> + bool ignore_cache_timeout;
> };
>
> extern struct fail_sunrpc_attr fail_sunrpc;
>
>
--
Chuck Lever
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-04-01 20:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-31 20:38 [PATCH RFC] SUNRPC: Cache timeout injection Chuck Lever
2022-04-01 20:36 ` Chuck Lever III
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.