[PATCH v3 1/3] SUNRPC: Prevent thundering herd when the socket is not connected

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 1/3] SUNRPC: Prevent thundering herd when the socket is not connected
@ 2019-03-08  0:46 Trond Myklebust
  2019-03-08  0:46 ` [PATCH v3 2/3] SUNRPC: Fix up RPC back channel transmission Trond Myklebust
  0 siblings, 1 reply; 3+ messages in thread
From: Trond Myklebust @ 2019-03-08  0:46 UTC (permalink / raw)
  To: linux-nfs

If the socket is not connected, then we want to initiate a reconnect
rather than trying to transmit requests. If there is a large number
of requests queued and waiting for the lock in call_transmit(),
then it can take a while for one of them to loop back and retake
the lock in call_connect.

Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/clnt.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 241e8423fd0c..7ab4da342ab5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1807,7 +1807,12 @@ call_encode(struct rpc_task *task)
 		xprt_request_enqueue_receive(task);
 	xprt_request_enqueue_transmit(task);
 out:
-	task->tk_action = call_bind;
+	task->tk_action = call_transmit;
+	/* Check that the connection is OK */
+	if (!xprt_bound(task->tk_xprt))
+		task->tk_action = call_bind;
+	else if (!xprt_connected(task->tk_xprt))
+		task->tk_action = call_connect;
 }
 
 /*
@@ -1999,13 +2004,19 @@ call_transmit(struct rpc_task *task)
 {
 	dprint_status(task);
 
-	task->tk_status = 0;
+	task->tk_action = call_transmit_status;
 	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
 		if (!xprt_prepare_transmit(task))
 			return;
-		xprt_transmit(task);
+		task->tk_status = 0;
+		if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+			if (!xprt_connected(task->tk_xprt)) {
+				task->tk_status = -ENOTCONN;
+				return;
+			}
+			xprt_transmit(task);
+		}
 	}
-	task->tk_action = call_transmit_status;
 	xprt_end_transmit(task);
 }
 
@@ -2067,6 +2078,8 @@ call_transmit_status(struct rpc_task *task)
 	case -EADDRINUSE:
 	case -ENOTCONN:
 	case -EPIPE:
+		task->tk_action = call_bind;
+		task->tk_status = 0;
 		break;
 	}
 }
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v3 2/3] SUNRPC: Fix up RPC back channel transmission
  2019-03-08  0:46 [PATCH v3 1/3] SUNRPC: Prevent thundering herd when the socket is not connected Trond Myklebust
@ 2019-03-08  0:46 ` Trond Myklebust
  2019-03-08  0:46   ` [PATCH v3 3/3] SUNRPC: Respect RPC call timeouts when retrying transmission Trond Myklebust
  0 siblings, 1 reply; 3+ messages in thread
From: Trond Myklebust @ 2019-03-08  0:46 UTC (permalink / raw)
  To: linux-nfs

Now that transmissions happen through a queue, we require the RPC tasks
to handle error conditions that may have been set while they were
sleeping. The back channel does not currently do this, but assumes
that any error condition happens during its own call to xprt_transmit().

The solution is to ensure that the back channel splits out the
error handling just like the forward channel does.

Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/clnt.c | 61 +++++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7ab4da342ab5..b9558e10c6c1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -66,9 +66,6 @@ static void	call_decode(struct rpc_task *task);
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
 static void	call_transmit(struct rpc_task *task);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static void	call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void	call_status(struct rpc_task *task);
 static void	call_transmit_status(struct rpc_task *task);
 static void	call_refresh(struct rpc_task *task);
@@ -1133,6 +1130,8 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_encode(struct rpc_task *task);
+
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -1154,7 +1153,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	task = rpc_new_task(&task_setup_data);
 	xprt_init_bc_request(req, task);
 
-	task->tk_action = call_bc_transmit;
+	task->tk_action = call_bc_encode;
 	atomic_inc(&task->tk_count);
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
 	rpc_execute(task);
@@ -2085,6 +2084,16 @@ call_transmit_status(struct rpc_task *task)
 }
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_transmit(struct rpc_task *task);
+static void call_bc_transmit_status(struct rpc_task *task);
+
+static void
+call_bc_encode(struct rpc_task *task)
+{
+	xprt_request_enqueue_transmit(task);
+	task->tk_action = call_bc_transmit;
+}
+
 /*
  * 5b.	Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -2092,26 +2101,23 @@ call_transmit_status(struct rpc_task *task)
 static void
 call_bc_transmit(struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-
-	if (rpc_task_need_encode(task))
-		xprt_request_enqueue_transmit(task);
-	if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
-		goto out_wakeup;
-
-	if (!xprt_prepare_transmit(task))
-		goto out_retry;
-
-	if (task->tk_status < 0) {
-		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
-			"error: %d\n", task->tk_status);
-		goto out_done;
+	task->tk_action = call_bc_transmit_status;
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		if (!xprt_prepare_transmit(task))
+			return;
+		task->tk_status = 0;
+		xprt_transmit(task);
 	}
+	xprt_end_transmit(task);
+}
 
-	xprt_transmit(task);
+static void
+call_bc_transmit_status(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
 
-	xprt_end_transmit(task);
 	dprint_status(task);
+
 	switch (task->tk_status) {
 	case 0:
 		/* Success */
@@ -2125,8 +2131,14 @@ call_bc_transmit(struct rpc_task *task)
 	case -ENOTCONN:
 	case -EPIPE:
 		break;
+	case -ENOBUFS:
+		rpc_delay(task, HZ>>2);
+		/* fall through */
+	case -EBADSLT:
 	case -EAGAIN:
-		goto out_retry;
+		task->tk_status = 0;
+		task->tk_action = call_bc_transmit;
+		return;
 	case -ETIMEDOUT:
 		/*
 		 * Problem reaching the server.  Disconnect and let the
@@ -2145,18 +2157,11 @@ call_bc_transmit(struct rpc_task *task)
 		 * We were unable to reply and will have to drop the
 		 * request.  The server should reconnect and retransmit.
 		 */
-		WARN_ON_ONCE(task->tk_status == -EAGAIN);
 		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
 			"error: %d\n", task->tk_status);
 		break;
 	}
-out_wakeup:
-	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
-out_done:
 	task->tk_action = rpc_exit_task;
-	return;
-out_retry:
-	task->tk_status = 0;
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v3 3/3] SUNRPC: Respect RPC call timeouts when retrying transmission
  2019-03-08  0:46 ` [PATCH v3 2/3] SUNRPC: Fix up RPC back channel transmission Trond Myklebust
@ 2019-03-08  0:46   ` Trond Myklebust
  0 siblings, 0 replies; 3+ messages in thread
From: Trond Myklebust @ 2019-03-08  0:46 UTC (permalink / raw)
  To: linux-nfs

Fix a regression where soft and softconn requests are not timing out
as expected.

Fixes: 89f90fe1ad8b ("SUNRPC: Allow calls to xprt_transmit() to drain...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/clnt.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b9558e10c6c1..311029b7c33a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,6 +79,7 @@ static int	rpc_encode_header(struct rpc_task *task,
 static int	rpc_decode_header(struct rpc_task *task,
 				  struct xdr_stream *xdr);
 static int	rpc_ping(struct rpc_clnt *clnt);
+static void	rpc_check_timeout(struct rpc_task *task);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
@@ -1962,8 +1963,7 @@ call_connect_status(struct rpc_task *task)
 			break;
 		if (clnt->cl_autobind) {
 			rpc_force_rebind(clnt);
-			task->tk_action = call_bind;
-			return;
+			goto out_retry;
 		}
 		/* fall through */
 	case -ECONNRESET:
@@ -1983,16 +1983,19 @@ call_connect_status(struct rpc_task *task)
 		/* fall through */
 	case -ENOTCONN:
 	case -EAGAIN:
-		/* Check for timeouts before looping back to call_bind */
 	case -ETIMEDOUT:
-		task->tk_action = call_timeout;
-		return;
+		goto out_retry;
 	case 0:
 		clnt->cl_stats->netreconn++;
 		task->tk_action = call_transmit;
 		return;
 	}
 	rpc_exit(task, status);
+	return;
+out_retry:
+	/* Check for timeouts before looping back to call_bind */
+	task->tk_action = call_bind;
+	rpc_check_timeout(task);
 }
 
 /*
@@ -2069,7 +2072,7 @@ call_transmit_status(struct rpc_task *task)
 				trace_xprt_ping(task->tk_xprt,
 						task->tk_status);
 			rpc_exit(task, task->tk_status);
-			break;
+			return;
 		}
 		/* fall through */
 	case -ECONNRESET:
@@ -2081,6 +2084,7 @@ call_transmit_status(struct rpc_task *task)
 		task->tk_status = 0;
 		break;
 	}
+	rpc_check_timeout(task);
 }
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
@@ -2217,7 +2221,7 @@ call_status(struct rpc_task *task)
 	case -EPIPE:
 	case -ENOTCONN:
 	case -EAGAIN:
-		task->tk_action = call_encode;
+		task->tk_action = call_timeout;
 		break;
 	case -EIO:
 		/* shutdown or soft timeout */
@@ -2231,20 +2235,13 @@ call_status(struct rpc_task *task)
 	}
 }
 
-/*
- * 6a.	Handle RPC timeout
- * 	We do not release the request slot, so we keep using the
- *	same XID for all retransmits.
- */
 static void
-call_timeout(struct rpc_task *task)
+rpc_check_timeout(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 
-	if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
-		dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid);
-		goto retry;
-	}
+	if (xprt_adjust_timeout(task->tk_rqstp) == 0)
+		return;
 
 	dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
 	task->tk_timeouts++;
@@ -2280,10 +2277,19 @@ call_timeout(struct rpc_task *task)
 	 * event? RFC2203 requires the server to drop all such requests.
 	 */
 	rpcauth_invalcred(task);
+}
 
-retry:
+/*
+ * 6a.	Handle RPC timeout
+ * 	We do not release the request slot, so we keep using the
+ *	same XID for all retransmits.
+ */
+static void
+call_timeout(struct rpc_task *task)
+{
 	task->tk_action = call_encode;
 	task->tk_status = 0;
+	rpc_check_timeout(task);
 }
 
 /*
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-03-08  0:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-08  0:46 [PATCH v3 1/3] SUNRPC: Prevent thundering herd when the socket is not connected Trond Myklebust
2019-03-08  0:46 ` [PATCH v3 2/3] SUNRPC: Fix up RPC back channel transmission Trond Myklebust
2019-03-08  0:46   ` [PATCH v3 3/3] SUNRPC: Respect RPC call timeouts when retrying transmission Trond Myklebust

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.