All of lore.kernel.org
 help / color / mirror / Atom feed
From: Trond Myklebust <trondmy@gmail.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH v2 25/34] SUNRPC: Support for congestion control when queuing is enabled
Date: Tue,  4 Sep 2018 17:05:40 -0400	[thread overview]
Message-ID: <20180904210549.81673-26-trond.myklebust@hammerspace.com> (raw)
In-Reply-To: <20180904210549.81673-25-trond.myklebust@hammerspace.com>

Both RDMA and UDP transports require the request to get a "congestion control"
credit before they can be transmitted. Right now, this is done when
the request locks the socket. We'd like it to happen when a request attempts
to be transmitted for the first time.
In order to support retransmission of requests that already hold such
credits, we also want to ensure that they get queued first, so that we
don't deadlock with requests that have yet to obtain a credit.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h       |  1 +
 net/sunrpc/clnt.c                 |  5 +++
 net/sunrpc/xprt.c                 | 70 ++++++++++++++++++-------------
 net/sunrpc/xprtrdma/backchannel.c |  3 ++
 net/sunrpc/xprtrdma/transport.c   |  3 ++
 net/sunrpc/xprtsock.c             |  4 ++
 6 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6d91acfe0644..b23c757bebfc 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -395,6 +395,7 @@ void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_pin_rqst(struct rpc_rqst *req);
 void			xprt_unpin_rqst(struct rpc_rqst *req);
 void			xprt_release_rqst_cong(struct rpc_task *task);
+bool			xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req);
 void			xprt_disconnect_done(struct rpc_xprt *xprt);
 void			xprt_force_disconnect(struct rpc_xprt *xprt);
 void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 00384bde593e..52494baca7bc 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2003,6 +2003,11 @@ call_transmit_status(struct rpc_task *task)
 		dprint_status(task);
 		xprt_end_transmit(task);
 		break;
+	case -EBADSLT:
+		xprt_end_transmit(task);
+		task->tk_action = call_transmit;
+		task->tk_status = 0;
+		break;
 	case -EBADMSG:
 		xprt_end_transmit(task);
 		task->tk_action = call_encode;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e2f5b4668a66..a7a93d61567f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,8 +68,6 @@
 static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
 static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
 static void	xprt_connect_status(struct rpc_task *task);
-static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
 static void	 xprt_destroy(struct rpc_xprt *xprt);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
@@ -228,6 +226,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
  * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
  */
 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -243,14 +242,12 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 		xprt->snd_task = task;
 		return 1;
 	}
-	if (__xprt_get_cong(xprt, task)) {
+	if (!RPCXPRT_CONGESTED(xprt)) {
 		xprt->snd_task = task;
 		return 1;
 	}
 	xprt_clear_locked(xprt);
 out_sleep:
-	if (req)
-		__xprt_put_cong(xprt, req);
 	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
@@ -294,24 +291,6 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
 	xprt_clear_locked(xprt);
 }
 
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
-	struct rpc_xprt *xprt = data;
-	struct rpc_rqst *req;
-
-	req = task->tk_rqstp;
-	if (req == NULL) {
-		xprt->snd_task = task;
-		return true;
-	}
-	if (__xprt_get_cong(xprt, task)) {
-		xprt->snd_task = task;
-		req->rq_ntrans++;
-		return true;
-	}
-	return false;
-}
-
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
@@ -319,7 +298,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 	if (RPCXPRT_CONGESTED(xprt))
 		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
-				__xprt_lock_write_cong_func, xprt))
+				__xprt_lock_write_func, xprt))
 		return;
 out_unlock:
 	xprt_clear_locked(xprt);
@@ -370,14 +349,12 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
  * overflowed. Put the task to sleep if this is the case.
  */
 static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-
 	if (req->rq_cong)
 		return 1;
 	dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
-			task->tk_pid, xprt->cong, xprt->cwnd);
+			req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
 	if (RPCXPRT_CONGESTED(xprt))
 		return 0;
 	req->rq_cong = 1;
@@ -399,6 +376,25 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
 	__xprt_lock_write_next_cong(xprt);
 }
 
+/**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	bool ret = false;
+
+	spin_lock_bh(&xprt->transport_lock);
+	ret = __xprt_get_cong(xprt, req) != 0;
+	spin_unlock_bh(&xprt->transport_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
 /**
  * xprt_release_rqst_cong - housekeeping when request is complete
  * @task: RPC request that recently completed
@@ -1050,8 +1046,24 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 
 	spin_lock(&xprt->queue_lock);
 	if (list_empty(&req->rq_xmit) && xprt_request_need_transmit(task) &&
-	    test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+	    test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		/*
+		 * Requests that carry congestion control credits are added
+		 * to the head of the list to avoid starvation issues.
+		 */
+		if (req->rq_cong) {
+			struct rpc_rqst *pos;
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_cong)
+					continue;
+				/* Note: req is added _before_ pos */
+				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+				goto out;
+			}
+		}
 		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+	}
+out:
 	spin_unlock(&xprt->queue_lock);
 }
 
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index fc01fdabbbce..14fc4596075e 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -202,6 +202,9 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
 	if (!xprt_connected(rqst->rq_xprt))
 		goto drop_connection;
 
+	if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+		return -EBADSLT;
+
 	rc = rpcrdma_bc_marshal_reply(rqst);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index fa684bf4d090..9ff322e53f37 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -721,6 +721,9 @@ xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
 	if (!xprt_connected(xprt))
 		goto drop_connection;
 
+	if (!xprt_request_get_cong(xprt, rqst))
+		return -EBADSLT;
+
 	rc = rpcrdma_marshal_req(r_xprt, rqst);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b8143eded4af..8831e84a058a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -609,6 +609,10 @@ static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task)
 
 	if (!xprt_bound(xprt))
 		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, req))
+		return -EBADSLT;
+
 	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
 			      xdr, 0, true, &sent);
-- 
2.17.1

  reply	other threads:[~2018-09-05  1:33 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-04 21:05 [PATCH v2 00/34] Convert RPC client transmission to a queued model Trond Myklebust
2018-09-04 21:05 ` [PATCH v2 01/34] SUNRPC: Clean up initialisation of the struct rpc_rqst Trond Myklebust
2018-09-04 21:05   ` [PATCH v2 02/34] SUNRPC: If there is no reply expected, bail early from call_decode Trond Myklebust
2018-09-04 21:05     ` [PATCH v2 03/34] SUNRPC: The transmitted message must lie in the RPCSEC window of validity Trond Myklebust
2018-09-04 21:05       ` [PATCH v2 04/34] SUNRPC: Simplify identification of when the message send/receive is complete Trond Myklebust
2018-09-04 21:05         ` [PATCH v2 05/34] SUNRPC: Avoid holding locks across the XDR encoding of the RPC message Trond Myklebust
2018-09-04 21:05           ` [PATCH v2 06/34] SUNRPC: Rename TCP receive-specific state variables Trond Myklebust
2018-09-04 21:05             ` [PATCH v2 07/34] SUNRPC: Move reset of TCP state variables into the reconnect code Trond Myklebust
2018-09-04 21:05               ` [PATCH v2 08/34] SUNRPC: Add socket transmit queue offset tracking Trond Myklebust
2018-09-04 21:05                 ` [PATCH v2 09/34] SUNRPC: Simplify dealing with aborted partially transmitted messages Trond Myklebust
2018-09-04 21:05                   ` [PATCH v2 10/34] SUNRPC: Refactor the transport request pinning Trond Myklebust
2018-09-04 21:05                     ` [PATCH v2 11/34] SUNRPC: Add a helper to wake up a sleeping rpc_task and set its status Trond Myklebust
2018-09-04 21:05                       ` [PATCH v2 12/34] SUNRPC: Don't wake queued RPC calls multiple times in xprt_transmit Trond Myklebust
2018-09-04 21:05                         ` [PATCH v2 13/34] SUNRPC: Rename xprt->recv_lock to xprt->queue_lock Trond Myklebust
2018-09-04 21:05                           ` [PATCH v2 14/34] SUNRPC: Refactor xprt_transmit() to remove the reply queue code Trond Myklebust
2018-09-04 21:05                             ` [PATCH v2 15/34] SUNRPC: Refactor xprt_transmit() to remove wait for reply code Trond Myklebust
2018-09-04 21:05                               ` [PATCH v2 16/34] SUNRPC: Minor cleanup for call_transmit() Trond Myklebust
2018-09-04 21:05                                 ` [PATCH v2 17/34] SUNRPC: Distinguish between the slot allocation list and receive queue Trond Myklebust
2018-09-04 21:05                                   ` [PATCH v2 18/34] NFS: Add a transmission queue for RPC requests Trond Myklebust
2018-09-04 21:05                                     ` [PATCH v2 19/34] SUNRPC: Refactor RPC call encoding Trond Myklebust
2018-09-04 21:05                                       ` [PATCH v2 20/34] SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() Trond Myklebust
2018-09-04 21:05                                         ` [PATCH v2 21/34] SUNRPC: Don't reset the request 'bytes_sent' counter when releasing XPRT_LOCK Trond Myklebust
2018-09-04 21:05                                           ` [PATCH v2 22/34] SUNRPC: Simplify xprt_prepare_transmit() Trond Myklebust
2018-09-04 21:05                                             ` [PATCH v2 23/34] SUNRPC: Move RPC retransmission stat counter to xprt_transmit() Trond Myklebust
2018-09-04 21:05                                               ` [PATCH v2 24/34] SUNRPC: Fix up the back channel transmit Trond Myklebust
2018-09-04 21:05                                                 ` Trond Myklebust [this message]
2018-09-04 21:05                                                   ` [PATCH v2 26/34] SUNRPC: Improve latency for interactive tasks Trond Myklebust
2018-09-04 21:05                                                     ` [PATCH v2 27/34] SUNRPC: Allow calls to xprt_transmit() to drain the entire transmit queue Trond Myklebust
2018-09-04 21:05                                                       ` [PATCH v2 28/34] SUNRPC: Queue the request for transmission immediately after encoding Trond Myklebust
2018-09-04 21:05                                                         ` [PATCH v2 29/34] SUNRPC: Convert the xprt->sending queue back to an ordinary wait queue Trond Myklebust
2018-09-04 21:05                                                           ` [PATCH v2 30/34] SUNRPC: Allow soft RPC calls to time out when waiting for the XPRT_LOCK Trond Myklebust
2018-09-04 21:05                                                             ` [PATCH v2 31/34] SUNRPC: Turn off throttling of RPC slots for TCP sockets Trond Myklebust
2018-09-04 21:05                                                               ` [PATCH v2 32/34] SUNRPC: Clean up transport write space handling Trond Myklebust
2018-09-04 21:05                                                                 ` [PATCH v2 33/34] SUNRPC: Cleanup: remove the unused 'task' argument from the request_send() Trond Myklebust
2018-09-04 21:05                                                                   ` [PATCH v2 34/34] SUNRPC: Queue fairness for all Trond Myklebust
2018-09-06 14:17                                                           ` [PATCH v2 29/34] SUNRPC: Convert the xprt->sending queue back to an ordinary wait queue Schumaker, Anna
2018-09-06 14:23                                                             ` Schumaker, Anna
2018-09-06 14:23                                                             ` Schumaker, Anna
2018-09-05 14:30                                               ` [PATCH v2 23/34] SUNRPC: Move RPC retransmission stat counter to xprt_transmit() Chuck Lever
2018-09-05 15:28                                                 ` Trond Myklebust
2018-09-05 15:31                                                   ` Chuck Lever
2018-09-05 16:07                                                     ` Trond Myklebust
2018-09-05 16:34                                                       ` Chuck Lever
2018-09-06 18:49                                         ` [PATCH v2 20/34] SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() Schumaker, Anna
2018-09-06 18:57                                           ` Trond Myklebust

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180904210549.81673-26-trond.myklebust@hammerspace.com \
    --to=trondmy@gmail.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.