linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Trond Myklebust <trondmy@gmail.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH v3 36/44] SUNRPC: Fix priority queue fairness
Date: Mon, 17 Sep 2018 09:03:27 -0400	[thread overview]
Message-ID: <20180917130335.112832-37-trond.myklebust@hammerspace.com> (raw)
In-Reply-To: <20180917130335.112832-36-trond.myklebust@hammerspace.com>

Fix up the priority queue to not batch by owner, but by queue, so that
we allow '1 << priority' elements to be dequeued before switching to
the next priority queue.
The owner field is still used to wake up requests in round robin order
by owner to avoid single processes hogging the RPC layer by loading the
queues.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |   2 -
 net/sunrpc/sched.c           | 109 +++++++++++++++++------------------
 2 files changed, 54 insertions(+), 57 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8840a420cf4c..7b540c066594 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -189,7 +189,6 @@ struct rpc_timer {
 struct rpc_wait_queue {
 	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
-	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
 	unsigned char		nr;			/* # tasks remaining for cookie */
@@ -205,7 +204,6 @@ struct rpc_wait_queue {
  * from a single cookie.  The aim is to improve
  * performance of NFS operations such as read/write.
  */
-#define RPC_BATCH_COUNT			16
 #define RPC_IS_PRIORITY(q)		((q)->maxpriority > 0)
 
 /*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9a8ec012b449..57ca5bead1cb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-	struct list_head *q = &queue->tasks[queue->priority];
-	struct rpc_task *task;
-
-	if (!list_empty(q)) {
-		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-		if (task->tk_owner == queue->owner)
-			list_move_tail(&task->u.tk_wait.list, q);
-	}
-}
-
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	if (queue->priority != priority) {
-		/* Fairness: rotate the list when changing priority */
-		rpc_rotate_queue_owner(queue);
 		queue->priority = priority;
+		queue->nr = 1U << priority;
 	}
 }
 
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
 }
 
 /*
- * Add new request to a priority queue.
+ * Add a request to a queue list
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
-		struct rpc_task *task,
-		unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
 {
-	struct list_head *q;
 	struct rpc_task *t;
 
-	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	if (unlikely(queue_priority > queue->maxpriority))
-		queue_priority = queue->maxpriority;
-	if (queue_priority > queue->priority)
-		rpc_set_waitqueue_priority(queue, queue_priority);
-	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
-			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+			list_add_tail(&task->u.tk_wait.links,
+					&t->u.tk_wait.links);
+			/* Cache the queue head in task->u.tk_wait.list */
+			task->u.tk_wait.list.next = q;
+			task->u.tk_wait.list.prev = NULL;
 			return;
 		}
 	}
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	list_add_tail(&task->u.tk_wait.list, q);
 }
 
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	if (task->u.tk_wait.list.prev == NULL) {
+		list_del(&task->u.tk_wait.links);
+		return;
+	}
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_first_entry(&task->u.tk_wait.links,
+				struct rpc_task,
+				u.tk_wait.links);
+		/* Assume __rpc_list_enqueue_task() cached the queue head */
+		q = t->u.tk_wait.list.next;
+		list_add_tail(&t->u.tk_wait.list, q);
+		list_del(&task->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
+{
+	if (unlikely(queue_priority > queue->maxpriority))
+		queue_priority = queue->maxpriority;
+	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
 /*
  * Add new request to wait queue.
  *
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
  */
 static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 {
-	struct rpc_task *t;
-
-	if (!list_empty(&task->u.tk_wait.links)) {
-		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-	}
+	__rpc_list_dequeue_task(task);
 }
 
 /*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 	__rpc_disable_timer(queue, task);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_remove_wait_queue_priority(task);
-	list_del(&task->u.tk_wait.list);
+	else
+		list_del(&task->u.tk_wait.list);
 	queue->qlen--;
 	dprintk("RPC: %5u removed from queue %p \"%s\"\n",
 			task->tk_pid, queue, rpc_qname(queue));
@@ -545,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 	 * Service a batch of tasks from a single owner.
 	 */
 	q = &queue->tasks[queue->priority];
-	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->owner == task->tk_owner) {
-			if (--queue->nr)
-				goto out;
-			list_move_tail(&task->u.tk_wait.list, q);
-		}
-		/*
-		 * Check if we need to switch queues.
-		 */
-		goto new_owner;
+	if (!list_empty(q) && --queue->nr) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		goto out;
 	}
 
 	/*
@@ -567,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -577,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	return task;
 }
-- 
2.17.1

  reply	other threads:[~2018-09-17 18:31 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-17 13:02 [PATCH v3 00/44] Convert RPC client transmission to a queued model Trond Myklebust
2018-09-17 13:02 ` [PATCH v3 01/44] SUNRPC: Clean up initialisation of the struct rpc_rqst Trond Myklebust
2018-09-17 13:02   ` [PATCH v3 02/44] SUNRPC: If there is no reply expected, bail early from call_decode Trond Myklebust
2018-09-17 13:02     ` [PATCH v3 03/44] SUNRPC: The transmitted message must lie in the RPCSEC window of validity Trond Myklebust
2018-09-17 13:02       ` [PATCH v3 04/44] SUNRPC: Simplify identification of when the message send/receive is complete Trond Myklebust
2018-09-17 13:02         ` [PATCH v3 05/44] SUNRPC: Avoid holding locks across the XDR encoding of the RPC message Trond Myklebust
2018-09-17 13:02           ` [PATCH v3 06/44] SUNRPC: Rename TCP receive-specific state variables Trond Myklebust
2018-09-17 13:02             ` [PATCH v3 07/44] SUNRPC: Move reset of TCP state variables into the reconnect code Trond Myklebust
2018-09-17 13:02               ` [PATCH v3 08/44] SUNRPC: Add socket transmit queue offset tracking Trond Myklebust
2018-09-17 13:03                 ` [PATCH v3 09/44] SUNRPC: Simplify dealing with aborted partially transmitted messages Trond Myklebust
2018-09-17 13:03                   ` [PATCH v3 10/44] SUNRPC: Refactor the transport request pinning Trond Myklebust
2018-09-17 13:03                     ` [PATCH v3 11/44] SUNRPC: Add a helper to wake up a sleeping rpc_task and set its status Trond Myklebust
2018-09-17 13:03                       ` [PATCH v3 12/44] SUNRPC: Test whether the task is queued before grabbing the queue spinlocks Trond Myklebust
2018-09-17 13:03                         ` [PATCH v3 13/44] SUNRPC: Don't wake queued RPC calls multiple times in xprt_transmit Trond Myklebust
2018-09-17 13:03                           ` [PATCH v3 14/44] SUNRPC: Rename xprt->recv_lock to xprt->queue_lock Trond Myklebust
2018-09-17 13:03                             ` [PATCH v3 15/44] SUNRPC: Refactor xprt_transmit() to remove the reply queue code Trond Myklebust
2018-09-17 13:03                               ` [PATCH v3 16/44] SUNRPC: Refactor xprt_transmit() to remove wait for reply code Trond Myklebust
2018-09-17 13:03                                 ` [PATCH v3 17/44] SUNRPC: Minor cleanup for call_transmit() Trond Myklebust
2018-09-17 13:03                                   ` [PATCH v3 18/44] SUNRPC: Distinguish between the slot allocation list and receive queue Trond Myklebust
2018-09-17 13:03                                     ` [PATCH v3 19/44] SUNRPC: Add a transmission queue for RPC requests Trond Myklebust
2018-09-17 13:03                                       ` [PATCH v3 20/44] SUNRPC: Refactor RPC call encoding Trond Myklebust
2018-09-17 13:03                                         ` [PATCH v3 21/44] SUNRPC: Fix up the back channel transmit Trond Myklebust
2018-09-17 13:03                                           ` [PATCH v3 22/44] SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() Trond Myklebust
2018-09-17 13:03                                             ` [PATCH v3 23/44] SUNRPC: Don't reset the request 'bytes_sent' counter when releasing XPRT_LOCK Trond Myklebust
2018-09-17 13:03                                               ` [PATCH v3 24/44] SUNRPC: Simplify xprt_prepare_transmit() Trond Myklebust
2018-09-17 13:03                                                 ` [PATCH v3 25/44] SUNRPC: Move RPC retransmission stat counter to xprt_transmit() Trond Myklebust
2018-09-17 13:03                                                   ` [PATCH v3 26/44] SUNRPC: Improve latency for interactive tasks Trond Myklebust
2018-09-17 13:03                                                     ` [PATCH v3 27/44] SUNRPC: Support for congestion control when queuing is enabled Trond Myklebust
2018-09-17 13:03                                                       ` [PATCH v3 28/44] SUNRPC: Enqueue swapper tagged RPCs at the head of the transmit queue Trond Myklebust
2018-09-17 13:03                                                         ` [PATCH v3 29/44] SUNRPC: Allow calls to xprt_transmit() to drain the entire " Trond Myklebust
2018-09-17 13:03                                                           ` [PATCH v3 30/44] SUNRPC: Allow soft RPC calls to time out when waiting for the XPRT_LOCK Trond Myklebust
2018-09-17 13:03                                                             ` [PATCH v3 31/44] SUNRPC: Turn off throttling of RPC slots for TCP sockets Trond Myklebust
2018-09-17 13:03                                                               ` [PATCH v3 32/44] SUNRPC: Clean up transport write space handling Trond Myklebust
2018-09-17 13:03                                                                 ` [PATCH v3 33/44] SUNRPC: Cleanup: remove the unused 'task' argument from the request_send() Trond Myklebust
2018-09-17 13:03                                                                   ` [PATCH v3 34/44] SUNRPC: Don't take transport->lock unnecessarily when taking XPRT_LOCK Trond Myklebust
2018-09-17 13:03                                                                     ` [PATCH v3 35/44] SUNRPC: Convert xprt receive queue to use an rbtree Trond Myklebust
2018-09-17 13:03                                                                       ` Trond Myklebust [this message]
2018-09-17 13:03                                                                         ` [PATCH v3 37/44] SUNRPC: Convert the xprt->sending queue back to an ordinary wait queue Trond Myklebust
2018-09-17 13:03                                                                           ` [PATCH v3 38/44] SUNRPC: Add a label for RPC calls that require allocation on receive Trond Myklebust
2018-09-17 13:03                                                                             ` [PATCH v3 39/44] SUNRPC: Add a bvec array to struct xdr_buf for use with iovec_iter() Trond Myklebust
2018-09-17 13:03                                                                               ` [PATCH v3 40/44] SUNRPC: Simplify TCP receive code by switching to using iterators Trond Myklebust
2018-09-17 13:03                                                                                 ` [PATCH v3 41/44] SUNRPC: Clean up - rename xs_tcp_data_receive() to xs_stream_data_receive() Trond Myklebust
2018-09-17 13:03                                                                                   ` [PATCH v3 42/44] SUNRPC: Allow AF_LOCAL sockets to use the generic stream receive Trond Myklebust
2018-09-17 13:03                                                                                     ` [PATCH v3 43/44] SUNRPC: Clean up xs_udp_data_receive() Trond Myklebust
2018-09-17 13:03                                                                                       ` [PATCH v3 44/44] SUNRPC: Unexport xdr_partial_copy_from_skb() Trond Myklebust
2018-09-17 20:44                                                                                 ` [PATCH v3 40/44] SUNRPC: Simplify TCP receive code by switching to using iterators Trond Myklebust
2018-11-09 11:19                                                                                 ` Catalin Marinas
2018-11-29 19:28                                                                                   ` Cristian Marussi
2018-11-29 19:56                                                                                     ` Trond Myklebust
2018-11-30 16:19                                                                                       ` Cristian Marussi
2018-11-30 19:31                                                                                         ` Trond Myklebust
2018-12-02 16:44                                                                                           ` Trond Myklebust
2018-12-03 11:45                                                                                             ` Catalin Marinas
2018-12-03 11:53                                                                                               ` Cristian Marussi
2018-12-03 18:54                                                                                                 ` Cristian Marussi
2018-12-27 19:21                                                     ` [PATCH v3 26/44] SUNRPC: Improve latency for interactive tasks Chuck Lever
2018-12-27 22:14                                                       ` Trond Myklebust
2018-12-27 22:34                                                         ` Chuck Lever
2018-12-31 18:09                                                           ` Trond Myklebust
2018-12-31 18:44                                                             ` Chuck Lever
2018-12-31 18:59                                                               ` Trond Myklebust
2018-12-31 19:09                                                                 ` Chuck Lever
2018-12-31 19:18                                                                   ` Trond Myklebust
2018-12-31 19:21                                                                     ` Trond Myklebust
2019-01-02 18:17                                                                       ` Chuck Lever
2019-01-02 18:45                                                                         ` Trond Myklebust
2019-01-02 18:51                                                                           ` Chuck Lever
2019-01-02 18:57                                                                             ` Trond Myklebust
2019-01-02 19:06                                                                               ` Trond Myklebust
2019-01-02 19:24                                                                                 ` Trond Myklebust
2019-01-02 19:33                                                                                   ` Chuck Lever
2019-01-02 19:08                                                                               ` Chuck Lever
2019-01-02 19:11                                                                                 ` Trond Myklebust
2018-09-18 21:01                               ` [PATCH v3 15/44] SUNRPC: Refactor xprt_transmit() to remove the reply queue code Anna Schumaker
2018-09-19 15:48                                 ` Trond Myklebust
2018-09-19 17:30                                   ` Anna Schumaker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180917130335.112832-37-trond.myklebust@hammerspace.com \
    --to=trondmy@gmail.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).