linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Trond Myklebust <trondmy@gmail.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH v2 34/34] SUNRPC: Queue fairness for all.
Date: Tue,  4 Sep 2018 17:05:49 -0400	[thread overview]
Message-ID: <20180904210549.81673-35-trond.myklebust@hammerspace.com> (raw)
In-Reply-To: <20180904210549.81673-34-trond.myklebust@hammerspace.com>

Fix up the priority queue so that it batches by queue rather than by
owner: we allow '1 << priority' elements to be dequeued before
switching to the next priority queue.
The per-task owner field (tk_owner) is still used to wake up requests in
round robin order by owner, so that a single process cannot hog the RPC
layer by loading the queues; only the queue's own 'owner' field goes
away. We extend this property to non-priority queues as well.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |   2 -
 net/sunrpc/sched.c           | 126 ++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 71 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index c5bc779feb00..869e2ee787fa 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -188,7 +188,6 @@ struct rpc_timer {
 struct rpc_wait_queue {
 	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
-	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
 	unsigned char		nr;			/* # tasks remaining for cookie */
@@ -204,7 +203,6 @@ struct rpc_wait_queue {
  * from a single cookie.  The aim is to improve
  * performance of NFS operations such as read/write.
  */
-#define RPC_BATCH_COUNT			16
 #define RPC_IS_PRIORITY(q)		((q)->maxpriority > 0)
 
 /*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 104c056daf83..1120857eb1df 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,77 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-	struct list_head *q = &queue->tasks[queue->priority];
-	struct rpc_task *task;
-
-	if (!list_empty(q)) {
-		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-		if (task->tk_owner == queue->owner)
-			list_move_tail(&task->u.tk_wait.list, q);
-	}
-}
-
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	if (queue->priority != priority) {
-		/* Fairness: rotate the list when changing priority */
-		rpc_rotate_queue_owner(queue);
 		queue->priority = priority;
+		queue->nr = 1U << priority;
 	}
 }
 
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
 }
 
 /*
- * Add new request to a priority queue.
+ * Add a request to a queue list
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
-		struct rpc_task *task,
-		unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
 {
-	struct list_head *q;
 	struct rpc_task *t;
-
-	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	if (unlikely(queue_priority > queue->maxpriority))
-		queue_priority = queue->maxpriority;
-	if (queue_priority > queue->priority)
-		rpc_set_waitqueue_priority(queue, queue_priority);
-	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
-			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+			list_add_tail(&task->u.tk_wait.links,
+					&t->u.tk_wait.links);
+			/* Cache the queue head in task->u.tk_wait.list */
+			task->u.tk_wait.list.next = q;
+			task->u.tk_wait.list.prev = NULL;
 			return;
 		}
 	}
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	list_add_tail(&task->u.tk_wait.list, q);
 }
 
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	if (task->u.tk_wait.list.prev == NULL) {
+		list_del(&task->u.tk_wait.links);
+		return;
+	}
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_first_entry(&task->u.tk_wait.links,
+				struct rpc_task,
+				u.tk_wait.links);
+		/* Assume __rpc_list_enqueue_task() cached the queue head */
+		q = t->u.tk_wait.list.next;
+		list_add_tail(&t->u.tk_wait.list, q);
+		list_del(&task->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
+{
+	if (unlikely(queue_priority > queue->maxpriority))
+		queue_priority = queue->maxpriority;
+	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
 /*
  * Add new request to wait queue.
  *
@@ -175,10 +188,11 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_add_wait_queue_priority(queue, task, queue_priority);
-	else if (RPC_IS_SWAPPER(task))
+	else if (RPC_IS_SWAPPER(task)) {
 		list_add(&task->u.tk_wait.list, &queue->tasks[0]);
-	else
-		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
+		INIT_LIST_HEAD(&task->u.tk_wait.links);
+	} else
+		__rpc_list_enqueue_task(&queue->tasks[0], task);
 	task->tk_waitqueue = queue;
 	queue->qlen++;
 	/* barrier matches the read in rpc_wake_up_task_queue_locked() */
@@ -189,20 +203,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 			task->tk_pid, queue, rpc_qname(queue));
 }
 
-/*
- * Remove request from a priority queue.
- */
-static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
-{
-	struct rpc_task *t;
-
-	if (!list_empty(&task->u.tk_wait.links)) {
-		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-	}
-}
-
 /*
  * Remove request from queue.
  * Note: must be called with spin lock held.
@@ -210,9 +210,7 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
 	__rpc_disable_timer(queue, task);
-	if (RPC_IS_PRIORITY(queue))
-		__rpc_remove_wait_queue_priority(task);
-	list_del(&task->u.tk_wait.list);
+	__rpc_list_dequeue_task(task);
 	queue->qlen--;
 	dprintk("RPC: %5u removed from queue %p \"%s\"\n",
 			task->tk_pid, queue, rpc_qname(queue));
@@ -536,20 +534,12 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 	struct rpc_task *task;
 
 	/*
-	 * Service a batch of tasks from a single owner.
+	 * Service a batch of tasks from a single queue.
 	 */
 	q = &queue->tasks[queue->priority];
-	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->owner == task->tk_owner) {
-			if (--queue->nr)
-				goto out;
-			list_move_tail(&task->u.tk_wait.list, q);
-		}
-		/*
-		 * Check if we need to switch queues.
-		 */
-		goto new_owner;
+	if (!list_empty(q) && --queue->nr) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		goto out;
 	}
 
 	/*
@@ -561,7 +551,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -571,8 +561,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	return task;
 }
-- 
2.17.1

  reply	other threads:[~2018-09-05  1:33 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-04 21:05 [PATCH v2 00/34] Convert RPC client transmission to a queued model Trond Myklebust
2018-09-04 21:05 ` [PATCH v2 01/34] SUNRPC: Clean up initialisation of the struct rpc_rqst Trond Myklebust
2018-09-04 21:05   ` [PATCH v2 02/34] SUNRPC: If there is no reply expected, bail early from call_decode Trond Myklebust
2018-09-04 21:05     ` [PATCH v2 03/34] SUNRPC: The transmitted message must lie in the RPCSEC window of validity Trond Myklebust
2018-09-04 21:05       ` [PATCH v2 04/34] SUNRPC: Simplify identification of when the message send/receive is complete Trond Myklebust
2018-09-04 21:05         ` [PATCH v2 05/34] SUNRPC: Avoid holding locks across the XDR encoding of the RPC message Trond Myklebust
2018-09-04 21:05           ` [PATCH v2 06/34] SUNRPC: Rename TCP receive-specific state variables Trond Myklebust
2018-09-04 21:05             ` [PATCH v2 07/34] SUNRPC: Move reset of TCP state variables into the reconnect code Trond Myklebust
2018-09-04 21:05               ` [PATCH v2 08/34] SUNRPC: Add socket transmit queue offset tracking Trond Myklebust
2018-09-04 21:05                 ` [PATCH v2 09/34] SUNRPC: Simplify dealing with aborted partially transmitted messages Trond Myklebust
2018-09-04 21:05                   ` [PATCH v2 10/34] SUNRPC: Refactor the transport request pinning Trond Myklebust
2018-09-04 21:05                     ` [PATCH v2 11/34] SUNRPC: Add a helper to wake up a sleeping rpc_task and set its status Trond Myklebust
2018-09-04 21:05                       ` [PATCH v2 12/34] SUNRPC: Don't wake queued RPC calls multiple times in xprt_transmit Trond Myklebust
2018-09-04 21:05                         ` [PATCH v2 13/34] SUNRPC: Rename xprt->recv_lock to xprt->queue_lock Trond Myklebust
2018-09-04 21:05                           ` [PATCH v2 14/34] SUNRPC: Refactor xprt_transmit() to remove the reply queue code Trond Myklebust
2018-09-04 21:05                             ` [PATCH v2 15/34] SUNRPC: Refactor xprt_transmit() to remove wait for reply code Trond Myklebust
2018-09-04 21:05                               ` [PATCH v2 16/34] SUNRPC: Minor cleanup for call_transmit() Trond Myklebust
2018-09-04 21:05                                 ` [PATCH v2 17/34] SUNRPC: Distinguish between the slot allocation list and receive queue Trond Myklebust
2018-09-04 21:05                                   ` [PATCH v2 18/34] NFS: Add a transmission queue for RPC requests Trond Myklebust
2018-09-04 21:05                                     ` [PATCH v2 19/34] SUNRPC: Refactor RPC call encoding Trond Myklebust
2018-09-04 21:05                                       ` [PATCH v2 20/34] SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() Trond Myklebust
2018-09-04 21:05                                         ` [PATCH v2 21/34] SUNRPC: Don't reset the request 'bytes_sent' counter when releasing XPRT_LOCK Trond Myklebust
2018-09-04 21:05                                           ` [PATCH v2 22/34] SUNRPC: Simplify xprt_prepare_transmit() Trond Myklebust
2018-09-04 21:05                                             ` [PATCH v2 23/34] SUNRPC: Move RPC retransmission stat counter to xprt_transmit() Trond Myklebust
2018-09-04 21:05                                               ` [PATCH v2 24/34] SUNRPC: Fix up the back channel transmit Trond Myklebust
2018-09-04 21:05                                                 ` [PATCH v2 25/34] SUNRPC: Support for congestion control when queuing is enabled Trond Myklebust
2018-09-04 21:05                                                   ` [PATCH v2 26/34] SUNRPC: Improve latency for interactive tasks Trond Myklebust
2018-09-04 21:05                                                     ` [PATCH v2 27/34] SUNRPC: Allow calls to xprt_transmit() to drain the entire transmit queue Trond Myklebust
2018-09-04 21:05                                                       ` [PATCH v2 28/34] SUNRPC: Queue the request for transmission immediately after encoding Trond Myklebust
2018-09-04 21:05                                                         ` [PATCH v2 29/34] SUNRPC: Convert the xprt->sending queue back to an ordinary wait queue Trond Myklebust
2018-09-04 21:05                                                           ` [PATCH v2 30/34] SUNRPC: Allow soft RPC calls to time out when waiting for the XPRT_LOCK Trond Myklebust
2018-09-04 21:05                                                             ` [PATCH v2 31/34] SUNRPC: Turn off throttling of RPC slots for TCP sockets Trond Myklebust
2018-09-04 21:05                                                               ` [PATCH v2 32/34] SUNRPC: Clean up transport write space handling Trond Myklebust
2018-09-04 21:05                                                                 ` [PATCH v2 33/34] SUNRPC: Cleanup: remove the unused 'task' argument from the request_send() Trond Myklebust
2018-09-04 21:05                                                                   ` Trond Myklebust [this message]
2018-09-06 14:17                                                           ` [PATCH v2 29/34] SUNRPC: Convert the xprt->sending queue back to an ordinary wait queue Schumaker, Anna
2018-09-06 14:23                                                             ` Schumaker, Anna
2018-09-06 14:23                                                             ` Schumaker, Anna
2018-09-05 14:30                                               ` [PATCH v2 23/34] SUNRPC: Move RPC retransmission stat counter to xprt_transmit() Chuck Lever
2018-09-05 15:28                                                 ` Trond Myklebust
2018-09-05 15:31                                                   ` Chuck Lever
2018-09-05 16:07                                                     ` Trond Myklebust
2018-09-05 16:34                                                       ` Chuck Lever
2018-09-06 18:49                                         ` [PATCH v2 20/34] SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() Schumaker, Anna
2018-09-06 18:57                                           ` Trond Myklebust

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180904210549.81673-35-trond.myklebust@hammerspace.com \
    --to=trondmy@gmail.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).