From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	Khazhy Kumykov <khazhy@google.com>,
	Jaegeuk Kim <jaegeuk@kernel.org>,
	Bart Van Assche <bvanassche@acm.org>
Subject: [PATCH 5/5] block/mq-deadline: Remove zone locking
Date: Tue, 14 Jun 2022 10:49:43 -0700	[thread overview]
Message-ID: <20220614174943.611369-6-bvanassche@acm.org> (raw)
In-Reply-To: <20220614174943.611369-1-bvanassche@acm.org>

Measurements have shown that limiting the queue depth to one has a
significant negative performance impact on zoned UFS devices. Hence
remove zone locking from the mq-deadline scheduler. This patch is based
on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- If such write requests get reordered by the software or hardware queue
  mechanism, nr_hw_queues * nr_requests - 1 retries are sufficient to
  restore the LBA order of the write requests (see the sketch below).
- Zoned write requests are only infrequently reordered by the block
  layer.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.
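
As a rough illustration of the second assumption, the retry budget for
requeued zoned writes could be derived as follows. This helper is a
hypothetical sketch, not part of this series; only the request_queue
fields nr_hw_queues and nr_requests are real:

#include <linux/blkdev.h>

/*
 * Hypothetical sketch: the retry budget implied by the assumption
 * above. The helper name is illustrative only.
 */
static unsigned long zoned_write_retry_budget(struct request_queue *q)
{
	return q->nr_hw_queues * q->nr_requests - 1;
}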

DD_BE_PRIO is selected for sequential writes such that all sequential
writes end up in a single per-priority FIFO list and hence their LBA
order is preserved.
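
For reference, a minimal sketch of what the blk_rq_is_seq_write()
helper from patch 1/5 might look like, built on existing block layer
helpers; the exact set of operations it covers is an assumption, not
the code from that patch:

#include <linux/blk-mq.h>

/*
 * Sketch: true for write requests directed at a sequential write
 * required zone. Patch 1/5 defines the real helper; this version is
 * illustrative only.
 */
static inline bool blk_rq_is_seq_write(struct request *rq)
{
	switch (req_op(rq)) {
	case REQ_OP_WRITE:
	case REQ_OP_WRITE_ZEROES:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}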

See also commit 5700f69178e9 ("mq-deadline: Introduce zone locking
support").

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 74 ++++-----------------------------------------
 1 file changed, 6 insertions(+), 68 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6ed602b2f80a..e168fc9a980a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -104,7 +104,6 @@ struct deadline_data {
 	int prio_aging_expire;
 
 	spinlock_t lock;
-	spinlock_t zone_lock;
 };
 
 /* Maps an I/O priority class to a deadline scheduler priority. */
@@ -285,30 +284,10 @@ static struct request *
 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
 	if (list_empty(&per_prio->fifo_list[data_dir]))
 		return NULL;
 
-	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			goto out;
-	}
-	rq = NULL;
-out:
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 }
 
 /*
@@ -319,29 +298,7 @@ static struct request *
 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
-	rq = per_prio->next_rq[data_dir];
-	if (!rq)
-		return NULL;
-
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	while (rq) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			break;
-		rq = deadline_latter_request(rq);
-	}
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return per_prio->next_rq[data_dir];
 }
 
 /*
@@ -467,10 +424,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	ioprio_class = dd_rq_ioclass(rq);
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd->per_prio[prio].stats.dispatched++;
-	/*
-	 * If the request needs its target zone locked, do it.
-	 */
-	blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -640,7 +593,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_batch = fifo_batch;
 	dd->prio_aging_expire = prio_aging_expire;
 	spin_lock_init(&dd->lock);
-	spin_lock_init(&dd->zone_lock);
 
 	q->elevator = eq;
 	return 0;
@@ -716,17 +668,13 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	bool seq_write = blk_rq_is_seq_write(rq);
 	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
 
-	/*
-	 * This may be a requeue of a write request that has locked its
-	 * target zone. If it is the case, this releases the zone lock.
-	 */
-	blk_req_zone_write_unlock(rq);
-
-	prio = ioprio_class_to_prio[ioprio_class];
+	prio = seq_write ? DD_BE_PRIO :
+		ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
 		per_prio->stats.inserted++;
@@ -740,7 +688,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (at_head) {
+	if (at_head && !seq_write) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
@@ -819,16 +767,6 @@ static void dd_finish_request(struct request *rq)
 		return;
 
 	atomic_inc(&per_prio->stats.completed);
-
-	if (blk_queue_is_zoned(q)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->zone_lock, flags);
-		blk_req_zone_write_unlock(rq);
-		if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
-			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
-		spin_unlock_irqrestore(&dd->zone_lock, flags);
-	}
 }
 
 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
