All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V3 0/2] block/loop: improve performance
@ 2017-09-01  5:09 Shaohua Li
  2017-09-01  5:09 ` [PATCH V3 1/2] block/loop: set hw_sectors Shaohua Li
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Shaohua Li @ 2017-09-01  5:09 UTC (permalink / raw)
  To: linux-block; +Cc: kernel-team, axboe, Shaohua Li

From: Shaohua Li <shli@fb.com>

Two small patches to improve performance for loop in directio mode. The goal is
to increase the size of the IO sent to the underlying disks.

Thanks,
Shaohua

V2 -> V3:
- Use GFP_NOIO, as pointed out by Ming
- Rebase to latest for-next branch

Shaohua Li (2):
  block/loop: set hw_sectors
  block/loop: allow request merge for directio mode

 drivers/block/loop.c | 67 ++++++++++++++++++++++++++++++++++++++++------------
 drivers/block/loop.h |  1 +
 2 files changed, 53 insertions(+), 15 deletions(-)

-- 
2.9.5

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH V3 1/2] block/loop: set hw_sectors
  2017-09-01  5:09 [PATCH V3 0/2] block/loop: improve performance Shaohua Li
@ 2017-09-01  5:09 ` Shaohua Li
  2017-09-01  5:09 ` [PATCH V3 2/2] block/loop: allow request merge for directio mode Shaohua Li
  2017-09-01 14:44 ` [PATCH V3 0/2] block/loop: improve performance Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Shaohua Li @ 2017-09-01  5:09 UTC (permalink / raw)
  To: linux-block; +Cc: kernel-team, axboe, Shaohua Li

From: Shaohua Li <shli@fb.com>

Loop can handle any size of request. Limiting it to 255 sectors just
burns CPU on bio splitting and request merging for the underlying disk,
and also causes bad fs block allocation in directio mode.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/block/loop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f6c204f6..9eff4d3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1736,6 +1736,7 @@ static int loop_add(struct loop_device **l, int i)
 
 	blk_queue_physical_block_size(lo->lo_queue, PAGE_SIZE);
 
+	blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
 	/*
 	 * It doesn't make sense to enable merge because the I/O
 	 * submitted to backing file is handled page by page.
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH V3 2/2] block/loop: allow request merge for directio mode
  2017-09-01  5:09 [PATCH V3 0/2] block/loop: improve performance Shaohua Li
  2017-09-01  5:09 ` [PATCH V3 1/2] block/loop: set hw_sectors Shaohua Li
@ 2017-09-01  5:09 ` Shaohua Li
  2017-09-01 14:44 ` [PATCH V3 0/2] block/loop: improve performance Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Shaohua Li @ 2017-09-01  5:09 UTC (permalink / raw)
  To: linux-block; +Cc: kernel-team, axboe, Shaohua Li

From: Shaohua Li <shli@fb.com>

Currently loop disables merge. While that makes sense for buffered IO mode,
directio mode can benefit from request merging. Without merging, loop could
send small IOs to the underlying disk and harm performance.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/block/loop.c | 66 ++++++++++++++++++++++++++++++++++++++++------------
 drivers/block/loop.h |  1 +
 2 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9eff4d3..3a35121 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,10 +213,13 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 	 */
 	blk_mq_freeze_queue(lo->lo_queue);
 	lo->use_dio = use_dio;
-	if (use_dio)
+	if (use_dio) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
-	else
+	} else {
+		queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 		lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+	}
 	blk_mq_unfreeze_queue(lo->lo_queue);
 }
 
@@ -464,6 +467,8 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
 
+	kfree(cmd->bvec);
+	cmd->bvec = NULL;
 	cmd->ret = ret;
 	blk_mq_complete_request(cmd->rq);
 }
@@ -473,22 +478,50 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 {
 	struct iov_iter iter;
 	struct bio_vec *bvec;
-	struct bio *bio = cmd->rq->bio;
+	struct request *rq = cmd->rq;
+	struct bio *bio = rq->bio;
 	struct file *file = lo->lo_backing_file;
+	unsigned int offset;
+	int segments = 0;
 	int ret;
 
-	/* nomerge for loop request queue */
-	WARN_ON(cmd->rq->bio != cmd->rq->biotail);
+	if (rq->bio != rq->biotail) {
+		struct req_iterator iter;
+		struct bio_vec tmp;
+
+		__rq_for_each_bio(bio, rq)
+			segments += bio_segments(bio);
+		bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
+		if (!bvec)
+			return -EIO;
+		cmd->bvec = bvec;
+
+		/*
+		 * The bios of the request may be started from the middle of
+		 * the 'bvec' because of bio splitting, so we can't directly
+		 * copy bio->bi_io_vec to new bvec. The rq_for_each_segment
+		 * API will take care of all details for us.
+		 */
+		rq_for_each_segment(tmp, rq, iter) {
+			*bvec = tmp;
+			bvec++;
+		}
+		bvec = cmd->bvec;
+		offset = 0;
+	} else {
+		/*
+		 * Same here, this bio may be started from the middle of the
+		 * 'bvec' because of bio splitting, so offset from the bvec
+		 * must be passed to iov iterator
+		 */
+		offset = bio->bi_iter.bi_bvec_done;
+		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+		segments = bio_segments(bio);
+	}
 
-	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
 	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      bio_segments(bio), blk_rq_bytes(cmd->rq));
-	/*
-	 * This bio may be started from the middle of the 'bvec'
-	 * because of bio splitting, so offset from the bvec must
-	 * be passed to iov iterator
-	 */
-	iter.iov_offset = bio->bi_iter.bi_bvec_done;
+		      segments, blk_rq_bytes(rq));
+	iter.iov_offset = offset;
 
 	cmd->iocb.ki_pos = pos;
 	cmd->iocb.ki_filp = file;
@@ -1737,9 +1770,12 @@ static int loop_add(struct loop_device **l, int i)
 	blk_queue_physical_block_size(lo->lo_queue, PAGE_SIZE);
 
 	blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
+
 	/*
-	 * It doesn't make sense to enable merge because the I/O
-	 * submitted to backing file is handled page by page.
+	 * By default, we do buffer IO, so it doesn't make sense to enable
+	 * merge because the I/O submitted to backing file is handled page by
+	 * page. For directio mode, merge does help to dispatch bigger request
+	 * to underlayer disk. We will enable merge once directio is enabled.
 	 */
 	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index efe5718..43d20d3 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -71,6 +71,7 @@ struct loop_cmd {
 	bool use_aio;           /* use AIO interface to handle I/O */
 	long ret;
 	struct kiocb iocb;
+	struct bio_vec *bvec;
 };
 
 /* Support for loadable transfer modules */
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH V3 0/2] block/loop: improve performance
  2017-09-01  5:09 [PATCH V3 0/2] block/loop: improve performance Shaohua Li
  2017-09-01  5:09 ` [PATCH V3 1/2] block/loop: set hw_sectors Shaohua Li
  2017-09-01  5:09 ` [PATCH V3 2/2] block/loop: allow request merge for directio mode Shaohua Li
@ 2017-09-01 14:44 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2017-09-01 14:44 UTC (permalink / raw)
  To: Shaohua Li, linux-block; +Cc: kernel-team, Shaohua Li

On 08/31/2017 11:09 PM, Shaohua Li wrote:
> From: Shaohua Li <shli@fb.com>
> 
> two small patches to improve performance for loop in directio mode. The goal is
> to increase IO size sending to underlayer disks.

Applied for 4.14, thanks.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-09-01 14:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01  5:09 [PATCH V3 0/2] block/loop: improve performance Shaohua Li
2017-09-01  5:09 ` [PATCH V3 1/2] block/loop: set hw_sectors Shaohua Li
2017-09-01  5:09 ` [PATCH V3 2/2] block/loop: allow request merge for directio mode Shaohua Li
2017-09-01 14:44 ` [PATCH V3 0/2] block/loop: improve performance Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.