* [PATCH] block: handle fast path of bio splitting inline
From: Jens Axboe @ 2021-10-13 20:44 UTC
  To: linux-block

The fast path is the case where no splitting is needed. Separate the
handling into a check part we can inline, and an out-of-line handling
path for when we do need to split.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---
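
A minimal, standalone sketch of the pattern (all names below are
invented for illustration; none of this is the kernel API): keep the
cheap common-case check in a static inline that callers can fold down
to a compare and branch, and pay for a function call only when a split
is actually needed.

/* toy_split.c: inline fast path, out-of-line slow path (illustrative) */
struct toy_bio { unsigned int nr_vecs; };

/* out-of-line: the rare, expensive case */
static void toy_split_slow(struct toy_bio *bio, unsigned int *nr_segs)
{
	*nr_segs = bio->nr_vecs;	/* stand-in for real split handling */
}

/* lives in a header in the real layout, so every caller inlines it */
static inline void toy_split(struct toy_bio *bio, unsigned int *nr_segs)
{
	*nr_segs = 1;			/* the fast-path answer */
	if (bio->nr_vecs > 1)		/* cheap check, rarely taken */
		toy_split_slow(bio, nr_segs);
}

int main(void)
{
	struct toy_bio bio = { .nr_vecs = 1 };
	unsigned int nr_segs;

	toy_split(&bio, &nr_segs);	/* resolves to the fast path */
	return (int)nr_segs - 1;	/* exits 0: no split was needed */
}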

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 9a55b5070829..20ec9f00801a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -14,6 +14,7 @@
 #include "blk.h"
 #include "blk-rq-qos.h"
 #include "blk-throttle.h"
+#include "blk-merge.h"
 
 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
 {
@@ -92,10 +93,8 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 	return bio_will_gap(req->q, NULL, bio, req->bio);
 }
 
-static struct bio *blk_bio_discard_split(struct request_queue *q,
-					 struct bio *bio,
-					 struct bio_set *bs,
-					 unsigned *nsegs)
+struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio,
+				  struct bio_set *bs, unsigned *nsegs)
 {
 	unsigned int max_discard_sectors, granularity;
 	int alignment;
@@ -136,8 +135,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 	return bio_split(bio, split_sectors, GFP_NOIO, bs);
 }
 
-static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
-		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+struct bio *blk_bio_write_zeroes_split(struct request_queue *q, struct bio *bio,
+				       struct bio_set *bs, unsigned *nsegs)
 {
 	*nsegs = 0;
 
@@ -150,10 +149,8 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
 	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
 }
 
-static struct bio *blk_bio_write_same_split(struct request_queue *q,
-					    struct bio *bio,
-					    struct bio_set *bs,
-					    unsigned *nsegs)
+struct bio *blk_bio_write_same_split(struct request_queue *q, struct bio *bio,
+				     struct bio_set *bs, unsigned *nsegs)
 {
 	*nsegs = 1;
 
@@ -275,10 +272,8 @@ static bool bvec_split_segs(const struct request_queue *q,
  * responsible for ensuring that @bs is only destroyed after processing of the
  * split bio has finished.
  */
-static struct bio *blk_bio_segment_split(struct request_queue *q,
-					 struct bio *bio,
-					 struct bio_set *bs,
-					 unsigned *segs)
+struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio,
+				  struct bio_set *bs, unsigned *segs)
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
@@ -322,67 +317,17 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	return bio_split(bio, sectors, GFP_NOIO, bs);
 }
 
-/**
- * __blk_queue_split - split a bio and submit the second half
- * @bio:     [in, out] bio to be split
- * @nr_segs: [out] number of segments in the first bio
- *
- * Split a bio into two bios, chain the two bios, submit the second half and
- * store a pointer to the first half in *@bio. If the second bio is still too
- * big it will be split by a recursive call to this function. Since this
- * function may allocate a new bio from q->bio_split, it is the responsibility
- * of the caller to ensure that q->bio_split is only released after processing
- * of the split bio has finished.
- */
-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
+void blk_bio_handle_split(struct bio **bio, struct bio *split)
 {
-	struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
-	struct bio *split = NULL;
-
-	switch (bio_op(*bio)) {
-	case REQ_OP_DISCARD:
-	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
-		break;
-	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
-				nr_segs);
-		break;
-	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, &q->bio_split,
-				nr_segs);
-		break;
-	default:
-		/*
-		 * All drivers must accept single-segments bios that are <=
-		 * PAGE_SIZE.  This is a quick and dirty check that relies on
-		 * the fact that bi_io_vec[0] is always valid if a bio has data.
-		 * The check might lead to occasional false negatives when bios
-		 * are cloned, but compared to the performance impact of cloned
-		 * bios themselves the loop below doesn't matter anyway.
-		 */
-		if (!q->limits.chunk_sectors &&
-		    (*bio)->bi_vcnt == 1 &&
-		    ((*bio)->bi_io_vec[0].bv_len +
-		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
-			*nr_segs = 1;
-			break;
-		}
-		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
-		break;
-	}
-
-	if (split) {
-		/* there isn't chance to merge the splitted bio */
-		split->bi_opf |= REQ_NOMERGE;
+	/* there isn't chance to merge the splitted bio */
+	split->bi_opf |= REQ_NOMERGE;
 
-		bio_chain(split, *bio);
-		trace_block_split(split, (*bio)->bi_iter.bi_sector);
-		submit_bio_noacct(*bio);
-		*bio = split;
+	bio_chain(split, *bio);
+	trace_block_split(split, (*bio)->bi_iter.bi_sector);
+	submit_bio_noacct(*bio);
+	*bio = split;
 
-		blk_throtl_charge_bio_split(*bio);
-	}
+	blk_throtl_charge_bio_split(*bio);
 }
 
 /**
@@ -397,9 +342,10 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
  */
 void blk_queue_split(struct bio **bio)
 {
+	struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
 	unsigned int nr_segs;
 
-	__blk_queue_split(bio, &nr_segs);
+	__blk_queue_split(q, bio, &nr_segs);
 }
 EXPORT_SYMBOL(blk_queue_split);
 
diff --git a/block/blk-merge.h b/block/blk-merge.h
new file mode 100644
index 000000000000..8f0b7dec2dd2
--- /dev/null
+++ b/block/blk-merge.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BLK_MERGE_H
+#define BLK_MERGE_H
+
+#include "blk-mq.h"
+
+struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio,
+				  struct bio_set *bs, unsigned *nsegs);
+struct bio *blk_bio_write_zeroes_split(struct request_queue *q, struct bio *bio,
+				       struct bio_set *bs, unsigned *nsegs);
+struct bio *blk_bio_write_same_split(struct request_queue *q, struct bio *bio,
+				     struct bio_set *bs, unsigned *nsegs);
+struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio,
+				  struct bio_set *bs, unsigned *segs);
+void blk_bio_handle_split(struct bio **bio, struct bio *split);
+
+/**
+ * __blk_queue_split - split a bio and submit the second half
+ * @bio:     [in, out] bio to be split
+ * @nr_segs: [out] number of segments in the first bio
+ *
+ * Split a bio into two bios, chain the two bios, submit the second half and
+ * store a pointer to the first half in *@bio. If the second bio is still too
+ * big it will be split by a recursive call to this function. Since this
+ * function may allocate a new bio from q->bio_split, it is the responsibility
+ * of the caller to ensure that q->bio_split is only released after processing
+ * of the split bio has finished.
+ */
+static inline void __blk_queue_split(struct request_queue *q, struct bio **bio,
+				     unsigned int *nr_segs)
+{
+	struct bio *split = NULL;
+
+	switch (bio_op(*bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
+		break;
+	case REQ_OP_WRITE_ZEROES:
+		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
+				nr_segs);
+		break;
+	case REQ_OP_WRITE_SAME:
+		split = blk_bio_write_same_split(q, *bio, &q->bio_split,
+				nr_segs);
+		break;
+	default:
+		/*
+		 * All drivers must accept single-segments bios that are <=
+		 * PAGE_SIZE.  This is a quick and dirty check that relies on
+		 * the fact that bi_io_vec[0] is always valid if a bio has data.
+		 * The check might lead to occasional false negatives when bios
+		 * are cloned, but compared to the performance impact of cloned
+		 * bios themselves the loop below doesn't matter anyway.
+		 */
+		if (!q->limits.chunk_sectors &&
+		    (*bio)->bi_vcnt == 1 &&
+		    ((*bio)->bi_io_vec[0].bv_len +
+		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
+			*nr_segs = 1;
+			break;
+		}
+		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
+		break;
+	}
+
+	if (split)
+		blk_bio_handle_split(bio, split);
+}
+
+#endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7dadfac5bad0..54c510c0f3ee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,6 +42,7 @@
 #include "blk-stat.h"
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
+#include "blk-merge.h"
 
 static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 
@@ -2514,7 +2515,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	blk_status_t ret;
 
 	blk_queue_bounce(q, &bio);
-	__blk_queue_split(&bio, &nr_segs);
+	__blk_queue_split(q, &bio, &nr_segs);
 
 	if (!bio_integrity_prep(bio))
 		goto queue_exit;
diff --git a/block/blk.h b/block/blk.h
index 0afee3e6a7c1..c0b59bc38a2e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -262,7 +262,6 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
 
-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);
 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,

-- 
Jens Axboe



* Re: [PATCH] block: handle fast path of bio splitting inline
From: Christoph Hellwig @ 2021-10-14  5:38 UTC
  To: Jens Axboe; +Cc: linux-block

On Wed, Oct 13, 2021 at 02:44:14PM -0600, Jens Axboe wrote:
> The fast path is the case where no splitting is needed. Separate the
> handling into a check part we can inline, and an out-of-line handling
> path for when we do need to split.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

What about something like this version instead?

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 9a55b50708293..f333afb45eb15 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -353,21 +353,6 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
 				nr_segs);
 		break;
 	default:
-		/*
-		 * All drivers must accept single-segments bios that are <=
-		 * PAGE_SIZE.  This is a quick and dirty check that relies on
-		 * the fact that bi_io_vec[0] is always valid if a bio has data.
-		 * The check might lead to occasional false negatives when bios
-		 * are cloned, but compared to the performance impact of cloned
-		 * bios themselves the loop below doesn't matter anyway.
-		 */
-		if (!q->limits.chunk_sectors &&
-		    (*bio)->bi_vcnt == 1 &&
-		    ((*bio)->bi_io_vec[0].bv_len +
-		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
-			*nr_segs = 1;
-			break;
-		}
 		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
 		break;
 	}
@@ -399,7 +384,8 @@ void blk_queue_split(struct bio **bio)
 {
 	unsigned int nr_segs;
 
-	__blk_queue_split(bio, &nr_segs);
+	if (blk_may_split(*bio))
+		__blk_queue_split(bio, &nr_segs);
 }
 EXPORT_SYMBOL(blk_queue_split);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 608b270a7f6b8..7c82e052ca83f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2251,11 +2251,12 @@ void blk_mq_submit_bio(struct bio *bio)
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct request *rq;
 	struct blk_plug *plug;
-	unsigned int nr_segs;
+	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
 	blk_queue_bounce(q, &bio);
-	__blk_queue_split(&bio, &nr_segs);
+	if (blk_may_split(bio))
+		__blk_queue_split(&bio, &nr_segs);
 
 	if (!bio_integrity_prep(bio))
 		goto queue_exit;
diff --git a/block/blk.h b/block/blk.h
index 0afee3e6a7c1e..34b31baf51324 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -262,6 +262,31 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
 
+static inline bool blk_may_split(struct bio *bio)
+{
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+		return true; /* non-trivial splitting decisions */
+	default:
+		break;
+	}
+
+	/*
+	 * All drivers must accept single-segments bios that are <= PAGE_SIZE.
+	 * This is a quick and dirty check that relies on the fact that
+	 * bi_io_vec[0] is always valid if a bio has data.  The check might
+	 * lead to occasional false negatives when bios are cloned, but compared
+	 * to the performance impact of cloned bios themselves the loop below
+	 * doesn't matter anyway.
+	 */
+	return bio->bi_bdev->bd_disk->queue->limits.chunk_sectors ||
+		bio->bi_vcnt != 1 ||
+		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+}
+
 void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);

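To make the data-path heuristic concrete, a worked example assuming
4 KiB pages (the constant and names below are illustrative stand-ins,
not kernel definitions): a single-bvec bio takes the fast path exactly
when the vec does not extend past one page.

#include <assert.h>

#define EX_PAGE_SIZE 4096u	/* stand-in for PAGE_SIZE */

/* complement of the bv_len + bv_offset test above: true means fast path */
static int single_bvec_fits(unsigned int bv_len, unsigned int bv_offset)
{
	return bv_len + bv_offset <= EX_PAGE_SIZE;
}

int main(void)
{
	assert( single_bvec_fits(512, 3584));	/* 3584 + 512 = 4096: fast path */
	assert(!single_bvec_fits(512, 3600));	/* 3600 + 512 = 4112: full split check */
	return 0;
}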

* Re: [PATCH] block: handle fast path of bio splitting inline
From: Jens Axboe @ 2021-10-14 13:16 UTC
  To: Christoph Hellwig; +Cc: linux-block

On 10/13/21 11:38 PM, Christoph Hellwig wrote:
> On Wed, Oct 13, 2021 at 02:44:14PM -0600, Jens Axboe wrote:
>> The fast path is the case where no splitting is needed. Separate the
>> handling into a check part we can inline, and an out-of-line handling
>> path for when we do need to split.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> What about something like this version instead?

A bit of a combo, I think this will do fine.


commit d997c5f4001031863de1c8c437bd2fcc6a4f79a2
Author: Jens Axboe <axboe@kernel.dk>
Date:   Wed Oct 13 12:43:41 2021 -0600

    block: handle fast path of bio splitting inline
    
    The fast path is the case where no splitting is needed. Separate the
    handling into a check part we can inline, and an out-of-line handling
    path for when we do need to split.
    
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4da2bf18fa4d..f390a8753268 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -324,6 +324,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 
 /**
  * __blk_queue_split - split a bio and submit the second half
+ * @q:       [in] request_queue new bio is being queued at
  * @bio:     [in, out] bio to be split
  * @nr_segs: [out] number of segments in the first bio
  *
@@ -334,9 +335,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
  * of the caller to ensure that q->bio_split is only released after processing
  * of the split bio has finished.
  */
-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
+void __blk_queue_split(struct request_queue *q, struct bio **bio,
+		       unsigned int *nr_segs)
 {
-	struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
 	struct bio *split = NULL;
 
 	switch (bio_op(*bio)) {
@@ -353,21 +354,6 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
 				nr_segs);
 		break;
 	default:
-		/*
-		 * All drivers must accept single-segments bios that are <=
-		 * PAGE_SIZE.  This is a quick and dirty check that relies on
-		 * the fact that bi_io_vec[0] is always valid if a bio has data.
-		 * The check might lead to occasional false negatives when bios
-		 * are cloned, but compared to the performance impact of cloned
-		 * bios themselves the loop below doesn't matter anyway.
-		 */
-		if (!q->limits.chunk_sectors &&
-		    (*bio)->bi_vcnt == 1 &&
-		    ((*bio)->bi_io_vec[0].bv_len +
-		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
-			*nr_segs = 1;
-			break;
-		}
 		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
 		break;
 	}
@@ -397,9 +383,11 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
  */
 void blk_queue_split(struct bio **bio)
 {
+	struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
 	unsigned int nr_segs;
 
-	__blk_queue_split(bio, &nr_segs);
+	if (blk_may_split(q, *bio))
+		__blk_queue_split(q, bio, &nr_segs);
 }
 EXPORT_SYMBOL(blk_queue_split);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dd4121dcd3ce..0cca4b7a4d16 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2521,11 +2521,12 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct request *rq;
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
-	unsigned int nr_segs;
+	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
 	blk_queue_bounce(q, &bio);
-	__blk_queue_split(&bio, &nr_segs);
+	if (blk_may_split(q, bio))
+		__blk_queue_split(q, &bio, &nr_segs);
 
 	if (!bio_integrity_prep(bio))
 		goto queue_exit;
diff --git a/block/blk.h b/block/blk.h
index fa23338449ed..f6e61cebd6ae 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -264,7 +264,32 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
 				const char *, size_t);
 
-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
+static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
+{
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+		return true; /* non-trivial splitting decisions */
+	default:
+		break;
+	}
+
+	/*
+	 * All drivers must accept single-segments bios that are <= PAGE_SIZE.
+	 * This is a quick and dirty check that relies on the fact that
+	 * bi_io_vec[0] is always valid if a bio has data.  The check might
+	 * lead to occasional false negatives when bios are cloned, but compared
+	 * to the performance impact of cloned bios themselves the loop below
+	 * doesn't matter anyway.
+	 */
+	return q->limits.chunk_sectors || bio->bi_vcnt != 1 ||
+		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+}
+
+void __blk_queue_split(struct request_queue *q, struct bio **bio,
+			unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);
 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,

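One detail worth noting in this combined version: blk_mq_submit_bio()
may now skip __blk_queue_split() entirely, yet nr_segs is consumed
further down regardless, so it has to be initialized to its fast-path
value of one segment up front. A sketch of that caller-side idiom
(names invented for illustration):

#include <stdio.h>

struct ex_bio { unsigned int nr_vecs; };

/* cheap inline predicate, in the spirit of blk_may_split() */
static inline int ex_may_split(const struct ex_bio *bio)
{
	return bio->nr_vecs != 1;
}

/* out-of-line slow path: the only place nr_segs changes */
static void ex_do_split(struct ex_bio *bio, unsigned int *nr_segs)
{
	*nr_segs = bio->nr_vecs;
}

int main(void)
{
	struct ex_bio bio = { .nr_vecs = 1 };
	unsigned int nr_segs = 1;	/* fast-path default, as in the patch */

	if (ex_may_split(&bio))
		ex_do_split(&bio, &nr_segs);
	printf("nr_segs = %u\n", nr_segs);	/* prints 1: call was skipped */
	return 0;
}
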
-- 
Jens Axboe



* Re: [PATCH] block: handle fast path of bio splitting inline
From: Christoph Hellwig @ 2021-10-16  4:40 UTC
  To: Jens Axboe; +Cc: Christoph Hellwig, linux-block

On Thu, Oct 14, 2021 at 07:16:50AM -0600, Jens Axboe wrote:
> On 10/13/21 11:38 PM, Christoph Hellwig wrote:
> > On Wed, Oct 13, 2021 at 02:44:14PM -0600, Jens Axboe wrote:
> >> The fast path is the case where no splitting is needed. Separate the
> >> handling into a check part we can inline, and an out-of-line handling
> >> path for when we do need to split.
> >>
> >> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> > 
> > What about something like this version instead?
> 
> A bit of a combo, I think this will do fine.

Fine with me:

Reviewed-by: Christoph Hellwig <hch@lst.de>
