linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:24   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone() NeilBrown
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

This patch converts bioset_create() and
bioset_create_nobvec() to not create a workqueue so
alloctions will never trigger punt_bios_to_rescuer().  It
also introduces bioset_create_rescued() and
bioset_create_nobvec_rescued() which preserve the old
behaviour.

All callers of bioset_create() and bioset_create_nobvec(),
that are inside block device drivers, are converted to the
_rescued() version.

biosets used by filesystems or other top-level users do not
need rescuing as the bio can never be queued behind other
bios.  This includes fs_bio_set, blkdev_dio_pool,
btrfs_bioset, xfs_ioend_bioset, drbd_md_io_bio_set,
and one allocated by target_core_iblock.c.

biosets used by md/raid to not need rescuing as
their usage was recently audited to revised to never
risk deadlock.

It is hoped that most, if not all, of the remaining biosets
can end up being the non-rescued version.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 block/bio.c               |   28 ++++++++++++++++++++++++----
 block/blk-core.c          |    2 +-
 drivers/md/bcache/super.c |    4 ++--
 drivers/md/dm-crypt.c     |    2 +-
 drivers/md/dm-io.c        |    2 +-
 drivers/md/dm.c           |    5 +++--
 include/linux/bio.h       |    2 ++
 7 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..b8e304015dc8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -363,6 +363,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
+	if (!WARN_ON_ONCE(!bs->rescue_workqueue))
+		return;
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -474,7 +476,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 
 		if (current->bio_list &&
 		    (!bio_list_empty(&current->bio_list[0]) ||
-		     !bio_list_empty(&current->bio_list[1])))
+		     !bio_list_empty(&current->bio_list[1])) &&
+		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -1923,7 +1926,8 @@ EXPORT_SYMBOL(bioset_free);
 
 static struct bio_set *__bioset_create(unsigned int pool_size,
 				       unsigned int front_pad,
-				       bool create_bvec_pool)
+				       bool create_bvec_pool,
+				       bool create_rescue_workqueue)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
@@ -1954,6 +1958,9 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
 			goto bad;
 	}
 
+	if (!create_rescue_workqueue)
+		return bs;
+
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
@@ -1979,10 +1986,16 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
  */
 struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
-	return __bioset_create(pool_size, front_pad, true);
+	return __bioset_create(pool_size, front_pad, true, false);
 }
 EXPORT_SYMBOL(bioset_create);
 
+struct bio_set *bioset_create_rescued(unsigned int pool_size, unsigned int front_pad)
+{
+	return __bioset_create(pool_size, front_pad, true, true);
+}
+EXPORT_SYMBOL(bioset_create_rescued);
+
 /**
  * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
  * @pool_size:	Number of bio to cache in the mempool
@@ -1994,10 +2007,17 @@ EXPORT_SYMBOL(bioset_create);
  */
 struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
 {
-	return __bioset_create(pool_size, front_pad, false);
+	return __bioset_create(pool_size, front_pad, false, false);
 }
 EXPORT_SYMBOL(bioset_create_nobvec);
 
+struct bio_set *bioset_create_nobvec_rescued(unsigned int pool_size,
+					     unsigned int front_pad)
+{
+	return __bioset_create(pool_size, front_pad, false, true);
+}
+EXPORT_SYMBOL(bioset_create_nobvec_rescued);
+
 #ifdef CONFIG_BLK_CGROUP
 
 /**
diff --git a/block/blk-core.c b/block/blk-core.c
index f5d64ad75b36..23f20cb84b2f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -728,7 +728,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	q->bio_split = bioset_create_rescued(BIO_POOL_SIZE, 0);
 	if (!q->bio_split)
 		goto fail_id;
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 85e3f21c2514..6cb30792f0ed 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -786,7 +786,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	minor *= BCACHE_MINORS;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	if (!(d->bio_split = bioset_create_rescued(4, offsetof(struct bbio, bio))) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_minor, minor);
 		return -ENOMEM;
@@ -1520,7 +1520,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	    !(c->bio_split = bioset_create_rescued(4, offsetof(struct bbio, bio))) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ef1d836bd81b..b7b1df84fe4a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1936,7 +1936,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0);
+	cc->bs = bioset_create_rescued(MIN_IOS, 0);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..5557d5d97b5b 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,7 @@ struct dm_io_client *dm_io_client_create(void)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0);
+	client->bios = bioset_create_rescued(min_ios, 0);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8bf397729bbd..5590c571c0e7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1014,7 +1014,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 
 		while ((bio = bio_list_pop(&list))) {
 			struct bio_set *bs = bio->bi_pool;
-			if (unlikely(!bs) || bs == fs_bio_set) {
+			if (unlikely(!bs) || bs == fs_bio_set ||
+			    !bs->rescue_workqueue) {
 				bio_list_add(&current->bio_list[i], bio);
 				continue;
 			}
@@ -2601,7 +2602,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 		BUG();
 	}
 
-	pools->bs = bioset_create_nobvec(pool_size, front_pad);
+	pools->bs = bioset_create_nobvec_rescued(pool_size, front_pad);
 	if (!pools->bs)
 		goto out;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..2eb8bfae5276 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -374,7 +374,9 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 }
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
+extern struct bio_set *bioset_create_rescued(unsigned int, unsigned int);
 extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create_nobvec_rescued(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 01/11] blk: remove bio_set arg from blk_queue_split()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:21   ` Christoph Hellwig
                     ` (2 more replies)
  2017-04-20  5:38 ` [PATCH 02/11] blk: make the bioset rescue_workqueue optional NeilBrown
                   ` (9 subsequent siblings)
  10 siblings, 3 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

blk_queue_split() is always called with the last arg being q->bio_split,
where 'q' is the first arg.

Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses
q->bio_split.

This is inconsistent and unnecessary.  Remove the last arg and always use
q->bio_split inside blk_queue_split()

Signed-off-by: NeilBrown <neilb@suse.com>
---
 block/blk-core.c              |    2 +-
 block/blk-merge.c             |    9 ++++-----
 block/blk-mq.c                |    2 +-
 drivers/block/drbd/drbd_req.c |    2 +-
 drivers/block/pktcdvd.c       |    2 +-
 drivers/block/ps3vram.c       |    2 +-
 drivers/block/rsxx/dev.c      |    2 +-
 drivers/block/umem.c          |    2 +-
 drivers/block/zram/zram_drv.c |    2 +-
 drivers/lightnvm/rrpc.c       |    2 +-
 drivers/md/md.c               |    2 +-
 drivers/s390/block/dcssblk.c  |    2 +-
 drivers/s390/block/xpram.c    |    2 +-
 include/linux/blkdev.h        |    3 +--
 14 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 25aea293ee98..f5d64ad75b36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1662,7 +1662,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio->bi_error = -EIO;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae406341..d59074556703 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -202,8 +202,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	return do_split ? new : NULL;
 }
 
-void blk_queue_split(struct request_queue *q, struct bio **bio,
-		     struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
 {
 	struct bio *split, *res;
 	unsigned nsegs;
@@ -211,13 +210,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	default:
 		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c496692ecc5b..365cb17308e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1549,7 +1549,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index b5730e17b455..fa62dd8a4d46 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1557,7 +1557,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
 	unsigned long start_jif;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	start_jif = jiffies;
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 66d846ba85a9..98394d034c29 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2414,7 +2414,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	pd = q->queuedata;
 	if (!pd) {
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..48072c0c1010 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 
 	dev_dbg(&dev->core, "%s\n", __func__);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&priv->lock);
 	busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..01624eaefcba 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -151,7 +151,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	struct rsxx_bio_meta *bio_meta;
 	int st = -EINVAL;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	might_sleep();
 
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..c8d8a2f16f8e 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 		 (unsigned long long)bio->bi_iter.bi_sector,
 		 bio->bi_iter.bi_size);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&card->lock);
 	*card->biotail = bio;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1710b06f04a7..9db3a375d551 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -884,7 +884,7 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
 {
 	struct zram *zram = queue->queuedata;
 
-	blk_queue_split(queue, &bio, queue->bio_split);
+	blk_queue_split(queue, &bio);
 
 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
 					bio->bi_iter.bi_size)) {
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..8e241056b141 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	struct nvm_rq *rqd;
 	int err;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_op(bio) == REQ_OP_DISCARD) {
 		rrpc_discard(rrpc, bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6cc6dd74c153..de37ace40470 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	unsigned int sectors;
 	int cpu;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (mddev == NULL || mddev->pers == NULL) {
 		bio_io_error(bio);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 415d10a67b7a..10ece6f3c7eb 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -829,7 +829,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long page_addr;
 	unsigned long bytes;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if ((bio->bi_iter.bi_sector & 7) != 0 ||
 	    (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51c9e391798e..6105bf775586 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -939,8 +939,7 @@ extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
-			    struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 00/11] block: assorted cleanup for bio splitting and cloning.
@ 2017-04-20  5:38 NeilBrown
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
                   ` (10 more replies)
  0 siblings, 11 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, linux-kernel, Ilya Dryomov, Sage Weil, Alex Elder,
	ceph-devel, Philipp Reisner, Lars Ellenberg, drbd-dev,
	Konrad Rzeszutek Wilk, Roger Pau Monne, xen-devel,
	Kent Overstreet, linux-bcache

This series contains more work towards getting rid of the bioset work
queues and generally improving the splitting and cloning of bios.

The first three patches are similar to ones that I sent previously.
They have been rebased on the current block for-next tree and improved
a little.  Specifically I no longer change all instances of
bioset_create() to bioset_create_rescued().  Instead I only change
those which cannot easily be shown to not need changing.

The next patch fixes some issues with bounce.c.  Most significantly it
used to be called after blk_queue_split(), but now it is called
before.  This means that it can no longer assume that the
bio has been split if it contained more than BIO_MAX_PAGES.

Most of the rest change a number of bio_clone() calls to
bio_clone_fast() as that is often what is needed.  bio_clone() does
not work with bios larger than BIO_MAX_PAGES, bio_clone_fast() does.

Finally we remove bio_clone() (which is no longer used) and stop
blk_bio_segment_split() from splitting after BIO_MAX_PAGES, as that
is no longer necessary.

---

NeilBrown (11):
      blk: remove bio_set arg from blk_queue_split()
      blk: make the bioset rescue_workqueue optional.
      blk: use non-rescuing bioset for q->bio_split.
      block: Improvements to bounce-buffer handling
      rbd: use bio_clone_fast() instead of bio_clone()
      drbd: use bio_clone_fast() instead of bio_clone()
      pktcdvd: use bio_clone_fast() instead of bio_clone()
      xen-blkfront: remove bio splitting.
      bcache: use kmalloc to allocate bio in bch_data_verify()
      block: remove bio_clone() and all references.
      block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()


 Documentation/block/biodoc.txt |    2 +
 block/bio.c                    |   30 +++++++++++++++++++---
 block/blk-core.c               |    2 +
 block/blk-merge.c              |   31 +++--------------------
 block/blk-mq.c                 |    2 +
 block/bounce.c                 |   27 +++++++++++++++++++-
 drivers/block/drbd/drbd_int.h  |    3 ++
 drivers/block/drbd/drbd_main.c |    9 +++++++
 drivers/block/drbd/drbd_req.c  |    2 +
 drivers/block/drbd/drbd_req.h  |    2 +
 drivers/block/pktcdvd.c        |   14 ++++++++--
 drivers/block/ps3vram.c        |    2 +
 drivers/block/rbd.c            |   16 +++++++++++-
 drivers/block/rsxx/dev.c       |    2 +
 drivers/block/umem.c           |    2 +
 drivers/block/xen-blkfront.c   |   54 ++--------------------------------------
 drivers/block/zram/zram_drv.c  |    2 +
 drivers/lightnvm/rrpc.c        |    2 +
 drivers/md/bcache/debug.c      |    2 +
 drivers/md/bcache/super.c      |    4 +--
 drivers/md/dm-crypt.c          |    2 +
 drivers/md/dm-io.c             |    2 +
 drivers/md/dm.c                |    5 ++--
 drivers/md/md.c                |    4 +--
 drivers/s390/block/dcssblk.c   |    2 +
 drivers/s390/block/xpram.c     |    2 +
 include/linux/bio.h            |    7 +----
 include/linux/blkdev.h         |    3 +-
 28 files changed, 121 insertions(+), 116 deletions(-)

--
Signature

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (5 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 04/11] block: Improvements to bounce-buffer handling NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:30   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, linux-kernel, Philipp Reisner, Lars Ellenberg, drbd-dev

drbd does not modify the bi_io_vec of the cloned bio,
so there is no need to clone that part.  So bio_clone_fast()
is the better choice.
For bio_clone_fast() we need to specify a bio_set.
We could use fs_bio_set, which bio_clone() uses, or
drbd_md_io_bio_set, which drbd uses for metadata, but it is
generally best to avoid sharing bio_sets unless you can
be certain that there are no interdependencies.

So create a new bio_set, drbd_io_bio_set, and use bio_clone_fast().

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/block/drbd/drbd_int.h  |    3 +++
 drivers/block/drbd/drbd_main.c |    9 +++++++++
 drivers/block/drbd/drbd_req.h  |    2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..f91982515a6b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
 extern struct mutex resources_mutex;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..0cc50a7ca1c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
 mempool_t *drbd_ee_mempool;
 mempool_t *drbd_md_io_page_pool;
 struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
 
 	/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
+	if (drbd_io_bio_set)
+		bioset_free(drbd_io_bio_set);
 	if (drbd_md_io_bio_set)
 		bioset_free(drbd_md_io_bio_set);
 	if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);
 
+	drbd_io_bio_set      = NULL;
 	drbd_md_io_bio_set   = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_ee_mempool      = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
 	drbd_pp_pool         = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_md_io_bio_set   = NULL;
+	drbd_io_bio_set      = NULL;
 
 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -2165,6 +2170,10 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
+	drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	if (drbd_io_bio_set == NULL)
+		goto Enomem;
+
 	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f2da91..a24e870853eb 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
 	struct bio *bio;
-	bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set); /* XXX cannot fail!! */
 
 	req->private_bio = bio;
 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 05/11] rbd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (2 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone() NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:31   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split NeilBrown
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, linux-kernel, Ilya Dryomov, Sage Weil, Alex Elder,
	ceph-devel

bio_clone() makes a copy of the bi_io_vec, but rbd never changes that,
so there is no need for a copy.
bio_clone_fast() can be used instead, which avoids making the copy.

This requires that we provide a bio_set.  bio_clone() uses fs_bio_set,
but it isn't, in general, safe to use the same bio_set at different
levels of the stack, as that can lead to deadlocks.  As filesystems
use fs_bio_set, block devices shouldn't.

As rbd never stacks, it is safe to have a single global bio_set for
all rbd devices to use.  So allocate that when the module is
initialised, and use it with bio_clone_fast().

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/block/rbd.c |   16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 089ac4179919..48eecffc612e 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -441,6 +441,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
 
+static struct bio_set		*rbd_bio_clone;
+
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -1362,7 +1364,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
 {
 	struct bio *bio;
 
-	bio = bio_clone(bio_src, gfpmask);
+	bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
 	if (!bio)
 		return NULL;	/* ENOMEM */
 
@@ -6342,8 +6344,16 @@ static int rbd_slab_init(void)
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
+	rbd_assert(!rbd_bio_clone);
+	rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0);
+	if (!rbd_bio_clone)
+		goto out_err_clone;
+
 	return 0;
 
+out_err_clone:
+	kmem_cache_destroy(rbd_obj_request_cache);
+	rbd_obj_request_cache = NULL;
 out_err:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
@@ -6359,6 +6369,10 @@ static void rbd_slab_exit(void)
 	rbd_assert(rbd_img_request_cache);
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
+
+	rbd_assert(rbd_bio_clone);
+	bioset_free(rbd_bio_clone);
+	rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split.
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (3 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 05/11] rbd: " NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:25   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 04/11] block: Improvements to bounce-buffer handling NeilBrown
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

A rescuing bioset is only useful if there might be bios from
that same bioset on the bio_list_on_stack queue at a time
when bio_alloc_bioset() is called.  This never applies to
q->bio_split.

Allocations from q->bio_split are only ever made from
blk_queue_split() which is only ever called early in each of
various make_request_fn()s.  The original bio (call this A)
is then passed to generic_make_request() and is placed on
the bio_list_on_stack queue, and the bio that was allocated
from q->bio_split (B) is processed.

The processing of this may cause other bios to be passed to
generic_make_request() or may even cause the bio B itself to
be passed, possible after some prefix has been split off
(using some other bioset).

generic_make_request() now guarantees that all of these bios
(B and dependants) will be fully processed before the tail
of the original bio A gets handled.  None of these early bios
can possible trigger an allocation from the original
q->bio_split as they are either too small to require
splitting or (more likely) are destined for a different queue.

The next time that the original q->bio_split might be used
by this thread is when A is processed again, as it might
still be too big to handle directly.  By this time there
cannot be any other bios allocated from q->bio_split in the
generic_make_request() queue.  So no rescuing will ever be
needed.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 block/blk-core.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 23f20cb84b2f..f5d64ad75b36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -728,7 +728,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create_rescued(BIO_POOL_SIZE, 0);
+	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
 	if (!q->bio_split)
 		goto fail_id;
 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
  2017-04-20  5:38 ` [PATCH 02/11] blk: make the bioset rescue_workqueue optional NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:29   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 05/11] rbd: " NeilBrown
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

pktcdvd doesn't change the bi_io_vec of the clone bio,
so it is more efficient to use bio_clone_fast(), and not clone
the bi_io_vec.
This requires providing a bio_set, and it is safest to
provide a dedicated bio_set rather than sharing
fs_bio_set, which filesytems use.
This new bio_set, pkt_bio_set, can also be use for the bio_split()
call as the two allocations (bio_clone_fast, and bio_split) are
independent, neither can block a bio allocated by the other.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/block/pktcdvd.c |   12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 98394d034c29..7a437b5b8804 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
 static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
 
 static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -2310,7 +2311,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
 	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
@@ -2455,7 +2456,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 			split = bio_split(bio, last_zone -
 					  bio->bi_iter.bi_sector,
-					  GFP_NOIO, fs_bio_set);
+					  GFP_NOIO, pkt_bio_set);
 			bio_chain(split, bio);
 		} else {
 			split = bio;
@@ -2919,6 +2920,11 @@ static int __init pkt_init(void)
 					sizeof(struct packet_stacked_data));
 	if (!psd_pool)
 		return -ENOMEM;
+	pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	if (!pkt_bio_set) {
+		mempool_destroy(psd_pool);
+		return -ENOMEM;
+	}
 
 	ret = register_blkdev(pktdev_major, DRIVER_NAME);
 	if (ret < 0) {
@@ -2951,6 +2957,7 @@ static int __init pkt_init(void)
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 	return ret;
 }
 
@@ -2964,6 +2971,7 @@ static void __exit pkt_exit(void)
 
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 04/11] block: Improvements to bounce-buffer handling
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (4 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:28   ` Christoph Hellwig
  2017-04-21 15:39   ` Ming Lei
  2017-04-20  5:38 ` [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone() NeilBrown
                   ` (4 subsequent siblings)
  10 siblings, 2 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

Since commit 23688bf4f830 ("block: ensure to split after potentially
bouncing a bio") blk_queue_bounce() is called *before*
blk_queue_split().
This means that:
 1/ the comments blk_queue_split() about bounce buffers are
    irrelevant, and
 2/ a very large bio (more than BIO_MAX_PAGES) will no longer be
    split before it arrives at blk_queue_bounce(), leading to the
    possibility that bio_clone_bioset() will fail and a NULL
    will be dereferenced.

Separately, blk_queue_bounce() shouldn't use fs_bio_set as the bio
being copied could be from the same set, and this could lead to a
deadlock.

So:
 - allocate 2 private biosets for blk_queue_bounce, one for
   splitting enormous bios and one for cloning bios.
 - add code to split a bio that exceeds BIO_MAX_PAGES.
 - Fix up the comments in blk_queue_split()

Signed-off-by: NeilBrown <neilb@suse.com>
---
 block/blk-merge.c |   14 ++++----------
 block/bounce.c    |   27 ++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index d59074556703..51c84540d3bb 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -117,17 +117,11 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		 * each holds at most BIO_MAX_PAGES bvecs because
 		 * bio_clone() can fail to allocate big bvecs.
 		 *
-		 * It should have been better to apply the limit per
-		 * request queue in which bio_clone() is involved,
-		 * instead of globally. The biggest blocker is the
-		 * bio_clone() in bio bounce.
+		 * Those drivers which will need to use bio_clone()
+		 * should tell us in some way.  For now, impose the
+		 * BIO_MAX_PAGES limit on all queues.
 		 *
-		 * If bio is splitted by this reason, we should have
-		 * allowed to continue bios merging, but don't do
-		 * that now for making the change simple.
-		 *
-		 * TODO: deal with bio bounce's bio_clone() gracefully
-		 * and convert the global limit into per-queue limit.
+		 * TODO: handle users of bio_clone() differently.
 		 */
 		if (bvecs++ >= BIO_MAX_PAGES)
 			goto split;
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3a5da1..51fb538b504d 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -26,6 +26,7 @@
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
+struct bio_set *bounce_bio_set, *bounce_bio_split;
 static mempool_t *page_pool, *isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +41,14 @@ static __init int init_emergency_pool(void)
 	BUG_ON(!page_pool);
 	pr_info("pool size: %d pages\n", POOL_SIZE);
 
+	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	BUG_ON(!bounce_bio_set);
+	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+		BUG_ON(1);
+
+	bounce_bio_split = bioset_create_nobvec(BIO_POOL_SIZE, 0);
+	BUG_ON(!bounce_bio_split);
+
 	return 0;
 }
 
@@ -194,7 +203,23 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 
 	return;
 bounce:
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+	if (bio_segments(*bio_orig) > BIO_MAX_PAGES) {
+		int cnt = 0;
+		int sectors = 0;
+		struct bio_vec bv;
+		struct bvec_iter iter;
+		bio_for_each_segment(bv, *bio_orig, iter) {
+			if (cnt++ < BIO_MAX_PAGES)
+				sectors += bv.bv_len >> 9;
+			else
+				break;
+		}
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio_chain(bio, *bio_orig);
+		generic_make_request(*bio_orig);
+		*bio_orig = bio;
+	}
+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (6 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone() NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:31   ` Christoph Hellwig
                     ` (2 more replies)
  2017-04-20  5:38 ` [PATCH 08/11] xen-blkfront: remove bio splitting NeilBrown
                   ` (2 subsequent siblings)
  10 siblings, 3 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, Kent Overstreet, linux-bcache

This function allocates a bio, then a collection
of pages.  It copes with failure.

It currently uses a mempool() to allocate the bio,
but alloc_page() to allocate the pages.  These fail
in different ways, so the usage is inconsistent.

Change the bio_clone() to bio_clone_kmalloc()
so that no pool is used either for the bio or the pages.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/md/bcache/debug.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 	struct bio_vec bv, cbv;
 	struct bvec_iter iter, citer = { 0 };
 
-	check = bio_clone(bio, GFP_NOIO);
+	check = bio_clone_kmalloc(bio, GFP_NOIO);
 	if (!check)
 		return;
 	check->bi_opf = REQ_OP_READ;

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 10/11] block: remove bio_clone() and all references.
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (9 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split() NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:32   ` Christoph Hellwig
  2017-04-21 15:43   ` Ming Lei
  10 siblings, 2 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

bio_clone() is no longer used.
Only bio_clone_bioset() or bio_clone_fast().
This is for the best, as bio_clone() used fs_bio_set,
and filesystems are unlikely to want to use bio_clone().

So remove bio_clone() and all references.
This includes a fix to some incorrect documentation.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 Documentation/block/biodoc.txt |    2 +-
 block/bio.c                    |    2 +-
 block/blk-merge.c              |    6 +++---
 drivers/md/md.c                |    2 +-
 include/linux/bio.h            |    5 -----
 5 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf64b0f..9490f2845f06 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
 shares the bio_vec_list with the original bio (i.e. both point to the
 same bio_vec_list). This would typically be used for splitting i/o requests
 in lvm or md.
diff --git a/block/bio.c b/block/bio.c
index b8e304015dc8..9ef1da1830e4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -547,7 +547,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 51c84540d3bb..e7862e9dcc39 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -115,13 +115,13 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		 * With arbitrary bio size, the incoming bio may be very
 		 * big. We have to split the bio into small bios so that
 		 * each holds at most BIO_MAX_PAGES bvecs because
-		 * bio_clone() can fail to allocate big bvecs.
+		 * bio_clone_bioset() can fail to allocate big bvecs.
 		 *
-		 * Those drivers which will need to use bio_clone()
+		 * Those drivers which will need to use bio_clone_bioset()
 		 * should tell us in some way.  For now, impose the
 		 * BIO_MAX_PAGES limit on all queues.
 		 *
-		 * TODO: handle users of bio_clone() differently.
+		 * TODO: handle users of bio_clone_bioset() differently.
 		 */
 		if (bvecs++ >= BIO_MAX_PAGES)
 			goto split;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index de37ace40470..8b415e13d2f8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
 static bool create_on_open = true;
 
 /* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
  */
 
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2eb8bfae5276..5227850592cf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -394,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 }
 
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (8 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 08/11] xen-blkfront: remove bio splitting NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-21 11:34   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 10/11] block: remove bio_clone() and all references NeilBrown
  10 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel

blk_bio_segment_split() makes sure bios have no more than
BIO_MAX_PAGES entries in the bi_io_vec.
This was done because bio_clone_bioset() (when given a
mempool bioset) could not handle larger io_vecs.

No driver uses bio_clone_bioset() any more, they all
use bio_clone_fast() if anything, and bio_clone_fast()
doesn't clone the bi_io_vec.

The main user of of bio_clone_bioset() at this level
is bounce.c, and bouncing now happens before blk_bio_segment_split(),
so that is not of concern.

So remove the big helpful comment and the code.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 block/blk-merge.c |   16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index e7862e9dcc39..cea544ec5d96 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,25 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	bool do_split = true;
 	struct bio *new = NULL;
 	const unsigned max_sectors = get_max_io_size(q, bio);
-	unsigned bvecs = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
-		 * With arbitrary bio size, the incoming bio may be very
-		 * big. We have to split the bio into small bios so that
-		 * each holds at most BIO_MAX_PAGES bvecs because
-		 * bio_clone_bioset() can fail to allocate big bvecs.
-		 *
-		 * Those drivers which will need to use bio_clone_bioset()
-		 * should tell us in some way.  For now, impose the
-		 * BIO_MAX_PAGES limit on all queues.
-		 *
-		 * TODO: handle users of bio_clone_bioset() differently.
-		 */
-		if (bvecs++ >= BIO_MAX_PAGES)
-			goto split;
-
-		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 08/11] xen-blkfront: remove bio splitting.
  2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
                   ` (7 preceding siblings ...)
  2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
@ 2017-04-20  5:38 ` NeilBrown
  2017-04-20 10:00   ` Roger Pau Monné
  2017-04-21 11:36   ` Christoph Hellwig
  2017-04-20  5:38 ` [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split() NeilBrown
  2017-04-20  5:38 ` [PATCH 10/11] block: remove bio_clone() and all references NeilBrown
  10 siblings, 2 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-20  5:38 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, linux-kernel, Konrad Rzeszutek Wilk,
	Roger Pau Monne, xen-devel

bios that are re-submitted will pass through blk_queue_split() when
blk_queue_bio() is called, and this will split the bio if necessary.
There is no longer any need to do this splitting in xen-blkfront.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/block/xen-blkfront.c |   54 ++----------------------------------------
 1 file changed, 3 insertions(+), 51 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index abed296ce605..b8930d9b7102 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
 	unsigned long associated_id;
 };
 
-struct split_bio {
-	struct bio *bio;
-	atomic_t pending;
-};
-
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
@@ -1982,28 +1977,13 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-static void split_bio_end(struct bio *bio)
-{
-	struct split_bio *split_bio = bio->bi_private;
-
-	if (atomic_dec_and_test(&split_bio->pending)) {
-		split_bio->bio->bi_phys_segments = 0;
-		split_bio->bio->bi_error = bio->bi_error;
-		bio_endio(split_bio->bio);
-		kfree(split_bio);
-	}
-	bio_put(bio);
-}
-
 static int blkif_recover(struct blkfront_info *info)
 {
-	unsigned int i, r_index;
+	unsigned int r_index;
 	struct request *req, *n;
 	int rc;
-	struct bio *bio, *cloned_bio;
-	unsigned int segs, offset;
-	int pending, size;
-	struct split_bio *split_bio;
+	struct bio *bio;
+	unsigned int segs;
 
 	blkfront_gather_backend_features(info);
 	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2042,34 +2022,6 @@ static int blkif_recover(struct blkfront_info *info)
 
 	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
-		if (bio_segments(bio) > segs) {
-			/*
-			 * This bio has more segments than what we can
-			 * handle, we have to split it.
-			 */
-			pending = (bio_segments(bio) + segs - 1) / segs;
-			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
-			BUG_ON(split_bio == NULL);
-			atomic_set(&split_bio->pending, pending);
-			split_bio->bio = bio;
-			for (i = 0; i < pending; i++) {
-				offset = (i * segs * XEN_PAGE_SIZE) >> 9;
-				size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
-					   (unsigned int)bio_sectors(bio) - offset);
-				cloned_bio = bio_clone(bio, GFP_NOIO);
-				BUG_ON(cloned_bio == NULL);
-				bio_trim(cloned_bio, offset, size);
-				cloned_bio->bi_private = split_bio;
-				cloned_bio->bi_end_io = split_bio_end;
-				submit_bio(cloned_bio);
-			}
-			/*
-			 * Now we have to wait for all those smaller bios to
-			 * end, so we can also end the "parent" bio.
-			 */
-			continue;
-		}
-		/* We don't need to split this bio */
 		submit_bio(bio);
 	}
 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 08/11] xen-blkfront: remove bio splitting.
  2017-04-20  5:38 ` [PATCH 08/11] xen-blkfront: remove bio splitting NeilBrown
@ 2017-04-20 10:00   ` Roger Pau Monné
  2017-04-21 11:36   ` Christoph Hellwig
  1 sibling, 0 replies; 40+ messages in thread
From: Roger Pau Monné @ 2017-04-20 10:00 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, linux-kernel, Konrad Rzeszutek Wilk, xen-devel

On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
> bios that are re-submitted will pass through blk_queue_split() when
> blk_queue_bio() is called, and this will split the bio if necessary.
> There is no longer any need to do this splitting in xen-blkfront.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
>  drivers/block/xen-blkfront.c |   54 ++----------------------------------------
>  1 file changed, 3 insertions(+), 51 deletions(-)

Nice!

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/11] blk: remove bio_set arg from blk_queue_split()
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
@ 2017-04-21 11:21   ` Christoph Hellwig
  2017-04-21 15:14   ` Ming Lei
  2017-04-22  9:16   ` Javier González
  2 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:21 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-04-20  5:38 ` [PATCH 02/11] blk: make the bioset rescue_workqueue optional NeilBrown
@ 2017-04-21 11:24   ` Christoph Hellwig
  2017-04-24  1:51     ` NeilBrown
  0 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:24 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

On Thu, Apr 20, 2017 at 03:38:48PM +1000, NeilBrown wrote:
> This patch converts bioset_create() and
> bioset_create_nobvec() to not create a workqueue so
> alloctions will never trigger punt_bios_to_rescuer().  It
> also introduces bioset_create_rescued() and
> bioset_create_nobvec_rescued() which preserve the old
> behaviour.

Why these super-early line breaks in the committ message?  They make
the text much more awkware compared to say:

This patch converts bioset_create() and bioset_create_nobvec() to not
create a workqueue so alloctions will never trigger
punt_bios_to_rescuer(). It also introduces bioset_create_rescued() and
bioset_create_nobvec_rescued() which preserve the old behaviour.

>  static struct bio_set *__bioset_create(unsigned int pool_size,
>  				       unsigned int front_pad,
> -				       bool create_bvec_pool)
> +				       bool create_bvec_pool,
> +				       bool create_rescue_workqueue)

I'd much prefer a single new bioset_create with a bunch of flags
arguments over the number of new functions and these bool arguments.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split.
  2017-04-20  5:38 ` [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split NeilBrown
@ 2017-04-21 11:25   ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

Looks fine:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 04/11] block: Improvements to bounce-buffer handling
  2017-04-20  5:38 ` [PATCH 04/11] block: Improvements to bounce-buffer handling NeilBrown
@ 2017-04-21 11:28   ` Christoph Hellwig
  2017-04-21 15:39   ` Ming Lei
  1 sibling, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:28 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

Do we want to doctor around the bio bouncing or leave it to the dma_ops
like other subsystems do?  These days dma_map_* is supposed to handle
any memory we throw at it, even if that means using bounce buffers
at that level.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 ` [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone() NeilBrown
@ 2017-04-21 11:29   ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:29 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

On Thu, Apr 20, 2017 at 03:38:49PM +1000, NeilBrown wrote:
> pktcdvd doesn't change the bi_io_vec of the clone bio,
> so it is more efficient to use bio_clone_fast(), and not clone
> the bi_io_vec.
> This requires providing a bio_set, and it is safest to
> provide a dedicated bio_set rather than sharing
> fs_bio_set, which filesytems use.
> This new bio_set, pkt_bio_set, can also be use for the bio_split()
> call as the two allocations (bio_clone_fast, and bio_split) are
> independent, neither can block a bio allocated by the other.

Looks fine:

Reviewed-by: Christoph Hellwig <hch@lst.de>

But didn't we decide we want to kill off pktcdvd a while ago?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 ` [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone() NeilBrown
@ 2017-04-21 11:30   ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:30 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, linux-kernel, Philipp Reisner,
	Lars Ellenberg, drbd-dev

On Thu, Apr 20, 2017 at 03:38:49PM +1000, NeilBrown wrote:
> drbd does not modify the bi_io_vec of the cloned bio,
> so there is no need to clone that part.  So bio_clone_fast()
> is the better choice.
> For bio_clone_fast() we need to specify a bio_set.
> We could use fs_bio_set, which bio_clone() uses, or
> drbd_md_io_bio_set, which drbd uses for metadata, but it is
> generally best to avoid sharing bio_sets unless you can
> be certain that there are no interdependencies.
> 
> So create a new bio_set, drbd_io_bio_set, and use bio_clone_fast().
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

All the zeroing of the gblobal variable looks unessecary / stupid.

But given that the surrounding code already does it:

Reviewed-by: Christoph Hellwig <hch@lst.de>

> +	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set); /* XXX cannot fail!! */

but maybe I'd drop this comment..

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/11] rbd: use bio_clone_fast() instead of bio_clone()
  2017-04-20  5:38 ` [PATCH 05/11] rbd: " NeilBrown
@ 2017-04-21 11:31   ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:31 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, linux-kernel, Ilya Dryomov, Sage Weil,
	Alex Elder, ceph-devel

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify()
  2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
@ 2017-04-21 11:31   ` Christoph Hellwig
  2017-04-21 11:32   ` Kent Overstreet
  2017-04-21 15:41   ` Ming Lei
  2 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:31 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, linux-kernel, Kent Overstreet, linux-bcache

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/11] block: remove bio_clone() and all references.
  2017-04-20  5:38 ` [PATCH 10/11] block: remove bio_clone() and all references NeilBrown
@ 2017-04-21 11:32   ` Christoph Hellwig
  2017-04-21 15:43   ` Ming Lei
  1 sibling, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:32 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify()
  2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
  2017-04-21 11:31   ` Christoph Hellwig
@ 2017-04-21 11:32   ` Kent Overstreet
  2017-04-21 15:41   ` Ming Lei
  2 siblings, 0 replies; 40+ messages in thread
From: Kent Overstreet @ 2017-04-21 11:32 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel, linux-bcache

On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
> This function allocates a bio, then a collection
> of pages.  It copes with failure.
> 
> It currently uses a mempool() to allocate the bio,
> but alloc_page() to allocate the pages.  These fail
> in different ways, so the usage is inconsistent.
> 
> Change the bio_clone() to bio_clone_kmalloc()
> so that no pool is used either for the bio or the pages.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Acked-by: Kent Overstreet <kent.overstreet@gmail.com>`

> ---
>  drivers/md/bcache/debug.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
> index 06f55056aaae..35a5a7210e51 100644
> --- a/drivers/md/bcache/debug.c
> +++ b/drivers/md/bcache/debug.c
> @@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
>  	struct bio_vec bv, cbv;
>  	struct bvec_iter iter, citer = { 0 };
>  
> -	check = bio_clone(bio, GFP_NOIO);
> +	check = bio_clone_kmalloc(bio, GFP_NOIO);
>  	if (!check)
>  		return;
>  	check->bi_opf = REQ_OP_READ;
> 
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()
  2017-04-20  5:38 ` [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split() NeilBrown
@ 2017-04-21 11:34   ` Christoph Hellwig
  2017-04-21 15:48     ` Ming Lei
  2017-04-24  3:14     ` NeilBrown
  0 siblings, 2 replies; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:34 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
> blk_bio_segment_split() makes sure bios have no more than
> BIO_MAX_PAGES entries in the bi_io_vec.
> This was done because bio_clone_bioset() (when given a
> mempool bioset) could not handle larger io_vecs.
> 
> No driver uses bio_clone_bioset() any more, they all
> use bio_clone_fast() if anything, and bio_clone_fast()
> doesn't clone the bi_io_vec.

Hmm.  From Jens tree:

drivers/lightnvm/pblk-read.c:           int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,

I did not see your series touching either of those.  They probably
don't matter for the code removed as the bios are controlled by the
drivers, but the changelog still seems odd.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 08/11] xen-blkfront: remove bio splitting.
  2017-04-20  5:38 ` [PATCH 08/11] xen-blkfront: remove bio splitting NeilBrown
  2017-04-20 10:00   ` Roger Pau Monné
@ 2017-04-21 11:36   ` Christoph Hellwig
  2017-04-21 11:46     ` Roger Pau Monne
  1 sibling, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-21 11:36 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, linux-kernel, Konrad Rzeszutek Wilk,
	Roger Pau Monne, xen-devel

Btw, I really don't understand why this code even looks at bios over
just requeueing the request.  Can someone explain that bit to me?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 08/11] xen-blkfront: remove bio splitting.
  2017-04-21 11:36   ` Christoph Hellwig
@ 2017-04-21 11:46     ` Roger Pau Monne
  0 siblings, 0 replies; 40+ messages in thread
From: Roger Pau Monne @ 2017-04-21 11:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: NeilBrown, Jens Axboe, linux-block, linux-kernel,
	Konrad Rzeszutek Wilk, xen-devel

On Fri, Apr 21, 2017 at 04:36:20AM -0700, Christoph Hellwig wrote:
> Btw, I really don't understand why this code even looks at bios over
> just requeueing the request.  Can someone explain that bit to me?

This was done because Linux could migrate from a host supporting indirect
descriptors to a host not supporting them, and so the maximum number of
segments per request could change, and the requests already on the queue might
need to be split.

Roger.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/11] blk: remove bio_set arg from blk_queue_split()
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
  2017-04-21 11:21   ` Christoph Hellwig
@ 2017-04-21 15:14   ` Ming Lei
  2017-04-22  9:16   ` Javier González
  2 siblings, 0 replies; 40+ messages in thread
From: Ming Lei @ 2017-04-21 15:14 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, Linux Kernel Mailing List

On Thu, Apr 20, 2017 at 1:38 PM, NeilBrown <neilb@suse.com> wrote:
> blk_queue_split() is always called with the last arg being q->bio_split,
> where 'q' is the first arg.
>
> Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses
> q->bio_split.
>
> This is inconsistent and unnecessary.  Remove the last arg and always use
> q->bio_split inside blk_queue_split()
>
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 04/11] block: Improvements to bounce-buffer handling
  2017-04-20  5:38 ` [PATCH 04/11] block: Improvements to bounce-buffer handling NeilBrown
  2017-04-21 11:28   ` Christoph Hellwig
@ 2017-04-21 15:39   ` Ming Lei
  1 sibling, 0 replies; 40+ messages in thread
From: Ming Lei @ 2017-04-21 15:39 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, Linux Kernel Mailing List

On Thu, Apr 20, 2017 at 1:38 PM, NeilBrown <neilb@suse.com> wrote:
> Since commit 23688bf4f830 ("block: ensure to split after potentially
> bouncing a bio") blk_queue_bounce() is called *before*
> blk_queue_split().
> This means that:
>  1/ the comments blk_queue_split() about bounce buffers are
>     irrelevant, and
>  2/ a very large bio (more than BIO_MAX_PAGES) will no longer be
>     split before it arrives at blk_queue_bounce(), leading to the
>     possibility that bio_clone_bioset() will fail and a NULL
>     will be dereferenced.
>
> Separately, blk_queue_bounce() shouldn't use fs_bio_set as the bio
> being copied could be from the same set, and this could lead to a
> deadlock.
>
> So:
>  - allocate 2 private biosets for blk_queue_bounce, one for
>    splitting enormous bios and one for cloning bios.
>  - add code to split a bio that exceeds BIO_MAX_PAGES.
>  - Fix up the comments in blk_queue_split()
>
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  block/blk-merge.c |   14 ++++----------
>  block/bounce.c    |   27 ++++++++++++++++++++++++++-
>  2 files changed, 30 insertions(+), 11 deletions(-)
>
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index d59074556703..51c84540d3bb 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -117,17 +117,11 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
>                  * each holds at most BIO_MAX_PAGES bvecs because
>                  * bio_clone() can fail to allocate big bvecs.
>                  *
> -                * It should have been better to apply the limit per
> -                * request queue in which bio_clone() is involved,
> -                * instead of globally. The biggest blocker is the
> -                * bio_clone() in bio bounce.
> +                * Those drivers which will need to use bio_clone()
> +                * should tell us in some way.  For now, impose the
> +                * BIO_MAX_PAGES limit on all queues.
>                  *
> -                * If bio is splitted by this reason, we should have
> -                * allowed to continue bios merging, but don't do
> -                * that now for making the change simple.
> -                *
> -                * TODO: deal with bio bounce's bio_clone() gracefully
> -                * and convert the global limit into per-queue limit.
> +                * TODO: handle users of bio_clone() differently.
>                  */
>                 if (bvecs++ >= BIO_MAX_PAGES)
>                         goto split;
> diff --git a/block/bounce.c b/block/bounce.c
> index 1cb5dd3a5da1..51fb538b504d 100644
> --- a/block/bounce.c
> +++ b/block/bounce.c
> @@ -26,6 +26,7 @@
>  #define POOL_SIZE      64
>  #define ISA_POOL_SIZE  16
>
> +struct bio_set *bounce_bio_set, *bounce_bio_split;
>  static mempool_t *page_pool, *isa_page_pool;
>
>  #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
> @@ -40,6 +41,14 @@ static __init int init_emergency_pool(void)
>         BUG_ON(!page_pool);
>         pr_info("pool size: %d pages\n", POOL_SIZE);
>
> +       bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0);
> +       BUG_ON(!bounce_bio_set);
> +       if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
> +               BUG_ON(1);
> +
> +       bounce_bio_split = bioset_create_nobvec(BIO_POOL_SIZE, 0);
> +       BUG_ON(!bounce_bio_split);
> +
>         return 0;
>  }
>
> @@ -194,7 +203,23 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
>
>         return;
>  bounce:
> -       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
> +       if (bio_segments(*bio_orig) > BIO_MAX_PAGES) {
> +               int cnt = 0;
> +               int sectors = 0;
> +               struct bio_vec bv;
> +               struct bvec_iter iter;
> +               bio_for_each_segment(bv, *bio_orig, iter) {

The two bio_for_each_segment()(one is in bio_segments()) can be merged to
one.

> +                       if (cnt++ < BIO_MAX_PAGES)
> +                               sectors += bv.bv_len >> 9;
> +                       else
> +                               break;
> +               }
> +               bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
> +               bio_chain(bio, *bio_orig);
> +               generic_make_request(*bio_orig);
> +               *bio_orig = bio;
> +       }
> +       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
>
>         bio_for_each_segment_all(to, bio, i) {
>                 struct page *page = to->bv_page;
>
>



Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify()
  2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
  2017-04-21 11:31   ` Christoph Hellwig
  2017-04-21 11:32   ` Kent Overstreet
@ 2017-04-21 15:41   ` Ming Lei
  2 siblings, 0 replies; 40+ messages in thread
From: Ming Lei @ 2017-04-21 15:41 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jens Axboe, linux-block, Linux Kernel Mailing List,
	Kent Overstreet, open list:BCACHE (BLOCK LAYER CACHE)

On Thu, Apr 20, 2017 at 1:38 PM, NeilBrown <neilb@suse.com> wrote:
> This function allocates a bio, then a collection
> of pages.  It copes with failure.
>
> It currently uses a mempool() to allocate the bio,
> but alloc_page() to allocate the pages.  These fail
> in different ways, so the usage is inconsistent.
>
> Change the bio_clone() to bio_clone_kmalloc()
> so that no pool is used either for the bio or the pages.
>
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by : Ming Lei <ming.lei@redhat.com>


Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/11] block: remove bio_clone() and all references.
  2017-04-20  5:38 ` [PATCH 10/11] block: remove bio_clone() and all references NeilBrown
  2017-04-21 11:32   ` Christoph Hellwig
@ 2017-04-21 15:43   ` Ming Lei
  1 sibling, 0 replies; 40+ messages in thread
From: Ming Lei @ 2017-04-21 15:43 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, Linux Kernel Mailing List

On Thu, Apr 20, 2017 at 1:38 PM, NeilBrown <neilb@suse.com> wrote:
> bio_clone() is no longer used.
> Only bio_clone_bioset() or bio_clone_fast().
> This is for the best, as bio_clone() used fs_bio_set,
> and filesystems are unlikely to want to use bio_clone().
>
> So remove bio_clone() and all references.
> This includes a fix to some incorrect documentation.
>
> Signed-off-by: NeilBrown <neilb@suse.com>

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()
  2017-04-21 11:34   ` Christoph Hellwig
@ 2017-04-21 15:48     ` Ming Lei
  2017-04-24  3:16       ` NeilBrown
  2017-04-24  3:14     ` NeilBrown
  1 sibling, 1 reply; 40+ messages in thread
From: Ming Lei @ 2017-04-21 15:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: NeilBrown, Jens Axboe, linux-block, Linux Kernel Mailing List

On Fri, Apr 21, 2017 at 7:34 PM, Christoph Hellwig <hch@infradead.org> wrote:
> On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
>> blk_bio_segment_split() makes sure bios have no more than
>> BIO_MAX_PAGES entries in the bi_io_vec.
>> This was done because bio_clone_bioset() (when given a
>> mempool bioset) could not handle larger io_vecs.
>>
>> No driver uses bio_clone_bioset() any more, they all
>> use bio_clone_fast() if anything, and bio_clone_fast()
>> doesn't clone the bi_io_vec.
>
> Hmm.  From Jens tree:
>
> drivers/lightnvm/pblk-read.c:           int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,

Btrfs use bio_clone_bioset() too:

     fs/btrfs/extent_io.c:2703:      new = bio_clone_bioset(bio,
gfp_mask, btrfs_bioset);

Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/11] blk: remove bio_set arg from blk_queue_split()
  2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
  2017-04-21 11:21   ` Christoph Hellwig
  2017-04-21 15:14   ` Ming Lei
@ 2017-04-22  9:16   ` Javier González
  2017-04-24  2:32     ` NeilBrown
  2 siblings, 1 reply; 40+ messages in thread
From: Javier González @ 2017-04-22  9:16 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jens Axboe, linux-block, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1288 bytes --]

> On 20 Apr 2017, at 07.38, NeilBrown <neilb@suse.com> wrote:
> 
> blk_queue_split() is always called with the last arg being q->bio_split,
> where 'q' is the first arg.
> 
> Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses
> q->bio_split.
> 
> This is inconsistent and unnecessary.  Remove the last arg and always use
> q->bio_split inside blk_queue_split()
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
> block/blk-core.c              |    2 +-
> block/blk-merge.c             |    9 ++++-----
> block/blk-mq.c                |    2 +-
> drivers/block/drbd/drbd_req.c |    2 +-
> drivers/block/pktcdvd.c       |    2 +-
> drivers/block/ps3vram.c       |    2 +-
> drivers/block/rsxx/dev.c      |    2 +-
> drivers/block/umem.c          |    2 +-
> drivers/block/zram/zram_drv.c |    2 +-
> drivers/lightnvm/rrpc.c       |    2 +-
> drivers/md/md.c               |    2 +-
> drivers/s390/block/dcssblk.c  |    2 +-
> drivers/s390/block/xpram.c    |    2 +-
> include/linux/blkdev.h        |    3 +--
> 14 files changed, 17 insertions(+), 19 deletions(-)
> 

It most probably made it to Jens' tree after you made these changes, but
in drivers/lightnvm/pblk-init.c we also use blk_queue_split() in a
couple of places inside pblk_rw_io().

Thanks,
Javier.

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-04-21 11:24   ` Christoph Hellwig
@ 2017-04-24  1:51     ` NeilBrown
  2017-04-24 15:10       ` Christoph Hellwig
  0 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-04-24  1:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, linux-block, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1638 bytes --]

On Fri, Apr 21 2017, Christoph Hellwig wrote:

> On Thu, Apr 20, 2017 at 03:38:48PM +1000, NeilBrown wrote:
>> This patch converts bioset_create() and
>> bioset_create_nobvec() to not create a workqueue so
>> alloctions will never trigger punt_bios_to_rescuer().  It
>> also introduces bioset_create_rescued() and
>> bioset_create_nobvec_rescued() which preserve the old
>> behaviour.
>
> Why these super-early line breaks in the committ message?  They make
> the text much more awkware compared to say:

I usually set a smaller wrap-margin for git comments because of the
extra space that git inserts on the left.  Maybe I over-do it.

>
> This patch converts bioset_create() and bioset_create_nobvec() to not
> create a workqueue so alloctions will never trigger
> punt_bios_to_rescuer(). It also introduces bioset_create_rescued() and
> bioset_create_nobvec_rescued() which preserve the old behaviour.
>
>>  static struct bio_set *__bioset_create(unsigned int pool_size,
>>  				       unsigned int front_pad,
>> -				       bool create_bvec_pool)
>> +				       bool create_bvec_pool,
>> +				       bool create_rescue_workqueue)
>
> I'd much prefer a single new bioset_create with a bunch of flags
> arguments over the number of new functions and these bool arguments.

I was following the existing practice exemplified by
bioset_create_nobvec().
By not changing the signature of the function, I can avoid touching
quite a few places where it is called.
I hope to get rid of the _rescued() versions eventually.  That is easier
if there are a separate function rather than an extra arg that needs
to be removed everywhere.

NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/11] blk: remove bio_set arg from blk_queue_split()
  2017-04-22  9:16   ` Javier González
@ 2017-04-24  2:32     ` NeilBrown
  0 siblings, 0 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-24  2:32 UTC (permalink / raw)
  To: Javier González; +Cc: Jens Axboe, linux-block, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1562 bytes --]

On Sat, Apr 22 2017, Javier González wrote:

>> On 20 Apr 2017, at 07.38, NeilBrown <neilb@suse.com> wrote:
>> 
>> blk_queue_split() is always called with the last arg being q->bio_split,
>> where 'q' is the first arg.
>> 
>> Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses
>> q->bio_split.
>> 
>> This is inconsistent and unnecessary.  Remove the last arg and always use
>> q->bio_split inside blk_queue_split()
>> 
>> Signed-off-by: NeilBrown <neilb@suse.com>
>> ---
>> block/blk-core.c              |    2 +-
>> block/blk-merge.c             |    9 ++++-----
>> block/blk-mq.c                |    2 +-
>> drivers/block/drbd/drbd_req.c |    2 +-
>> drivers/block/pktcdvd.c       |    2 +-
>> drivers/block/ps3vram.c       |    2 +-
>> drivers/block/rsxx/dev.c      |    2 +-
>> drivers/block/umem.c          |    2 +-
>> drivers/block/zram/zram_drv.c |    2 +-
>> drivers/lightnvm/rrpc.c       |    2 +-
>> drivers/md/md.c               |    2 +-
>> drivers/s390/block/dcssblk.c  |    2 +-
>> drivers/s390/block/xpram.c    |    2 +-
>> include/linux/blkdev.h        |    3 +--
>> 14 files changed, 17 insertions(+), 19 deletions(-)
>> 
>
> It most probably made it to Jens' tree after you made these changes, but
> in drivers/lightnvm/pblk-init.c we also use blk_queue_split() in a
> couple of places inside pblk_rw_io().

Ahh, yes - thanks.  That will need to same small change.

Jens, do you prefer a resend, or and update patch, or to just add this
change manually?

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()
  2017-04-21 11:34   ` Christoph Hellwig
  2017-04-21 15:48     ` Ming Lei
@ 2017-04-24  3:14     ` NeilBrown
  1 sibling, 0 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-24  3:14 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, linux-block, linux-kernel, Javier González

[-- Attachment #1: Type: text/plain, Size: 1418 bytes --]

On Fri, Apr 21 2017, Christoph Hellwig wrote:

> On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
>> blk_bio_segment_split() makes sure bios have no more than
>> BIO_MAX_PAGES entries in the bi_io_vec.
>> This was done because bio_clone_bioset() (when given a
>> mempool bioset) could not handle larger io_vecs.
>> 
>> No driver uses bio_clone_bioset() any more, they all
>> use bio_clone_fast() if anything, and bio_clone_fast()
>> doesn't clone the bi_io_vec.
>
> Hmm.  From Jens tree:
>
> drivers/lightnvm/pblk-read.c:           int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);

That is new and I missed it.
It should be bio_clone_fast(), and it shouldn't use fs_bio_set (as it is
not a filesystem).


> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,

These have since been changed to bio_clone_fast() or similar (in the md
tree) - and bio_clone_bioset_partial() is gone.

>
> I did not see your series touching either of those.  They probably
> don't matter for the code removed as the bios are controlled by the
> drivers, but the changelog still seems odd.

Depending one what base it ends up being applied to, it may or may not
be correct...  the md changes have been in -next since 28march.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split()
  2017-04-21 15:48     ` Ming Lei
@ 2017-04-24  3:16       ` NeilBrown
  0 siblings, 0 replies; 40+ messages in thread
From: NeilBrown @ 2017-04-24  3:16 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig
  Cc: Jens Axboe, linux-block, Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 1207 bytes --]

On Fri, Apr 21 2017, Ming Lei wrote:

> On Fri, Apr 21, 2017 at 7:34 PM, Christoph Hellwig <hch@infradead.org> wrote:
>> On Thu, Apr 20, 2017 at 03:38:50PM +1000, NeilBrown wrote:
>>> blk_bio_segment_split() makes sure bios have no more than
>>> BIO_MAX_PAGES entries in the bi_io_vec.
>>> This was done because bio_clone_bioset() (when given a
>>> mempool bioset) could not handle larger io_vecs.
>>>
>>> No driver uses bio_clone_bioset() any more, they all
>>> use bio_clone_fast() if anything, and bio_clone_fast()
>>> doesn't clone the bi_io_vec.
>>
>> Hmm.  From Jens tree:
>>
>> drivers/lightnvm/pblk-read.c:           int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
>> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
>> drivers/md/raid1.c:                             mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
>
> Btrfs use bio_clone_bioset() too:
>
>      fs/btrfs/extent_io.c:2703:      new = bio_clone_bioset(bio,
> gfp_mask, btrfs_bioset);
>

btrfs is a filesystem, not a driver.  So it doesn't count.
The bios it uses were not yet processed by blk_bio_segment_split(),
so changes there cannot be relevant to btrfs.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-04-24  1:51     ` NeilBrown
@ 2017-04-24 15:10       ` Christoph Hellwig
  2017-05-01  5:00         ` NeilBrown
  0 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2017-04-24 15:10 UTC (permalink / raw)
  To: NeilBrown; +Cc: Christoph Hellwig, Jens Axboe, linux-block, linux-kernel

On Mon, Apr 24, 2017 at 11:51:01AM +1000, NeilBrown wrote:
> 
> I was following the existing practice exemplified by
> bioset_create_nobvec().

Which is pretty ugly to start with..

> By not changing the signature of the function, I can avoid touching
> quite a few places where it is called.

There are 13 callers of bioset_create and one caller of
bioset_create_nobvec, and your series touches many of those.

So just adding a flags argument to bioset_create and passing
BIOSET_NEED_BVECS and BIOSET_NEED_RESUER flags to it doesn't seem
to much of an effort, and it's going to create a much nicer and easier
to extend interface.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-04-24 15:10       ` Christoph Hellwig
@ 2017-05-01  5:00         ` NeilBrown
  2017-05-01 14:02           ` Jens Axboe
  0 siblings, 1 reply; 40+ messages in thread
From: NeilBrown @ 2017-05-01  5:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, linux-block, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1217 bytes --]

On Mon, Apr 24 2017, Christoph Hellwig wrote:

> On Mon, Apr 24, 2017 at 11:51:01AM +1000, NeilBrown wrote:
>> 
>> I was following the existing practice exemplified by
>> bioset_create_nobvec().
>
> Which is pretty ugly to start with..

That is a matter of personal taste.
As such, it is up to the maintainer to change it if they want it
changed.

>
>> By not changing the signature of the function, I can avoid touching
>> quite a few places where it is called.
>
> There are 13 callers of bioset_create and one caller of
> bioset_create_nobvec, and your series touches many of those.
>
> So just adding a flags argument to bioset_create and passing
> BIOSET_NEED_BVECS and BIOSET_NEED_RESUER flags to it doesn't seem
> to much of an effort, and it's going to create a much nicer and easier
> to extend interface.

If someone else submitted a patch to discard bioset_create_nobvec in
favour of BIOSET_NEED_BVECS and got it accepted, then I would rebase my
series on that.  As it is, I'm basing my patches on the style currently
present in the tree.

Of course, if Jens says he'll only take my patches if I change to style
to match your preference, I'll do that.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-05-01  5:00         ` NeilBrown
@ 2017-05-01 14:02           ` Jens Axboe
  2017-05-02  3:33             ` NeilBrown
  0 siblings, 1 reply; 40+ messages in thread
From: Jens Axboe @ 2017-05-01 14:02 UTC (permalink / raw)
  To: NeilBrown, Christoph Hellwig; +Cc: linux-block, linux-kernel

On 04/30/2017 11:00 PM, NeilBrown wrote:
> On Mon, Apr 24 2017, Christoph Hellwig wrote:
> 
>> On Mon, Apr 24, 2017 at 11:51:01AM +1000, NeilBrown wrote:
>>>
>>> I was following the existing practice exemplified by
>>> bioset_create_nobvec().
>>
>> Which is pretty ugly to start with..
> 
> That is a matter of personal taste.
> As such, it is up to the maintainer to change it if they want it
> changed.
> 
>>
>>> By not changing the signature of the function, I can avoid touching
>>> quite a few places where it is called.
>>
>> There are 13 callers of bioset_create and one caller of
>> bioset_create_nobvec, and your series touches many of those.
>>
>> So just adding a flags argument to bioset_create and passing
>> BIOSET_NEED_BVECS and BIOSET_NEED_RESUER flags to it doesn't seem
>> to much of an effort, and it's going to create a much nicer and easier
>> to extend interface.
> 
> If someone else submitted a patch to discard bioset_create_nobvec in
> favour of BIOSET_NEED_BVECS and got it accepted, then I would rebase my
> series on that.  As it is, I'm basing my patches on the style currently
> present in the tree.
> 
> Of course, if Jens says he'll only take my patches if I change to style
> to match your preference, I'll do that.

I generally tend to prefer tree wide cleanups to improve our APIs, even
if it does cause an extra bit of pain. Would you mind doing that as a
prep patch?

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/11] blk: make the bioset rescue_workqueue optional.
  2017-05-01 14:02           ` Jens Axboe
@ 2017-05-02  3:33             ` NeilBrown
  0 siblings, 0 replies; 40+ messages in thread
From: NeilBrown @ 2017-05-02  3:33 UTC (permalink / raw)
  To: Jens Axboe, Christoph Hellwig; +Cc: linux-block, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1636 bytes --]

On Mon, May 01 2017, Jens Axboe wrote:

> On 04/30/2017 11:00 PM, NeilBrown wrote:
>> On Mon, Apr 24 2017, Christoph Hellwig wrote:
>> 
>>> On Mon, Apr 24, 2017 at 11:51:01AM +1000, NeilBrown wrote:
>>>>
>>>> I was following the existing practice exemplified by
>>>> bioset_create_nobvec().
>>>
>>> Which is pretty ugly to start with..
>> 
>> That is a matter of personal taste.
>> As such, it is up to the maintainer to change it if they want it
>> changed.
>> 
>>>
>>>> By not changing the signature of the function, I can avoid touching
>>>> quite a few places where it is called.
>>>
>>> There are 13 callers of bioset_create and one caller of
>>> bioset_create_nobvec, and your series touches many of those.
>>>
>>> So just adding a flags argument to bioset_create and passing
>>> BIOSET_NEED_BVECS and BIOSET_NEED_RESUER flags to it doesn't seem
>>> to much of an effort, and it's going to create a much nicer and easier
>>> to extend interface.
>> 
>> If someone else submitted a patch to discard bioset_create_nobvec in
>> favour of BIOSET_NEED_BVECS and got it accepted, then I would rebase my
>> series on that.  As it is, I'm basing my patches on the style currently
>> present in the tree.
>> 
>> Of course, if Jens says he'll only take my patches if I change to style
>> to match your preference, I'll do that.
>
> I generally tend to prefer tree wide cleanups to improve our APIs, even
> if it does cause an extra bit of pain. Would you mind doing that as a
> prep patch?

OK, will do.

I have rebased and fixed up a couple of issues.  Will repost shortly.

Thanks,
NeilBrown

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2017-05-02  3:34 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-20  5:38 [PATCH 00/11] block: assorted cleanup for bio splitting and cloning NeilBrown
2017-04-20  5:38 ` [PATCH 01/11] blk: remove bio_set arg from blk_queue_split() NeilBrown
2017-04-21 11:21   ` Christoph Hellwig
2017-04-21 15:14   ` Ming Lei
2017-04-22  9:16   ` Javier González
2017-04-24  2:32     ` NeilBrown
2017-04-20  5:38 ` [PATCH 02/11] blk: make the bioset rescue_workqueue optional NeilBrown
2017-04-21 11:24   ` Christoph Hellwig
2017-04-24  1:51     ` NeilBrown
2017-04-24 15:10       ` Christoph Hellwig
2017-05-01  5:00         ` NeilBrown
2017-05-01 14:02           ` Jens Axboe
2017-05-02  3:33             ` NeilBrown
2017-04-20  5:38 ` [PATCH 07/11] pktcdvd: use bio_clone_fast() instead of bio_clone() NeilBrown
2017-04-21 11:29   ` Christoph Hellwig
2017-04-20  5:38 ` [PATCH 05/11] rbd: " NeilBrown
2017-04-21 11:31   ` Christoph Hellwig
2017-04-20  5:38 ` [PATCH 03/11] blk: use non-rescuing bioset for q->bio_split NeilBrown
2017-04-21 11:25   ` Christoph Hellwig
2017-04-20  5:38 ` [PATCH 04/11] block: Improvements to bounce-buffer handling NeilBrown
2017-04-21 11:28   ` Christoph Hellwig
2017-04-21 15:39   ` Ming Lei
2017-04-20  5:38 ` [PATCH 06/11] drbd: use bio_clone_fast() instead of bio_clone() NeilBrown
2017-04-21 11:30   ` Christoph Hellwig
2017-04-20  5:38 ` [PATCH 09/11] bcache: use kmalloc to allocate bio in bch_data_verify() NeilBrown
2017-04-21 11:31   ` Christoph Hellwig
2017-04-21 11:32   ` Kent Overstreet
2017-04-21 15:41   ` Ming Lei
2017-04-20  5:38 ` [PATCH 08/11] xen-blkfront: remove bio splitting NeilBrown
2017-04-20 10:00   ` Roger Pau Monné
2017-04-21 11:36   ` Christoph Hellwig
2017-04-21 11:46     ` Roger Pau Monne
2017-04-20  5:38 ` [PATCH 11/11] block: don't check for BIO_MAX_PAGES in blk_bio_segment_split() NeilBrown
2017-04-21 11:34   ` Christoph Hellwig
2017-04-21 15:48     ` Ming Lei
2017-04-24  3:16       ` NeilBrown
2017-04-24  3:14     ` NeilBrown
2017-04-20  5:38 ` [PATCH 10/11] block: remove bio_clone() and all references NeilBrown
2017-04-21 11:32   ` Christoph Hellwig
2017-04-21 15:43   ` Ming Lei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).