* [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far
@ 2019-02-19 22:17 Mike Snitzer
  2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
                   ` (7 more replies)
  0 siblings, 8 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

I seized on Mikulas's noclone patches and took it upon myself to add
noclone support to bio-based dm-multipath.  I'm pretty happy with how
things turned out, but I'm open to all review/feedback.

I've verified that all mptest tests pass (with tcmloop; mptest's
nvmefcloop-based test setup panics with 5.0-rc6, which I should
probably report and/or fix).

I've yet to do the before-vs-after multipath benchmarking to see if
there is a performance win, but I'm betting there will be (otherwise I
wouldn't have done this work).  I'll backfill benchmarking results
later in the week.

Mike Snitzer (6):
  dm: update dm_process_bio() to split bio if in ->make_request_fn()
  dm: eliminate 'split_discard_bios' flag from DM target interface
  dm: improve noclone bio support
  dm: add the ability to attach per-bio-data to dm_noclone bio
  dm: improve noclone_endio() to support multipath target
  dm mpath: add support for dm_noclone and its per-bio-data

Mikulas Patocka (2):
  dm: refactor start_io_acct and end_io_acct
  dm: implement noclone optimization for bio-based

 drivers/md/dm-cache-target.c  |   1 -
 drivers/md/dm-core.h          |   1 +
 drivers/md/dm-linear.c        |   1 +
 drivers/md/dm-mpath.c         |  65 ++++++--
 drivers/md/dm-raid.c          |  14 +-
 drivers/md/dm-stripe.c        |   1 +
 drivers/md/dm-table.c         |  11 ++
 drivers/md/dm-thin.c          |   1 -
 drivers/md/dm-zero.c          |   1 +
 drivers/md/dm-zoned-target.c  |   1 -
 drivers/md/dm.c               | 357 +++++++++++++++++++++++++++++-------------
 drivers/md/dm.h               |   1 +
 include/linux/device-mapper.h |  22 ++-
 13 files changed, 346 insertions(+), 131 deletions(-)

-- 
2.15.0

* [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn()
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

dm_process_bio() must call blk_queue_split(); otherwise queue_limits
for abnormal requests (e.g. discard, writesame, etc.) won't be imposed.

In addition, add dm_queue_split() to simplify the DM-specific splitting
that is needed for targets that impose ti->max_io_len.
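
The net effect in dm_process_bio() boils down to (condensed from the
hunks below):

    if (current->bio_list) {
        /* in ->make_request_fn: splitting must happen upfront */
        if (is_abnormal_io(bio))
            blk_queue_split(md->queue, &bio);  /* impose queue_limits */
        else
            dm_queue_split(md, ti, &bio);      /* impose ti->max_io_len */
    }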

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 93 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 27 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 515e6af9bed2..7a774fcd0194 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1533,6 +1533,22 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
 	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
 }
 
+static bool is_abnormal_io(struct bio *bio)
+{
+	bool r = false;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE_ZEROES:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
 static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
 				  int *result)
 {
@@ -1565,7 +1581,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 	if (!dm_target_is_valid(ti))
 		return -EIO;
 
-	if (unlikely(__process_abnormal_io(ci, ti, &r)))
+	if (__process_abnormal_io(ci, ti, &r))
 		return r;
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
@@ -1601,13 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 	blk_qc_t ret = BLK_QC_T_NONE;
 	int error = 0;
 
-	if (unlikely(!map)) {
-		bio_io_error(bio);
-		return ret;
-	}
-
-	blk_queue_split(md->queue, &bio);
-
 	init_clone_info(&ci, md, map, bio);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1675,18 +1684,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
  * Optimized variant of __split_and_process_bio that leverages the
  * fact that targets that use it do _not_ have a need to split bios.
  */
-static blk_qc_t __process_bio(struct mapped_device *md,
-			      struct dm_table *map, struct bio *bio)
+static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
+			      struct bio *bio, struct dm_target *ti)
 {
 	struct clone_info ci;
 	blk_qc_t ret = BLK_QC_T_NONE;
 	int error = 0;
 
-	if (unlikely(!map)) {
-		bio_io_error(bio);
-		return ret;
-	}
-
 	init_clone_info(&ci, md, map, bio);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1704,21 +1708,11 @@ static blk_qc_t __process_bio(struct mapped_device *md,
 		error = __send_empty_flush(&ci);
 		/* dec_pending submits any data associated with flush */
 	} else {
-		struct dm_target *ti = md->immutable_target;
 		struct dm_target_io *tio;
 
-		/*
-		 * Defend against IO still getting in during teardown
-		 * - as was seen for a time with nvme-fcloop
-		 */
-		if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
-			error = -EIO;
-			goto out;
-		}
-
 		ci.bio = bio;
 		ci.sector_count = bio_sectors(bio);
-		if (unlikely(__process_abnormal_io(&ci, ti, &error)))
+		if (__process_abnormal_io(&ci, ti, &error))
 			goto out;
 
 		tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
@@ -1730,11 +1724,56 @@ static blk_qc_t __process_bio(struct mapped_device *md,
 	return ret;
 }
 
+static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio)
+{
+	unsigned len, sector_count;
+
+	sector_count = bio_sectors(*bio);
+	len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count);
+
+	if (sector_count > len) {
+		struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split);
+
+		bio_chain(split, *bio);
+		trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
+		generic_make_request(*bio);
+		*bio = split;
+	}
+}
+
 static blk_qc_t dm_process_bio(struct mapped_device *md,
 			       struct dm_table *map, struct bio *bio)
 {
+	blk_qc_t ret = BLK_QC_T_NONE;
+	struct dm_target *ti = md->immutable_target;
+
+	if (unlikely(!map)) {
+		bio_io_error(bio);
+		return ret;
+	}
+
+	if (!ti) {
+		ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+		if (unlikely(!ti || !dm_target_is_valid(ti))) {
+			bio_io_error(bio);
+			return ret;
+		}
+	}
+
+	/*
+	 * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+	 * queue_limits for abnormal requests (e.g. discard, writesame, etc)
+	 * won't be imposed.
+	 */
+	if (current->bio_list) {
+		if (is_abnormal_io(bio))
+			blk_queue_split(md->queue, &bio);
+		else
+			dm_queue_split(md, ti, &bio);
+	}
+
 	if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
-		return __process_bio(md, map, bio);
+		return __process_bio(md, map, bio, ti);
 	else
 		return __split_and_process_bio(md, map, bio);
 }
-- 
2.15.0

* [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
  2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 3/8] dm: refactor start_io_acct and end_io_acct Mike Snitzer
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

There is no need to have DM core split discards on behalf of a DM target
now that blk_queue_split() handles splitting discards based on the
queue_limits.  A DM target just needs to set max_discard_sectors,
discard_granularity, etc., in queue_limits.
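
For example, a hypothetical bio-based target that wants discards split
on 64K boundaries (an illustrative sketch -- the function name and the
numbers are made up, but the queue_limits fields are the ones this
series relies on) would express that in its ->io_hints hook:

    static void example_io_hints(struct dm_target *ti,
                                 struct queue_limits *limits)
    {
        /* blk_queue_split() will split discards on these limits */
        limits->discard_granularity = 64 * 1024;  /* bytes */
        limits->max_discard_sectors = 128;        /* 512b sectors = 64K */
    }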

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c  |  1 -
 drivers/md/dm-raid.c          | 14 +++++++++-----
 drivers/md/dm-thin.c          |  1 -
 drivers/md/dm-zoned-target.c  |  1 -
 drivers/md/dm.c               | 28 ++++++----------------------
 include/linux/device-mapper.h |  6 ------
 6 files changed, 15 insertions(+), 36 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b29a8327eed1..adc529f12b6b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2496,7 +2496,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	ti->num_discard_bios = 1;
 	ti->discards_supported = true;
-	ti->split_discard_bios = false;
 
 	ti->per_io_data_size = sizeof(struct per_bio_data);
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index adcfe8ae10aa..9fdef6897316 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs)
 		}
 	}
 
-	/*
-	 * RAID1 and RAID10 personalities require bio splitting,
-	 * RAID0/4/5/6 don't and process large discard bios properly.
-	 */
-	ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
 	ti->num_discard_bios = 1;
 }
 
@@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	blk_limits_io_min(limits, chunk_size);
 	blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+
+	/*
+	 * RAID1 and RAID10 personalities require bio splitting,
+	 * RAID0/4/5/6 don't and process large discard bios properly.
+	 */
+	if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+		limits->discard_granularity = chunk_size;
+		limits->max_discard_sectors = chunk_size;
+	}
 }
 
 static void raid_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e83b63608262..0d9ded0f5e50 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4227,7 +4227,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		ti->split_discard_bios = false;
 	}
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6af5babe6837..8865c1709e16 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->per_io_data_size = sizeof(struct dmz_bioctx);
 	ti->flush_supported = true;
 	ti->discards_supported = true;
-	ti->split_discard_bios = true;
 
 	/* The exposed capacity is the number of chunks that can be mapped */
 	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7a774fcd0194..b988e178a523 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
 	return ti->num_write_zeroes_bios;
 }
 
-typedef bool (*is_split_required_fn)(struct dm_target *ti);
-
-static bool is_split_required_for_discard(struct dm_target *ti)
-{
-	return ti->split_discard_bios;
-}
-
 static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
-				       unsigned num_bios, bool is_split_required)
+				       unsigned num_bios)
 {
-	unsigned len;
+	unsigned len = ci->sector_count;
 
 	/*
 	 * Even though the device advertised support for this type of
@@ -1499,38 +1492,29 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
 	if (!num_bios)
 		return -EOPNOTSUPP;
 
-	if (!is_split_required)
-		len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
-	else
-		len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
-
 	__send_duplicate_bios(ci, ti, num_bios, &len);
 
-	ci->sector += len;
-	ci->sector_count -= len;
-
 	return 0;
 }
 
 static int __send_discard(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
-					   is_split_required_for_discard(ti));
+	return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti));
 }
 
 static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti));
 }
 
 static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti));
 }
 
 static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti));
 }
 
 static bool is_abnormal_io(struct bio *bio)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e528baebad69..0f5b3d7c6cb3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,12 +315,6 @@ struct dm_target {
 	 * whether or not its underlying devices have support.
 	 */
 	bool discards_supported:1;
-
-	/*
-	 * Set if the target required discard bios to be split
-	 * on max_io_len boundary.
-	 */
-	bool split_discard_bios:1;
 };
 
 /* Each target can link one of these into the table */
-- 
2.15.0

* [PATCH 3/8] dm: refactor start_io_acct and end_io_acct
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
  2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
  2019-02-19 22:17 ` [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 4/8] dm: implement noclone optimization for bio-based Mike Snitzer
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

This refactoring of start_io_acct() and end_io_acct() eliminates their
dependency on 'struct dm_io', which will not be created for all IO (the
following commit introduces the ability for targets to avoid cloning
READ and WRITE bios).
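
The refactored helpers (introduced below) take the mapped_device and
bio directly, with end_io_acct() carrying the start time explicitly:

    static void start_io_acct(struct mapped_device *md, struct bio *bio);
    static void end_io_acct(struct mapped_device *md, struct bio *bio,
                            unsigned long start_time);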

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 68 +++++++++++++++++++++++++--------------------------------
 1 file changed, 30 insertions(+), 38 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b988e178a523..1b87d20041e7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -580,7 +580,19 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	return r;
 }
 
-static void start_io_acct(struct dm_io *io);
+static void start_io_acct(struct mapped_device *md, struct bio *bio)
+{
+	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), &dm_disk(md)->part0);
+}
+
+static void end_io_acct(struct mapped_device *md, struct bio *bio, unsigned long start_time)
+{
+	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, start_time);
+
+	/* nudge anyone waiting on suspend queue */
+	if (unlikely(wq_has_sleeper(&md->wait)))
+		wake_up(&md->wait);
+}
 
 static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 {
@@ -604,7 +616,14 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	io->md = md;
 	spin_lock_init(&io->endio_lock);
 
-	start_io_acct(io);
+	io->start_time = jiffies;
+
+	start_io_acct(md, bio);
+
+	if (unlikely(dm_stats_used(&md->stats)))
+		dm_stats_account_io(&md->stats, bio_data_dir(bio),
+				    bio->bi_iter.bi_sector, bio_sectors(bio),
+				    false, 0, &io->stats_aux);
 
 	return io;
 }
@@ -668,41 +687,6 @@ static bool md_in_flight(struct mapped_device *md)
 		return md_in_flight_bios(md);
 }
 
-static void start_io_acct(struct dm_io *io)
-{
-	struct mapped_device *md = io->md;
-	struct bio *bio = io->orig_bio;
-
-	io->start_time = jiffies;
-
-	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
-			      &dm_disk(md)->part0);
-
-	if (unlikely(dm_stats_used(&md->stats)))
-		dm_stats_account_io(&md->stats, bio_data_dir(bio),
-				    bio->bi_iter.bi_sector, bio_sectors(bio),
-				    false, 0, &io->stats_aux);
-}
-
-static void end_io_acct(struct dm_io *io)
-{
-	struct mapped_device *md = io->md;
-	struct bio *bio = io->orig_bio;
-	unsigned long duration = jiffies - io->start_time;
-
-	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
-			    io->start_time);
-
-	if (unlikely(dm_stats_used(&md->stats)))
-		dm_stats_account_io(&md->stats, bio_data_dir(bio),
-				    bio->bi_iter.bi_sector, bio_sectors(bio),
-				    true, duration, &io->stats_aux);
-
-	/* nudge anyone waiting on suspend queue */
-	if (unlikely(wq_has_sleeper(&md->wait)))
-		wake_up(&md->wait);
-}
-
 /*
  * Add the bio to the list of deferred io.
  */
@@ -941,7 +925,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
 
 		io_error = io->status;
 		bio = io->orig_bio;
-		end_io_acct(io);
+
+		if (unlikely(dm_stats_used(&md->stats))) {
+			unsigned long duration = jiffies - io->start_time;
+			dm_stats_account_io(&md->stats, bio_data_dir(bio),
+					    bio->bi_iter.bi_sector, bio_sectors(bio),
+					    true, duration, &io->stats_aux);
+		}
+
+		end_io_acct(md, bio, io->start_time);
 		free_io(md, io);
 
 		if (io_error == BLK_STS_DM_REQUEUE)
-- 
2.15.0

* [PATCH 4/8] dm: implement noclone optimization for bio-based
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
                   ` (2 preceding siblings ...)
  2019-02-19 22:17 ` [PATCH 3/8] dm: refactor start_io_acct and end_io_acct Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 5/8] dm: improve noclone bio support Mike Snitzer
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

From: Mikulas Patocka <mpatocka@redhat.com>

Until now bio-based DM core has always cloned an incoming bio, remapped
the clone to the lower layer, dealt with the clone's completion and then
finally completed the original bio.  This cloning can be avoided for
READ and WRITE bios if the target opts in by setting ti->no_clone.

Avoiding cloning for READ and WRITE bios improves performance of targets
that do very little work in response to each bio (e.g. dm-linear and
dm-striped).

The improvement is accomplished by changing DM core to allocate a small
'dm_noclone' structure instead of cloning the bio.  The bio's bi_end_io
and bi_private are saved in the 'dm_noclone' before they are overwritten
and the bio is passed to the lower block device.

When the bio completes, noclone_endio() restores the saved bi_end_io
and bi_private values and then passes the bio to the original bi_end_io
function.
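
Condensed from the hunks below, the fast path's save/restore dance is:

    /* submission: wrap the bio instead of cloning it */
    noclone->orig_bi_end_io = bio->bi_end_io;
    noclone->orig_bi_private = bio->bi_private;
    bio->bi_end_io = noclone_endio;
    bio->bi_private = noclone;

    /* completion: noclone_endio() unwraps and ends the original bio */
    bio->bi_end_io = noclone->orig_bi_end_io;
    bio->bi_private = noclone->orig_bi_private;
    kmem_cache_free(_noclone_cache, noclone);
    bio_endio(bio);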

If the allocation of the 'struct dm_noclone' fails then bio-based DM
falls back to the traditional bio-cloning IO path that is backed by
mempool reservations.

Performance improvement for dm-linear:

x86-64, 2x six-core
/dev/ram0					2449MiB/s
/dev/mapper/lin 5.0-rc without optimization	1970MiB/s
/dev/mapper/lin 5.0-rc with optimization	2238MiB/s

arm64, quad core:
/dev/ram0					457MiB/s
/dev/mapper/lin 5.0-rc without optimization	325MiB/s
/dev/mapper/lin 5.0-rc with optimization	364MiB/s

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-core.h          |  1 +
 drivers/md/dm-linear.c        |  1 +
 drivers/md/dm-stripe.c        |  1 +
 drivers/md/dm-table.c         | 11 +++++++
 drivers/md/dm-zero.c          |  1 +
 drivers/md/dm.c               | 71 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.h               |  1 +
 include/linux/device-mapper.h |  5 +++
 8 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 95c6d86ab5e8..b4832bba9d64 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -87,6 +87,7 @@ struct mapped_device {
 	 */
 	struct bio_set io_bs;
 	struct bio_set bs;
+	struct kmem_cache *noclone_cache;
 
 	/*
 	 * Processing queue (flush)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..6e1df9fdfcc8 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->num_secure_erase_bios = 1;
 	ti->num_write_same_bios = 1;
 	ti->num_write_zeroes_bios = 1;
+	ti->no_clone = true;
 	ti->private = lc;
 	return 0;
 
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 8547d7594338..32181b7ca34a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -172,6 +172,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->num_secure_erase_bios = stripes;
 	ti->num_write_same_bios = stripes;
 	ti->num_write_zeroes_bios = stripes;
+	ti->no_clone = true;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b1be754cc41..6a3e23faeb7d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -47,6 +47,7 @@ struct dm_table {
 
 	bool integrity_supported:1;
 	bool singleton:1;
+	bool no_clone:1;
 	unsigned integrity_added:1;
 
 	/*
@@ -191,6 +192,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 	if (!t)
 		return -ENOMEM;
 
+	t->no_clone = true;
+
 	INIT_LIST_HEAD(&t->devices);
 	INIT_LIST_HEAD(&t->target_callbacks);
 
@@ -789,6 +792,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 	if (r)
 		goto bad;
 
+	if (!tgt->no_clone)
+		t->no_clone = false;
+
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
 	if (!tgt->num_discard_bios && tgt->discards_supported)
@@ -1376,6 +1382,11 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
 	return 0;
 }
 
+bool dm_table_supports_noclone(struct dm_table *table)
+{
+	return table->no_clone;
+}
+
 /*
  * Check whether a table has no data devices attached using each
  * target's iterate_devices method.
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b65ca8dcfbdc..436a5ee89698 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -26,6 +26,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	 * Silently drop discards, avoiding -EOPNOTSUPP.
 	 */
 	ti->num_discard_bios = 1;
+	ti->no_clone = true;
 
 	return 0;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1b87d20041e7..cbda11b34635 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -158,8 +158,16 @@ struct table_device {
 	struct dm_dev dm_dev;
 };
 
+struct dm_noclone {
+	struct mapped_device *md;
+	bio_end_io_t *orig_bi_end_io;
+	void *orig_bi_private;
+	unsigned long start_time;
+};
+
 static struct kmem_cache *_rq_tio_cache;
 static struct kmem_cache *_rq_cache;
+static struct kmem_cache *_noclone_cache;
 
 /*
  * Bio-based DM's mempools' reserved IOs set by the user.
@@ -233,9 +241,13 @@ static int __init local_init(void)
 	if (!_rq_cache)
 		goto out_free_rq_tio_cache;
 
+	_noclone_cache = KMEM_CACHE(dm_noclone, 0);
+	if (!_noclone_cache)
+		goto out_free_rq_cache;
+
 	r = dm_uevent_init();
 	if (r)
-		goto out_free_rq_cache;
+		goto out_free_noclone_cache;
 
 	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
 	if (!deferred_remove_workqueue) {
@@ -257,6 +269,8 @@ static int __init local_init(void)
 	destroy_workqueue(deferred_remove_workqueue);
 out_uevent_exit:
 	dm_uevent_exit();
+out_free_noclone_cache:
+	kmem_cache_destroy(_noclone_cache);
 out_free_rq_cache:
 	kmem_cache_destroy(_rq_cache);
 out_free_rq_tio_cache:
@@ -270,6 +284,7 @@ static void local_exit(void)
 	flush_scheduled_work();
 	destroy_workqueue(deferred_remove_workqueue);
 
+	kmem_cache_destroy(_noclone_cache);
 	kmem_cache_destroy(_rq_cache);
 	kmem_cache_destroy(_rq_tio_cache);
 	unregister_blkdev(_major, _name);
@@ -1009,6 +1024,20 @@ static void clone_endio(struct bio *bio)
 	dec_pending(io, error);
 }
 
+static void noclone_endio(struct bio *bio)
+{
+	struct dm_noclone *noclone = bio->bi_private;
+	struct mapped_device *md = noclone->md;
+
+	end_io_acct(md, bio, noclone->start_time);
+
+	bio->bi_end_io = noclone->orig_bi_end_io;
+	bio->bi_private = noclone->orig_bi_private;
+	kmem_cache_free(_noclone_cache, noclone);
+
+	bio_endio(bio);
+}
+
 /*
  * Return maximum size of I/O possible at the supplied sector up to the current
  * target boundary.
@@ -1774,8 +1803,48 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
 		return ret;
 	}
 
+	if (dm_table_supports_noclone(map) &&
+	    (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+	    likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+	    !bio_flagged(bio, BIO_CHAIN) &&
+	    likely(!bio_integrity(bio)) &&
+	    likely(!dm_stats_used(&md->stats))) {
+		int r;
+		struct dm_noclone *noclone;
+		struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+		if (unlikely(!dm_target_is_valid(ti)))
+			goto no_fast_path;
+		if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
+			goto no_fast_path;
+		noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
+		if (unlikely(!noclone))
+			goto no_fast_path;
+		noclone->md = md;
+		noclone->start_time = jiffies;
+		noclone->orig_bi_end_io = bio->bi_end_io;
+		noclone->orig_bi_private = bio->bi_private;
+		bio->bi_end_io = noclone_endio;
+		bio->bi_private = noclone;
+		start_io_acct(md, bio);
+		r = ti->type->map(ti, bio);
+		ret = BLK_QC_T_NONE;
+		if (likely(r == DM_MAPIO_REMAPPED)) {
+			ret = generic_make_request(bio);
+		} else if (likely(r == DM_MAPIO_SUBMITTED)) {
+		} else if (r == DM_MAPIO_KILL) {
+			bio->bi_status = BLK_STS_IOERR;
+			noclone_endio(bio);
+		} else {
+			DMWARN("unimplemented target map return value: %d", r);
+			BUG();
+		}
+		goto put_table_ret;
+	}
+
+no_fast_path:
 	ret = dm_process_bio(md, map, bio);
 
+put_table_ret:
 	dm_put_live_table(md, srcu_idx);
 	return ret;
 }
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 2d539b82ec08..c3c78123dfb3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,6 +53,7 @@ void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+bool dm_table_supports_noclone(struct dm_table *t);
 bool dm_table_has_no_data_devices(struct dm_table *table);
 int dm_calculate_queue_limits(struct dm_table *table,
 			      struct queue_limits *limits);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0f5b3d7c6cb3..4ab2b0f53ae8 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,6 +315,11 @@ struct dm_target {
 	 * whether or not its underlying devices have support.
 	 */
 	bool discards_supported:1;
+
+	/*
+	 * The target can process bios without cloning them.
+	 */
+	bool no_clone:1;
 };
 
 /* Each target can link one of these into the table */
-- 
2.15.0

* [PATCH 5/8] dm: improve noclone bio support
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
                   ` (3 preceding siblings ...)
  2019-02-19 22:17 ` [PATCH 4/8] dm: implement noclone optimization for bio-based Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio Mike Snitzer
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

Leverage the fact that dm_queue_split() enables use of noclone support
even for targets that require additional splitting (via ti->max_io_len).

To achieve this, move noclone processing from dm_make_request() to
dm_process_bio() and check whether bio->bi_end_io is already set to
noclone_endio.  This also fixes dm_wq_work() so that it can support
noclone bios (because it calls dm_process_bio()).

While at it, clean up ->map() return processing to be comparable to
__map_bio() and add some code comments that explain why certain
conditionals exist to prevent the use of noclone.
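
Schematically, the guard that makes dm_process_bio() reentrant for
noclone bios is simply (condensed from the hunk below):

    if (bio->bi_end_io != noclone_endio) {
        /* first pass: allocate and attach the dm_noclone wrapper */
        ...
    }
    /* else: reentered via dm_wq_work(), the bio is already wrapped */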

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 102 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 61 insertions(+), 41 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index cbda11b34635..6868b80fc70c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1769,14 +1769,74 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 	 * If in ->make_request_fn we need to use blk_queue_split(), otherwise
 	 * queue_limits for abnormal requests (e.g. discard, writesame, etc)
 	 * won't be imposed.
+	 *
+	 * For normal READ and WRITE requests we benefit from upfront splitting
+	 * via dm_queue_split() because we can then leverage noclone support below.
 	 */
-	if (current->bio_list) {
+	if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
+		/*
+		 * It is fine to use {blk,dm}_queue_split() before opting to use noclone
+		 * because any ->bi_private and ->bi_end_io will get saved off below.
+		 * Once noclone_endio() is called the bio_chain's endio will kick in.
+		 * - __split_and_process_bio() can split further, but any targets that
+		 *   require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
+		 *   callers) shouldn't set ti->no_clone.
+		 */
 		if (is_abnormal_io(bio))
 			blk_queue_split(md->queue, &bio);
 		else
 			dm_queue_split(md, ti, &bio);
 	}
 
+	if (dm_table_supports_noclone(map) &&
+	    (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+	    likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+	    likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
+	    likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
+		int r;
+		struct dm_noclone *noclone;
+
+		/* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
+		if (bio->bi_end_io != noclone_endio) {
+			noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
+			if (unlikely(!noclone))
+				goto no_fast_path;
+
+			noclone->md = md;
+			noclone->start_time = jiffies;
+			noclone->orig_bi_end_io = bio->bi_end_io;
+			noclone->orig_bi_private = bio->bi_private;
+			bio->bi_end_io = noclone_endio;
+			bio->bi_private = noclone;
+		}
+
+		start_io_acct(md, bio);
+		r = ti->type->map(ti, bio);
+		switch (r) {
+		case DM_MAPIO_SUBMITTED:
+			break;
+		case DM_MAPIO_REMAPPED:
+			/* the bio has been remapped so dispatch it */
+			if (md->type == DM_TYPE_NVME_BIO_BASED)
+				ret = direct_make_request(bio);
+			else
+				ret = generic_make_request(bio);
+			break;
+		case DM_MAPIO_KILL:
+			bio_io_error(bio);
+			break;
+		case DM_MAPIO_REQUEUE:
+			bio->bi_status = BLK_STS_DM_REQUEUE;
+			noclone_endio(bio);
+			break;
+		default:
+			DMWARN("unimplemented target map return value: %d", r);
+			BUG();
+		}
+
+		return ret;
+	}
+no_fast_path:
 	if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
 		return __process_bio(md, map, bio, ti);
 	else
@@ -1803,48 +1863,8 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
 		return ret;
 	}
 
-	if (dm_table_supports_noclone(map) &&
-	    (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
-	    likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
-	    !bio_flagged(bio, BIO_CHAIN) &&
-	    likely(!bio_integrity(bio)) &&
-	    likely(!dm_stats_used(&md->stats))) {
-		int r;
-		struct dm_noclone *noclone;
-		struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
-		if (unlikely(!dm_target_is_valid(ti)))
-			goto no_fast_path;
-		if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
-			goto no_fast_path;
-		noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
-		if (unlikely(!noclone))
-			goto no_fast_path;
-		noclone->md = md;
-		noclone->start_time = jiffies;
-		noclone->orig_bi_end_io = bio->bi_end_io;
-		noclone->orig_bi_private = bio->bi_private;
-		bio->bi_end_io = noclone_endio;
-		bio->bi_private = noclone;
-		start_io_acct(md, bio);
-		r = ti->type->map(ti, bio);
-		ret = BLK_QC_T_NONE;
-		if (likely(r == DM_MAPIO_REMAPPED)) {
-			ret = generic_make_request(bio);
-		} else if (likely(r == DM_MAPIO_SUBMITTED)) {
-		} else if (r == DM_MAPIO_KILL) {
-			bio->bi_status = BLK_STS_IOERR;
-			noclone_endio(bio);
-		} else {
-			DMWARN("unimplemented target map return value: %d", r);
-			BUG();
-		}
-		goto put_table_ret;
-	}
-
-no_fast_path:
 	ret = dm_process_bio(md, map, bio);
 
-put_table_ret:
 	dm_put_live_table(md, srcu_idx);
 	return ret;
 }
-- 
2.15.0

* [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
                   ` (4 preceding siblings ...)
  2019-02-19 22:17 ` [PATCH 5/8] dm: improve noclone bio support Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
  2019-02-19 22:17 ` [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data Mike Snitzer
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

A DM target can manage its own per-bio-data objects and attach them to
the new 'noclone_private' member in struct dm_noclone.
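
A target's ->map hook would use it along these lines (an illustrative
sketch with a hypothetical 'my_pbd_cache'; patch 8 adds the real
multipath version):

    struct dm_noclone *noclone = dm_get_noclone_from_bio(bio);

    if (noclone) {
        noclone->noclone_private =
            kmem_cache_alloc(my_pbd_cache, GFP_NOWAIT);
        if (unlikely(!noclone->noclone_private))
            return DM_MAPIO_NOCLONE_FAILED;  /* fall back to cloning */
    }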

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c               | 16 ++++++++--------
 include/linux/device-mapper.h | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6868b80fc70c..8ff0ced278d7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -158,13 +158,6 @@ struct table_device {
 	struct dm_dev dm_dev;
 };
 
-struct dm_noclone {
-	struct mapped_device *md;
-	bio_end_io_t *orig_bi_end_io;
-	void *orig_bi_private;
-	unsigned long start_time;
-};
-
 static struct kmem_cache *_rq_tio_cache;
 static struct kmem_cache *_rq_cache;
 static struct kmem_cache *_noclone_cache;
@@ -1802,13 +1795,15 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 			if (unlikely(!noclone))
 				goto no_fast_path;
 
+			noclone->magic = DM_NOCLONE_MAGIC;
 			noclone->md = md;
 			noclone->start_time = jiffies;
 			noclone->orig_bi_end_io = bio->bi_end_io;
 			noclone->orig_bi_private = bio->bi_private;
 			bio->bi_end_io = noclone_endio;
 			bio->bi_private = noclone;
-		}
+		} else
+			noclone = bio->bi_private;
 
 		start_io_acct(md, bio);
 		r = ti->type->map(ti, bio);
@@ -1822,6 +1817,11 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 			else
 				ret = generic_make_request(bio);
 			break;
+		case DM_MAPIO_NOCLONE_FAILED:
+			end_io_acct(md, bio, noclone->start_time);
+			kmem_cache_free(_noclone_cache, noclone);
+			goto no_fast_path;
+			break;
 		case DM_MAPIO_KILL:
 			bio_io_error(bio);
 			break;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4ab2b0f53ae8..b3e33d5cae8b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -322,6 +322,22 @@ struct dm_target {
 	bool no_clone:1;
 };
 
+#define DM_NOCLONE_MAGIC 9693664
+struct dm_noclone {
+	unsigned magic;
+	struct mapped_device *md;
+	bio_end_io_t *orig_bi_end_io;
+	void *orig_bi_private;
+	void *noclone_private;
+	unsigned long start_time;
+};
+
+static inline struct dm_noclone *dm_get_noclone_from_bio(struct bio *bio)
+{
+	struct dm_noclone *noclone = bio->bi_private;
+	return (noclone && noclone->magic == DM_NOCLONE_MAGIC) ? noclone : NULL;
+}
+
 /* Each target can link one of these into the table */
 struct dm_target_callbacks {
 	struct list_head list;
@@ -572,6 +588,7 @@ do {									\
 #define DM_MAPIO_REQUEUE	DM_ENDIO_REQUEUE
 #define DM_MAPIO_DELAY_REQUEUE	DM_ENDIO_DELAY_REQUEUE
 #define DM_MAPIO_KILL		4
+#define DM_MAPIO_NOCLONE_FAILED	5
 
 #define dm_sector_div64(x, y)( \
 { \
-- 
2.15.0

* [PATCH 7/8] dm: improve noclone_endio() to support multipath target
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
                   ` (5 preceding siblings ...)
  2019-02-19 22:17 ` [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  2019-02-20 15:08   ` Mikulas Patocka
  2019-02-19 22:17 ` [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data Mike Snitzer
  7 siblings, 1 reply; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

noclone_endio() must call the target's ->endio method if it exists.
Also must handle the various DM_ENDIO_* returns that are possible.

Factor out dm_bio_pushback_needed() for both dec_pending() and
noclone_endio() to use when handling BLK_STS_DM_REQUEUE.

Lastly, there is no need to conditionally set md->immutable_target in
__bind().  If the device only uses a single immutable target then it
should always be reflected in md->immutable_target.  This is important
because noclone_endio() benefits from this to get access to the
multipath dm_target without needing to add another member to the
'dm_noclone' structure.
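
Condensed from the hunk below, noclone_endio()'s dispatch on the
target's ->end_io return value is:

    switch (endio(ti, bio, &error)) {
    case DM_ENDIO_REQUEUE:
        error = BLK_STS_DM_REQUEUE;
        /*FALLTHRU*/
    case DM_ENDIO_DONE:
        break;
    case DM_ENDIO_INCOMPLETE:
        return;        /* the target will handle the io */
    default:
        BUG();         /* unimplemented return value */
    }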

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 22 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8ff0ced278d7..2299f946c175 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
 	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 }
 
+static bool dm_bio_pushback_needed(struct mapped_device *md,
+				   struct bio *bio, blk_status_t *error)
+{
+	unsigned long flags;
+
+	/*
+	 * Target requested pushing back the I/O.
+	 */
+	if (__noflush_suspending(md)) {
+		spin_lock_irqsave(&md->deferred_lock, flags);
+		// FIXME: using bio_list_add_head() creates LIFO
+		bio_list_add_head(&md->deferred, bio);
+		spin_unlock_irqrestore(&md->deferred_lock, flags);
+		return true;
+	} else {
+		/* noflush suspend was interrupted. */
+		*error = BLK_STS_IOERR;
+	}
+
+	return false;
+}
+
 /*
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
@@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
 	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
-		if (io->status == BLK_STS_DM_REQUEUE) {
-			/*
-			 * Target requested pushing back the I/O.
-			 */
-			spin_lock_irqsave(&md->deferred_lock, flags);
-			if (__noflush_suspending(md))
-				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
-				bio_list_add_head(&md->deferred, io->orig_bio);
-			else
-				/* noflush suspend was interrupted. */
-				io->status = BLK_STS_IOERR;
-			spin_unlock_irqrestore(&md->deferred_lock, flags);
-		}
+		if (unlikely(io->status == BLK_STS_DM_REQUEUE))
+			(void) dm_bio_pushback_needed(md, bio, &io->status);
 
 		io_error = io->status;
 		bio = io->orig_bio;
@@ -1019,8 +1030,33 @@ static void clone_endio(struct bio *bio)
 
 static void noclone_endio(struct bio *bio)
 {
+	blk_status_t error = bio->bi_status;
 	struct dm_noclone *noclone = bio->bi_private;
 	struct mapped_device *md = noclone->md;
+	struct dm_target *ti = NULL;
+	dm_endio_fn endio = NULL;
+
+	if (md->immutable_target) {
+		ti = md->immutable_target;
+		endio = ti->type->end_io;
+	}
+
+	if (endio) {
+		int r = endio(ti, bio, &error);
+		switch (r) {
+		case DM_ENDIO_REQUEUE:
+			error = BLK_STS_DM_REQUEUE;
+			/*FALLTHRU*/
+		case DM_ENDIO_DONE:
+			break;
+		case DM_ENDIO_INCOMPLETE:
+			/* The target will handle the io */
+			return;
+		default:
+			DMWARN("unimplemented target endio return value: %d", r);
+			BUG();
+		}
+	}
 
 	end_io_acct(md, bio, noclone->start_time);
 
@@ -1028,6 +1064,11 @@ static void noclone_endio(struct bio *bio)
 	bio->bi_private = noclone->orig_bi_private;
 	kmem_cache_free(_noclone_cache, noclone);
 
+	if (unlikely(error == BLK_STS_DM_REQUEUE)) {
+		if (dm_bio_pushback_needed(md, bio, &bio->bi_status))
+			return;
+	}
+
 	bio_endio(bio);
 }
 
@@ -2229,15 +2270,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	if (request_based)
 		dm_stop_queue(q);
 
-	if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
-		/*
-		 * Leverage the fact that request-based DM targets and
-		 * NVMe bio based targets are immutable singletons
-		 * - used to optimize both dm_request_fn and dm_mq_queue_rq;
-		 *   and __process_bio.
-		 */
-		md->immutable_target = dm_table_get_immutable_target(t);
-	}
+	md->immutable_target = dm_table_get_immutable_target(t);
 
 	ret = __bind_mempools(md, t);
 	if (ret) {
-- 
2.15.0

* [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data
  2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
                   ` (6 preceding siblings ...)
  2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
  7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
  To: dm-devel

Update multipath_init_per_bio_data() to allocate a dm_mpath_per_bio_data
structure and associate it with the noclone bio using the dm_noclone
structure's 'noclone_private' member.

If kmem_cache_alloc() fails to allocate a struct dm_mpath_per_bio_data,
DM_MAPIO_NOCLONE_FAILED is returned from ->map and DM core falls back to
using traditional bio-based DM's bio cloning.
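
Putting patches 6 and 8 together, the allocation-failure flow is
(condensed from the hunks below and from patch 6):

    multipath_map_bio()
      -> multipath_init_per_bio_data() fails its GFP_NOWAIT allocation
      -> returns DM_MAPIO_NOCLONE_FAILED
      -> dm_process_bio() frees the dm_noclone and jumps to
         no_fast_path, i.e. the traditional cloning path.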

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 65 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2ee5e357a0a7..ac7ca5334796 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -30,6 +30,8 @@
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
 
+static struct kmem_cache *_noclone_per_bio_data_cache;
+
 /* Path properties */
 struct pgpath {
 	struct list_head list;
@@ -101,6 +103,11 @@ struct dm_mpath_io {
 	size_t nr_bytes;
 };
 
+struct dm_mpath_per_bio_data {
+	struct dm_mpath_io mpio;
+	struct dm_bio_details bio_details;
+};
+
 typedef int (*action_fn) (struct pgpath *pgpath);
 
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -250,11 +257,16 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
 
 static size_t multipath_per_bio_data_size(void)
 {
-	return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
+	return sizeof(struct dm_mpath_per_bio_data);
 }
 
 static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
 {
+	struct dm_noclone *noclone = dm_get_noclone_from_bio(bio);
+
+	if (noclone)
+		return noclone->noclone_private;
+
 	return dm_per_bio_data(bio, multipath_per_bio_data_size());
 }
 
@@ -265,16 +277,29 @@ static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio
 	return bio_details;
 }
 
-static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
+static bool multipath_init_per_bio_data(struct dm_target *ti, struct bio *bio,
+					struct dm_mpath_io **mpio_p)
 {
-	struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
-	struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
+	struct dm_mpath_io *mpio;
+	struct dm_bio_details *bio_details;
+	struct dm_noclone *noclone = dm_get_noclone_from_bio(bio);
+
+	if (noclone) {
+		noclone->noclone_private =
+			kmem_cache_alloc(_noclone_per_bio_data_cache, GFP_NOWAIT);
+		if (unlikely(!noclone->noclone_private))
+			return false;
+	}
+
+	mpio = get_mpio_from_bio(bio);
+	bio_details = get_bio_details_from_mpio(mpio);
 
 	mpio->nr_bytes = bio->bi_iter.bi_size;
 	mpio->pgpath = NULL;
 	*mpio_p = mpio;
 
 	dm_bio_record(bio_details, bio);
+	return true;
 }
 
 /*-----------------------------------------------
@@ -652,7 +677,9 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
 	struct multipath *m = ti->private;
 	struct dm_mpath_io *mpio = NULL;
 
-	multipath_init_per_bio_data(bio, &mpio);
+	if (!multipath_init_per_bio_data(ti, bio, &mpio))
+		return DM_MAPIO_NOCLONE_FAILED;
+
 	return __multipath_map_bio(m, bio, mpio);
 }
 
@@ -1178,9 +1205,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->num_discard_bios = 1;
 	ti->num_write_same_bios = 1;
 	ti->num_write_zeroes_bios = 1;
-	if (m->queue_mode == DM_TYPE_BIO_BASED)
+	if (m->queue_mode == DM_TYPE_BIO_BASED) {
+		ti->no_clone = true;
 		ti->per_io_data_size = multipath_per_bio_data_size();
-	else
+	} else
 		ti->per_io_data_size = sizeof(struct dm_mpath_io);
 
 	return 0;
@@ -1584,12 +1612,13 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *bio,
 				blk_status_t *error)
 {
 	struct multipath *m = ti->private;
-	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+	struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
 	struct pgpath *pgpath = mpio->pgpath;
+	struct dm_noclone *noclone = NULL;
 	unsigned long flags;
 	int r = DM_ENDIO_DONE;
 
@@ -1611,7 +1640,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
 	}
 
 	spin_lock_irqsave(&m->lock, flags);
-	bio_list_add(&m->queued_bios, clone);
+	bio_list_add(&m->queued_bios, bio);
 	spin_unlock_irqrestore(&m->lock, flags);
 	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
 		queue_work(kmultipathd, &m->process_queued_bios);
@@ -1625,6 +1654,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
 
+	if (r != DM_ENDIO_INCOMPLETE) {
+		noclone = dm_get_noclone_from_bio(bio);
+		if (noclone)
+			kmem_cache_free(_noclone_per_bio_data_cache, noclone->noclone_private);
+	}
+
 	return r;
 }
 
@@ -2057,6 +2092,13 @@ static int __init dm_multipath_init(void)
 		goto bad_alloc_kmpath_handlerd;
 	}
 
+	_noclone_per_bio_data_cache = KMEM_CACHE(dm_mpath_per_bio_data, 0);
+	if (!_noclone_per_bio_data_cache) {
+		DMERR("failed to create noclone_per_bio_data_cache");
+		r = -ENOMEM;
+		goto bad_noclone_pbd;
+	}
+
 	r = dm_register_target(&multipath_target);
 	if (r < 0) {
 		DMERR("request-based register failed %d", r);
@@ -2067,6 +2109,8 @@ static int __init dm_multipath_init(void)
 	return 0;
 
 bad_register_target:
+	kmem_cache_destroy(_noclone_per_bio_data_cache);
+bad_noclone_pbd:
 	destroy_workqueue(kmpath_handlerd);
 bad_alloc_kmpath_handlerd:
 	destroy_workqueue(kmultipathd);
@@ -2078,6 +2122,7 @@ static void __exit dm_multipath_exit(void)
 {
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
+	kmem_cache_destroy(_noclone_per_bio_data_cache);
 
 	dm_unregister_target(&multipath_target);
 }
-- 
2.15.0

* Re: [PATCH 7/8] dm: improve noclone_endio() to support multipath target
  2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
@ 2019-02-20 15:08   ` Mikulas Patocka
  2019-02-20 15:14     ` Mike Snitzer
  0 siblings, 1 reply; 11+ messages in thread
From: Mikulas Patocka @ 2019-02-20 15:08 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel



On Tue, 19 Feb 2019, Mike Snitzer wrote:

> noclone_endio() must call the target's ->endio method if it exists.
> Also must handle the various DM_ENDIO_* returns that are possible.
> 
> Factor out dm_bio_pushback_needed() for both dec_pending() and
> noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
> 
> Lastly, there is no need to conditionally set md->immutable_target in
> __bind().  If the device only uses a single immutable target then it
> should always be reflected in md->immutable_target.  This is important
> because noclone_endio() benefits from this to get access to the
> multipath dm_target without needing to add another member to the
> 'dm_noclone' structure.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 55 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 8ff0ced278d7..2299f946c175 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
>  	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
>  }
>  
> +static bool dm_bio_pushback_needed(struct mapped_device *md,
> +				   struct bio *bio, blk_status_t *error)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Target requested pushing back the I/O.
> +	 */
> +	if (__noflush_suspending(md)) {
> +		spin_lock_irqsave(&md->deferred_lock, flags);
> +		// FIXME: using bio_list_add_head() creates LIFO
> +		bio_list_add_head(&md->deferred, bio);
> +		spin_unlock_irqrestore(&md->deferred_lock, flags);
> +		return true;
> +	} else {
> +		/* noflush suspend was interrupted. */
> +		*error = BLK_STS_IOERR;
> +	}
> +
> +	return false;
> +}
> +
>  /*
>   * Decrements the number of outstanding ios that a bio has been
>   * cloned into, completing the original io if necc.
> @@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
>  	}
>  
>  	if (atomic_dec_and_test(&io->io_count)) {
> -		if (io->status == BLK_STS_DM_REQUEUE) {
> -			/*
> -			 * Target requested pushing back the I/O.
> -			 */
> -			spin_lock_irqsave(&md->deferred_lock, flags);
> -			if (__noflush_suspending(md))
> -				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
> -				bio_list_add_head(&md->deferred, io->orig_bio);
> -			else
> -				/* noflush suspend was interrupted. */
> -				io->status = BLK_STS_IOERR;
> -			spin_unlock_irqrestore(&md->deferred_lock, flags);
> -		}
> +		if (unlikely(io->status == BLK_STS_DM_REQUEUE))
> +			(void) dm_bio_pushback_needed(md, bio, &io->status);

This triggers a compiler warning because the variable bio is
uninitialized here.

Mikulas

>  
>  		io_error = io->status;
>  		bio = io->orig_bio;
> @@ -1019,8 +1030,33 @@ static void clone_endio(struct bio *bio)
>  
>  static void noclone_endio(struct bio *bio)
>  {
> +	blk_status_t error = bio->bi_status;
>  	struct dm_noclone *noclone = bio->bi_private;
>  	struct mapped_device *md = noclone->md;
> +	struct dm_target *ti = NULL;
> +	dm_endio_fn endio = NULL;
> +
> +	if (md->immutable_target) {
> +		ti = md->immutable_target;
> +		endio = ti->type->end_io;
> +	}
> +
> +	if (endio) {
> +		int r = endio(ti, bio, &error);
> +		switch (r) {
> +		case DM_ENDIO_REQUEUE:
> +			error = BLK_STS_DM_REQUEUE;
> +			/*FALLTHRU*/
> +		case DM_ENDIO_DONE:
> +			break;
> +		case DM_ENDIO_INCOMPLETE:
> +			/* The target will handle the io */
> +			return;
> +		default:
> +			DMWARN("unimplemented target endio return value: %d", r);
> +			BUG();
> +		}
> +	}
>  
>  	end_io_acct(md, bio, noclone->start_time);
>  
> @@ -1028,6 +1064,11 @@ static void noclone_endio(struct bio *bio)
>  	bio->bi_private = noclone->orig_bi_private;
>  	kmem_cache_free(_noclone_cache, noclone);
>  
> +	if (unlikely(error == BLK_STS_DM_REQUEUE)) {
> +		if (dm_bio_pushback_needed(md, bio, &bio->bi_status))
> +			return;
> +	}
> +
>  	bio_endio(bio);
>  }
>  
> @@ -2229,15 +2270,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
>  	if (request_based)
>  		dm_stop_queue(q);
>  
> -	if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
> -		/*
> -		 * Leverage the fact that request-based DM targets and
> -		 * NVMe bio based targets are immutable singletons
> -		 * - used to optimize both dm_request_fn and dm_mq_queue_rq;
> -		 *   and __process_bio.
> -		 */
> -		md->immutable_target = dm_table_get_immutable_target(t);
> -	}
> +	md->immutable_target = dm_table_get_immutable_target(t);
>  
>  	ret = __bind_mempools(md, t);
>  	if (ret) {
> -- 
> 2.15.0
> 

* Re: [PATCH 7/8] dm: improve noclone_endio() to support multipath target
  2019-02-20 15:08   ` Mikulas Patocka
@ 2019-02-20 15:14     ` Mike Snitzer
  0 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-20 15:14 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Wed, Feb 20 2019 at 10:08am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> 
> 
> On Tue, 19 Feb 2019, Mike Snitzer wrote:
> 
> > noclone_endio() must call the target's ->endio method if it exists.
> > Also must handle the various DM_ENDIO_* returns that are possible.
> > 
> > Factor out dm_bio_pushback_needed() for both dec_pending() and
> > noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
> > 
> > Lastly, there is no need to conditionally set md->immutable_target in
> > __bind().  If the device only uses a single immutable target then it
> > should always be reflected in md->immutable_target.  This is important
> > because noclone_endio() benefits from this to get access to the
> > multipath dm_target without needing to add another member to the
> > 'dm_noclone' structure.
> > 
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> >  drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
> >  1 file changed, 55 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > index 8ff0ced278d7..2299f946c175 100644
> > --- a/drivers/md/dm.c
> > +++ b/drivers/md/dm.c
> > @@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
> >  	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
> >  }
> >  
> > +static bool dm_bio_pushback_needed(struct mapped_device *md,
> > +				   struct bio *bio, blk_status_t *error)
> > +{
> > +	unsigned long flags;
> > +
> > +	/*
> > +	 * Target requested pushing back the I/O.
> > +	 */
> > +	if (__noflush_suspending(md)) {
> > +		spin_lock_irqsave(&md->deferred_lock, flags);
> > +		// FIXME: using bio_list_add_head() creates LIFO
> > +		bio_list_add_head(&md->deferred, bio);
> > +		spin_unlock_irqrestore(&md->deferred_lock, flags);
> > +		return true;
> > +	} else {
> > +		/* noflush suspend was interrupted. */
> > +		*error = BLK_STS_IOERR;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> >  /*
> >   * Decrements the number of outstanding ios that a bio has been
> >   * cloned into, completing the original io if necc.
> > @@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
> >  	}
> >  
> >  	if (atomic_dec_and_test(&io->io_count)) {
> > -		if (io->status == BLK_STS_DM_REQUEUE) {
> > -			/*
> > -			 * Target requested pushing back the I/O.
> > -			 */
> > -			spin_lock_irqsave(&md->deferred_lock, flags);
> > -			if (__noflush_suspending(md))
> > -				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
> > -				bio_list_add_head(&md->deferred, io->orig_bio);
> > -			else
> > -				/* noflush suspend was interrupted. */
> > -				io->status = BLK_STS_IOERR;
> > -			spin_unlock_irqrestore(&md->deferred_lock, flags);
> > -		}
> > +		if (unlikely(io->status == BLK_STS_DM_REQUEUE))
> > +			(void) dm_bio_pushback_needed(md, bio, &io->status);
> 
> This triggers a compiler warning because the variable bio is
> uninitialized here.

Right, I fixed it up in the latest git; please see the latest 'dm-5.1' branch:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/log/?h=dm-5.1
