* [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far
@ 2019-02-19 22:17 Mike Snitzer
2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
` (7 more replies)
0 siblings, 8 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
I seized on Mikulas's noclone patches and took it upon myself to add
noclone support to bio-based dm-multipath. Pretty happy with how
things turned out but also open to all review/feedback.
I've verified that all mptest tests pass (with tcmloop, mptest's
nvme-fcloop based test setup panics with 5.0-rc6. I should probably
report and/or fix that).
I've yet to do the before vs after multipath benchmarking to see if
there is a performance win but I'm betting there will be (otherwise I
wouldn't have done this work). I'll backfill benchmarking results
later in the week.
Mike Snitzer (6):
dm: update dm_process_bio() to split bio if in ->make_request_fn()
dm: eliminate 'split_discard_bios' flag from DM target interface
dm: improve noclone bio support
dm: add the ability to attach per-bio-data to dm_noclone bio
dm: improve noclone_endio() to support multipath target
dm mpath: add support for dm_noclone and its per-bio-data
Mikulas Patocka (2):
dm: refactor start_io_acct and end_io_acct
dm: implement noclone optimization for bio-based
drivers/md/dm-cache-target.c | 1 -
drivers/md/dm-core.h | 1 +
drivers/md/dm-linear.c | 1 +
drivers/md/dm-mpath.c | 65 ++++++--
drivers/md/dm-raid.c | 14 +-
drivers/md/dm-stripe.c | 1 +
drivers/md/dm-table.c | 11 ++
drivers/md/dm-thin.c | 1 -
drivers/md/dm-zero.c | 1 +
drivers/md/dm-zoned-target.c | 1 -
drivers/md/dm.c | 357 +++++++++++++++++++++++++++++-------------
drivers/md/dm.h | 1 +
include/linux/device-mapper.h | 22 ++-
13 files changed, 346 insertions(+), 131 deletions(-)
--
2.15.0
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn()
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
Must call blk_queue_split() otherwise queue_limits for abnormal requests
(e.g. discard, writesame, etc) won't be imposed.
In addition, add dm_queue_split() to simplify DM specific splitting that
is needed for targets that impose ti->max_io_len.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 93 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 66 insertions(+), 27 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 515e6af9bed2..7a774fcd0194 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1533,6 +1533,22 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
}
+static bool is_abnormal_io(struct bio *bio)
+{
+ bool r = false;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
+ r = true;
+ break;
+ }
+
+ return r;
+}
+
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
int *result)
{
@@ -1565,7 +1581,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
if (!dm_target_is_valid(ti))
return -EIO;
- if (unlikely(__process_abnormal_io(ci, ti, &r)))
+ if (__process_abnormal_io(ci, ti, &r))
return r;
len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
@@ -1601,13 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
- blk_queue_split(md->queue, &bio);
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1675,18 +1684,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* Optimized variant of __split_and_process_bio that leverages the
* fact that targets that use it do _not_ have a need to split bios.
*/
-static blk_qc_t __process_bio(struct mapped_device *md,
- struct dm_table *map, struct bio *bio)
+static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
+ struct bio *bio, struct dm_target *ti)
{
struct clone_info ci;
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1704,21 +1708,11 @@ static blk_qc_t __process_bio(struct mapped_device *md,
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
} else {
- struct dm_target *ti = md->immutable_target;
struct dm_target_io *tio;
- /*
- * Defend against IO still getting in during teardown
- * - as was seen for a time with nvme-fcloop
- */
- if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
- error = -EIO;
- goto out;
- }
-
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
- if (unlikely(__process_abnormal_io(&ci, ti, &error)))
+ if (__process_abnormal_io(&ci, ti, &error))
goto out;
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
@@ -1730,11 +1724,56 @@ static blk_qc_t __process_bio(struct mapped_device *md,
return ret;
}
+static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio)
+{
+ unsigned len, sector_count;
+
+ sector_count = bio_sectors(*bio);
+ len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count);
+
+ if (sector_count > len) {
+ struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split);
+
+ bio_chain(split, *bio);
+ trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
+ generic_make_request(*bio);
+ *bio = split;
+ }
+}
+
static blk_qc_t dm_process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio)
{
+ blk_qc_t ret = BLK_QC_T_NONE;
+ struct dm_target *ti = md->immutable_target;
+
+ if (unlikely(!map)) {
+ bio_io_error(bio);
+ return ret;
+ }
+
+ if (!ti) {
+ ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+ if (unlikely(!ti || !dm_target_is_valid(ti))) {
+ bio_io_error(bio);
+ return ret;
+ }
+ }
+
+ /*
+ * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+ * queue_limits for abnormal requests (e.g. discard, writesame, etc)
+ * won't be imposed.
+ */
+ if (current->bio_list) {
+ if (is_abnormal_io(bio))
+ blk_queue_split(md->queue, &bio);
+ else
+ dm_queue_split(md, ti, &bio);
+ }
+
if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
- return __process_bio(md, map, bio);
+ return __process_bio(md, map, bio, ti);
else
return __split_and_process_bio(md, map, bio);
}
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 3/8] dm: refactor start_io_acct and end_io_acct Mike Snitzer
` (5 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
There is no need to have DM core split discards on behalf of a DM target
now that blk_queue_split() handles splitting discards based on the
queue_limits. A DM target just needs to set max_discard_sectors,
discard_granularity, etc, in queue_limits.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-cache-target.c | 1 -
drivers/md/dm-raid.c | 14 +++++++++-----
drivers/md/dm-thin.c | 1 -
drivers/md/dm-zoned-target.c | 1 -
drivers/md/dm.c | 28 ++++++----------------------
include/linux/device-mapper.h | 6 ------
6 files changed, 15 insertions(+), 36 deletions(-)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b29a8327eed1..adc529f12b6b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2496,7 +2496,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
- ti->split_discard_bios = false;
ti->per_io_data_size = sizeof(struct per_bio_data);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index adcfe8ae10aa..9fdef6897316 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs)
}
}
- /*
- * RAID1 and RAID10 personalities require bio splitting,
- * RAID0/4/5/6 don't and process large discard bios properly.
- */
- ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
ti->num_discard_bios = 1;
}
@@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, chunk_size);
blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+
+ /*
+ * RAID1 and RAID10 personalities require bio splitting,
+ * RAID0/4/5/6 don't and process large discard bios properly.
+ */
+ if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ limits->discard_granularity = chunk_size;
+ limits->max_discard_sectors = chunk_size;
+ }
}
static void raid_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e83b63608262..0d9ded0f5e50 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4227,7 +4227,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
- ti->split_discard_bios = false;
}
mutex_unlock(&dm_thin_pool_table.mutex);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6af5babe6837..8865c1709e16 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->per_io_data_size = sizeof(struct dmz_bioctx);
ti->flush_supported = true;
ti->discards_supported = true;
- ti->split_discard_bios = true;
/* The exposed capacity is the number of chunks that can be mapped */
ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7a774fcd0194..b988e178a523 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
return ti->num_write_zeroes_bios;
}
-typedef bool (*is_split_required_fn)(struct dm_target *ti);
-
-static bool is_split_required_for_discard(struct dm_target *ti)
-{
- return ti->split_discard_bios;
-}
-
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, bool is_split_required)
+ unsigned num_bios)
{
- unsigned len;
+ unsigned len = ci->sector_count;
/*
* Even though the device advertised support for this type of
@@ -1499,38 +1492,29 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
if (!num_bios)
return -EOPNOTSUPP;
- if (!is_split_required)
- len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
- else
- len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
-
__send_duplicate_bios(ci, ti, num_bios, &len);
- ci->sector += len;
- ci->sector_count -= len;
-
return 0;
}
static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
- is_split_required_for_discard(ti));
+ return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti));
}
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti));
}
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti));
}
static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti));
}
static bool is_abnormal_io(struct bio *bio)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e528baebad69..0f5b3d7c6cb3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,12 +315,6 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
-
- /*
- * Set if the target required discard bios to be split
- * on max_io_len boundary.
- */
- bool split_discard_bios:1;
};
/* Each target can link one of these into the table */
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/8] dm: refactor start_io_acct and end_io_acct
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
2019-02-19 22:17 ` [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 4/8] dm: implement noclone optimization for bio-based Mike Snitzer
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
From: Mikulas Patocka <mpatocka@redhat.com>
This refactoring of start_io_acct() and end_io_acct() eliminates their
dependency on 'struct dm_io' which will not be created for all IO
(following commit introduces the ability for targets to avoid cloning
bios for READ or WRITE bios).
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 68 +++++++++++++++++++++++++--------------------------------
1 file changed, 30 insertions(+), 38 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b988e178a523..1b87d20041e7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -580,7 +580,19 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
return r;
}
-static void start_io_acct(struct dm_io *io);
+static void start_io_acct(struct mapped_device *md, struct bio *bio)
+{
+ generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), &dm_disk(md)->part0);
+}
+
+static void end_io_acct(struct mapped_device *md, struct bio *bio, unsigned long start_time)
+{
+ generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, start_time);
+
+ /* nudge anyone waiting on suspend queue */
+ if (unlikely(wq_has_sleeper(&md->wait)))
+ wake_up(&md->wait);
+}
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
{
@@ -604,7 +616,14 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->md = md;
spin_lock_init(&io->endio_lock);
- start_io_acct(io);
+ io->start_time = jiffies;
+
+ start_io_acct(md, bio);
+
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+ false, 0, &io->stats_aux);
return io;
}
@@ -668,41 +687,6 @@ static bool md_in_flight(struct mapped_device *md)
return md_in_flight_bios(md);
}
-static void start_io_acct(struct dm_io *io)
-{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
-
- io->start_time = jiffies;
-
- generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
- &dm_disk(md)->part0);
-
- if (unlikely(dm_stats_used(&md->stats)))
- dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, bio_sectors(bio),
- false, 0, &io->stats_aux);
-}
-
-static void end_io_acct(struct dm_io *io)
-{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
- unsigned long duration = jiffies - io->start_time;
-
- generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
- io->start_time);
-
- if (unlikely(dm_stats_used(&md->stats)))
- dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, bio_sectors(bio),
- true, duration, &io->stats_aux);
-
- /* nudge anyone waiting on suspend queue */
- if (unlikely(wq_has_sleeper(&md->wait)))
- wake_up(&md->wait);
-}
-
/*
* Add the bio to the list of deferred io.
*/
@@ -941,7 +925,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
io_error = io->status;
bio = io->orig_bio;
- end_io_acct(io);
+
+ if (unlikely(dm_stats_used(&md->stats))) {
+ unsigned long duration = jiffies - io->start_time;
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+ true, duration, &io->stats_aux);
+ }
+
+ end_io_acct(md, bio, io->start_time);
free_io(md, io);
if (io_error == BLK_STS_DM_REQUEUE)
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 4/8] dm: implement noclone optimization for bio-based
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
` (2 preceding siblings ...)
2019-02-19 22:17 ` [PATCH 3/8] dm: refactor start_io_acct and end_io_acct Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 5/8] dm: improve noclone bio support Mike Snitzer
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
From: Mikulas Patocka <mpatocka@redhat.com>
Until now bio-based DM core has always cloned an incoming bio, remapped
the clone bio to a low layer, dealt with the clone's completion and then
finally completed the original bio. This cloning can be avoided for
READ and WRITE bios if the target opts-in by setting ti->no_clone.
Avoiding cloning for READ and WRITE bios improves performance of targets
that do very little work in response to each bio (e.g. dm-linear and
dm-striped).
The improvement is accomplished by changing DM core to allocate a
'dm_noclone' structure (that is quite small) instead of cloning the bio.
The bio's bi_end_io and bi_private are saved in the 'dm_noclone' before
they are overwritten and the bio passed to the lower block device.
When the bio is finished, the function noclone_endio restores the values
bi_end_io and bi_private and passes the bio to the original bi_end_io
function.
If the allocation of the 'struct dm_noclone' fails then bio-based DM
falls back to the traditional bio cloning IO path that is backed by
mempool reservations.
Performance improvement for dm-linear:
x86-64, 2x six-core
/dev/ram0 2449MiB/s
/dev/mapper/lin 5.0-rc without optimization 1970MiB/s
/dev/mapper/lin 5.0-rc with optimization 2238MiB/s
arm64, quad core:
/dev/ram0 457MiB/s
/dev/mapper/lin 5.0-rc without optimization 325MiB/s
/dev/mapper/lin 5.0-rc with optimization 364MiB/s
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-core.h | 1 +
drivers/md/dm-linear.c | 1 +
drivers/md/dm-stripe.c | 1 +
drivers/md/dm-table.c | 11 +++++++
drivers/md/dm-zero.c | 1 +
drivers/md/dm.c | 71 ++++++++++++++++++++++++++++++++++++++++++-
drivers/md/dm.h | 1 +
include/linux/device-mapper.h | 5 +++
8 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 95c6d86ab5e8..b4832bba9d64 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -87,6 +87,7 @@ struct mapped_device {
*/
struct bio_set io_bs;
struct bio_set bs;
+ struct kmem_cache *noclone_cache;
/*
* Processing queue (flush)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..6e1df9fdfcc8 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_secure_erase_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
+ ti->no_clone = true;
ti->private = lc;
return 0;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 8547d7594338..32181b7ca34a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -172,6 +172,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_secure_erase_bios = stripes;
ti->num_write_same_bios = stripes;
ti->num_write_zeroes_bios = stripes;
+ ti->no_clone = true;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b1be754cc41..6a3e23faeb7d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -47,6 +47,7 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
+ bool no_clone:1;
unsigned integrity_added:1;
/*
@@ -191,6 +192,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
if (!t)
return -ENOMEM;
+ t->no_clone = true;
+
INIT_LIST_HEAD(&t->devices);
INIT_LIST_HEAD(&t->target_callbacks);
@@ -789,6 +792,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
if (r)
goto bad;
+ if (!tgt->no_clone)
+ t->no_clone = false;
+
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
if (!tgt->num_discard_bios && tgt->discards_supported)
@@ -1376,6 +1382,11 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
+bool dm_table_supports_noclone(struct dm_table *table)
+{
+ return table->no_clone;
+}
+
/*
* Check whether a table has no data devices attached using each
* target's iterate_devices method.
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b65ca8dcfbdc..436a5ee89698 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -26,6 +26,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
* Silently drop discards, avoiding -EOPNOTSUPP.
*/
ti->num_discard_bios = 1;
+ ti->no_clone = true;
return 0;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1b87d20041e7..cbda11b34635 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -158,8 +158,16 @@ struct table_device {
struct dm_dev dm_dev;
};
+struct dm_noclone {
+ struct mapped_device *md;
+ bio_end_io_t *orig_bi_end_io;
+ void *orig_bi_private;
+ unsigned long start_time;
+};
+
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;
+static struct kmem_cache *_noclone_cache;
/*
* Bio-based DM's mempools' reserved IOs set by the user.
@@ -233,9 +241,13 @@ static int __init local_init(void)
if (!_rq_cache)
goto out_free_rq_tio_cache;
+ _noclone_cache = KMEM_CACHE(dm_noclone, 0);
+ if (!_noclone_cache)
+ goto out_free_rq_cache;
+
r = dm_uevent_init();
if (r)
- goto out_free_rq_cache;
+ goto out_free_noclone_cache;
deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
if (!deferred_remove_workqueue) {
@@ -257,6 +269,8 @@ static int __init local_init(void)
destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
dm_uevent_exit();
+out_free_noclone_cache:
+ kmem_cache_destroy(_noclone_cache);
out_free_rq_cache:
kmem_cache_destroy(_rq_cache);
out_free_rq_tio_cache:
@@ -270,6 +284,7 @@ static void local_exit(void)
flush_scheduled_work();
destroy_workqueue(deferred_remove_workqueue);
+ kmem_cache_destroy(_noclone_cache);
kmem_cache_destroy(_rq_cache);
kmem_cache_destroy(_rq_tio_cache);
unregister_blkdev(_major, _name);
@@ -1009,6 +1024,20 @@ static void clone_endio(struct bio *bio)
dec_pending(io, error);
}
+static void noclone_endio(struct bio *bio)
+{
+ struct dm_noclone *noclone = bio->bi_private;
+ struct mapped_device *md = noclone->md;
+
+ end_io_acct(md, bio, noclone->start_time);
+
+ bio->bi_end_io = noclone->orig_bi_end_io;
+ bio->bi_private = noclone->orig_bi_private;
+ kmem_cache_free(_noclone_cache, noclone);
+
+ bio_endio(bio);
+}
+
/*
* Return maximum size of I/O possible at the supplied sector up to the current
* target boundary.
@@ -1774,8 +1803,48 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
+ if (dm_table_supports_noclone(map) &&
+ (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+ likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+ !bio_flagged(bio, BIO_CHAIN) &&
+ likely(!bio_integrity(bio)) &&
+ likely(!dm_stats_used(&md->stats))) {
+ int r;
+ struct dm_noclone *noclone;
+ struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+ if (unlikely(!dm_target_is_valid(ti)))
+ goto no_fast_path;
+ if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
+ goto no_fast_path;
+ noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
+ if (unlikely(!noclone))
+ goto no_fast_path;
+ noclone->md = md;
+ noclone->start_time = jiffies;
+ noclone->orig_bi_end_io = bio->bi_end_io;
+ noclone->orig_bi_private = bio->bi_private;
+ bio->bi_end_io = noclone_endio;
+ bio->bi_private = noclone;
+ start_io_acct(md, bio);
+ r = ti->type->map(ti, bio);
+ ret = BLK_QC_T_NONE;
+ if (likely(r == DM_MAPIO_REMAPPED)) {
+ ret = generic_make_request(bio);
+ } else if (likely(r == DM_MAPIO_SUBMITTED)) {
+ } else if (r == DM_MAPIO_KILL) {
+ bio->bi_status = BLK_STS_IOERR;
+ noclone_endio(bio);
+ } else {
+ DMWARN("unimplemented target map return value: %d", r);
+ BUG();
+ }
+ goto put_table_ret;
+ }
+
+no_fast_path:
ret = dm_process_bio(md, map, bio);
+put_table_ret:
dm_put_live_table(md, srcu_idx);
return ret;
}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 2d539b82ec08..c3c78123dfb3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,6 +53,7 @@ void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+bool dm_table_supports_noclone(struct dm_table *t);
bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0f5b3d7c6cb3..4ab2b0f53ae8 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,6 +315,11 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
+
+ /*
+ * The target can process bios without cloning them.
+ */
+ bool no_clone:1;
};
/* Each target can link one of these into the table */
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 5/8] dm: improve noclone bio support
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
` (3 preceding siblings ...)
2019-02-19 22:17 ` [PATCH 4/8] dm: implement noclone optimization for bio-based Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio Mike Snitzer
` (2 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
Leverage fact that dm_queue_split() enables use of noclone support even
for targets that require additional splitting (via ti->max_io_len).
To achieve this move noclone processing from dm_make_request() to
dm_process_bio() and check if bio->bi_end_io is already set to
noclone_endio. This also fixes dm_wq_work() to be able to support
noclone bios (because it calls dm_process_bio()).
While at it, clean up ->map() return processing to be comparable to
__map_bio() and add some code comments that explain why certain
conditionals exist to prevent the use of noclone.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 102 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 61 insertions(+), 41 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index cbda11b34635..6868b80fc70c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1769,14 +1769,74 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
* If in ->make_request_fn we need to use blk_queue_split(), otherwise
* queue_limits for abnormal requests (e.g. discard, writesame, etc)
* won't be imposed.
+ *
+ * For normal READ and WRITE requests we benefit from upfront splitting
+ * via dm_queue_split() because we can then leverage noclone support below.
*/
- if (current->bio_list) {
+ if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
+ /*
+ * It is fine to use {blk,dm}_queue_split() before opting to use noclone
+ * because any ->bi_private and ->bi_end_io will get saved off below.
+ * Once noclone_endio() is called the bio_chain's endio will kick in.
+ * - __split_and_process_bio() can split further, but any targets that
+ * require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
+ * callers) shouldn't set ti->no_clone.
+ */
if (is_abnormal_io(bio))
blk_queue_split(md->queue, &bio);
else
dm_queue_split(md, ti, &bio);
}
+ if (dm_table_supports_noclone(map) &&
+ (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+ likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+ likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
+ likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
+ int r;
+ struct dm_noclone *noclone;
+
+ /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
+ if (bio->bi_end_io != noclone_endio) {
+ noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
+ if (unlikely(!noclone))
+ goto no_fast_path;
+
+ noclone->md = md;
+ noclone->start_time = jiffies;
+ noclone->orig_bi_end_io = bio->bi_end_io;
+ noclone->orig_bi_private = bio->bi_private;
+ bio->bi_end_io = noclone_endio;
+ bio->bi_private = noclone;
+ }
+
+ start_io_acct(md, bio);
+ r = ti->type->map(ti, bio);
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
+ /* the bio has been remapped so dispatch it */
+ if (md->type == DM_TYPE_NVME_BIO_BASED)
+ ret = direct_make_request(bio);
+ else
+ ret = generic_make_request(bio);
+ break;
+ case DM_MAPIO_KILL:
+ bio_io_error(bio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
+ noclone_endio(bio);
+ break;
+ default:
+ DMWARN("unimplemented target map return value: %d", r);
+ BUG();
+ }
+
+ return ret;
+ }
+no_fast_path:
if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
return __process_bio(md, map, bio, ti);
else
@@ -1803,48 +1863,8 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
- if (dm_table_supports_noclone(map) &&
- (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
- likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
- !bio_flagged(bio, BIO_CHAIN) &&
- likely(!bio_integrity(bio)) &&
- likely(!dm_stats_used(&md->stats))) {
- int r;
- struct dm_noclone *noclone;
- struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
- if (unlikely(!dm_target_is_valid(ti)))
- goto no_fast_path;
- if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
- goto no_fast_path;
- noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT);
- if (unlikely(!noclone))
- goto no_fast_path;
- noclone->md = md;
- noclone->start_time = jiffies;
- noclone->orig_bi_end_io = bio->bi_end_io;
- noclone->orig_bi_private = bio->bi_private;
- bio->bi_end_io = noclone_endio;
- bio->bi_private = noclone;
- start_io_acct(md, bio);
- r = ti->type->map(ti, bio);
- ret = BLK_QC_T_NONE;
- if (likely(r == DM_MAPIO_REMAPPED)) {
- ret = generic_make_request(bio);
- } else if (likely(r == DM_MAPIO_SUBMITTED)) {
- } else if (r == DM_MAPIO_KILL) {
- bio->bi_status = BLK_STS_IOERR;
- noclone_endio(bio);
- } else {
- DMWARN("unimplemented target map return value: %d", r);
- BUG();
- }
- goto put_table_ret;
- }
-
-no_fast_path:
ret = dm_process_bio(md, map, bio);
-put_table_ret:
dm_put_live_table(md, srcu_idx);
return ret;
}
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
` (4 preceding siblings ...)
2019-02-19 22:17 ` [PATCH 5/8] dm: improve noclone bio support Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
2019-02-19 22:17 ` [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data Mike Snitzer
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
A DM target can manage its own per-bio-data objects and attach them to
the new 'noclone_private' member in struct dm_noclone.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 16 ++++++++--------
include/linux/device-mapper.h | 17 +++++++++++++++++
2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6868b80fc70c..8ff0ced278d7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -158,13 +158,6 @@ struct table_device {
struct dm_dev dm_dev;
};
-struct dm_noclone {
- struct mapped_device *md;
- bio_end_io_t *orig_bi_end_io;
- void *orig_bi_private;
- unsigned long start_time;
-};
-
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;
static struct kmem_cache *_noclone_cache;
@@ -1802,13 +1795,15 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
if (unlikely(!noclone))
goto no_fast_path;
+ noclone->magic = DM_NOCLONE_MAGIC;
noclone->md = md;
noclone->start_time = jiffies;
noclone->orig_bi_end_io = bio->bi_end_io;
noclone->orig_bi_private = bio->bi_private;
bio->bi_end_io = noclone_endio;
bio->bi_private = noclone;
- }
+ } else
+ noclone = bio->bi_private;
start_io_acct(md, bio);
r = ti->type->map(ti, bio);
@@ -1822,6 +1817,11 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
else
ret = generic_make_request(bio);
break;
+ case DM_MAPIO_NOCLONE_FAILED:
+ end_io_acct(md, bio, noclone->start_time);
+ kmem_cache_free(_noclone_cache, noclone);
+ goto no_fast_path;
+ break;
case DM_MAPIO_KILL:
bio_io_error(bio);
break;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4ab2b0f53ae8..b3e33d5cae8b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -322,6 +322,22 @@ struct dm_target {
bool no_clone:1;
};
+#define DM_NOCLONE_MAGIC 9693664
+struct dm_noclone {
+ unsigned magic;
+ struct mapped_device *md;
+ bio_end_io_t *orig_bi_end_io;
+ void *orig_bi_private;
+ void *noclone_private;
+ unsigned long start_time;
+};
+
+static inline struct dm_noclone *dm_get_noclone_from_bio(struct bio *bio)
+{
+ struct dm_noclone *noclone = bio->bi_private;
+ return (noclone && noclone->magic == DM_NOCLONE_MAGIC) ? noclone : NULL;
+}
+
/* Each target can link one of these into the table */
struct dm_target_callbacks {
struct list_head list;
@@ -572,6 +588,7 @@ do { \
#define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE
#define DM_MAPIO_DELAY_REQUEUE DM_ENDIO_DELAY_REQUEUE
#define DM_MAPIO_KILL 4
+#define DM_MAPIO_NOCLONE_FAILED 5
#define dm_sector_div64(x, y)( \
{ \
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 7/8] dm: improve noclone_endio() to support multipath target
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
` (5 preceding siblings ...)
2019-02-19 22:17 ` [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
2019-02-20 15:08 ` Mikulas Patocka
2019-02-19 22:17 ` [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data Mike Snitzer
7 siblings, 1 reply; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
noclone_endio() must call the target's ->end_io method if it exists.
It must also handle the various DM_ENDIO_* return values that are possible.
Factor out dm_bio_pushback_needed() for both dec_pending() and
noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
Lastly, there is no need to conditionally set md->immutable_target in
__bind(). If the device only uses a single immutable target then it
should always be reflected in md->immutable_target. This is important
because noclone_endio() benefits from this to get access to the
multipath dm_target without needing to add another member to the
'dm_noclone' structure.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 55 insertions(+), 22 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8ff0ced278d7..2299f946c175 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
+static bool dm_bio_pushback_needed(struct mapped_device *md,
+ struct bio *bio, blk_status_t *error)
+{
+ unsigned long flags;
+
+ /*
+ * Target requested pushing back the I/O.
+ */
+ if (__noflush_suspending(md)) {
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ // FIXME: using bio_list_add_head() creates LIFO
+ bio_list_add_head(&md->deferred, bio);
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+ return true;
+ } else {
+ /* noflush suspend was interrupted. */
+ *error = BLK_STS_IOERR;
+ }
+
+ return false;
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->status == BLK_STS_DM_REQUEUE) {
- /*
- * Target requested pushing back the I/O.
- */
- spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md))
- /* NOTE early return due to BLK_STS_DM_REQUEUE below */
- bio_list_add_head(&md->deferred, io->orig_bio);
- else
- /* noflush suspend was interrupted. */
- io->status = BLK_STS_IOERR;
- spin_unlock_irqrestore(&md->deferred_lock, flags);
- }
+ if (unlikely(io->status == BLK_STS_DM_REQUEUE))
+ (void) dm_bio_pushback_needed(md, bio, &io->status);
io_error = io->status;
bio = io->orig_bio;
@@ -1019,8 +1030,33 @@ static void clone_endio(struct bio *bio)
static void noclone_endio(struct bio *bio)
{
+ blk_status_t error = bio->bi_status;
struct dm_noclone *noclone = bio->bi_private;
struct mapped_device *md = noclone->md;
+ struct dm_target *ti = NULL;
+ dm_endio_fn endio = NULL;
+
+ if (md->immutable_target) {
+ ti = md->immutable_target;
+ endio = ti->type->end_io;
+ }
+
+ if (endio) {
+ int r = endio(ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
end_io_acct(md, bio, noclone->start_time);
@@ -1028,6 +1064,11 @@ static void noclone_endio(struct bio *bio)
bio->bi_private = noclone->orig_bi_private;
kmem_cache_free(_noclone_cache, noclone);
+ if (unlikely(error == BLK_STS_DM_REQUEUE)) {
+ if (dm_bio_pushback_needed(md, bio, &bio->bi_status))
+ return;
+ }
+
bio_endio(bio);
}
@@ -2229,15 +2270,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (request_based)
dm_stop_queue(q);
- if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
- /*
- * Leverage the fact that request-based DM targets and
- * NVMe bio based targets are immutable singletons
- * - used to optimize both dm_request_fn and dm_mq_queue_rq;
- * and __process_bio.
- */
- md->immutable_target = dm_table_get_immutable_target(t);
- }
+ md->immutable_target = dm_table_get_immutable_target(t);
ret = __bind_mempools(md, t);
if (ret) {
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
` (6 preceding siblings ...)
2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
@ 2019-02-19 22:17 ` Mike Snitzer
7 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-19 22:17 UTC (permalink / raw)
To: dm-devel
Update multipath_init_per_bio_data() to allocate a dm_mpath_per_bio_data
structure and associate it with the noclone bio using the dm_noclone
structure's 'noclone_private' member.
If kmem_cache_alloc() fails to allocate a struct dm_mpath_per_bio_data,
DM_MAPIO_NOCLONE_FAILED is returned from ->map and DM core falls back to
using traditional bio-based DM's bio cloning.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-mpath.c | 65 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 55 insertions(+), 10 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2ee5e357a0a7..ac7ca5334796 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -30,6 +30,8 @@
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
+static struct kmem_cache *_noclone_per_bio_data_cache;
+
/* Path properties */
struct pgpath {
struct list_head list;
@@ -101,6 +103,11 @@ struct dm_mpath_io {
size_t nr_bytes;
};
+struct dm_mpath_per_bio_data {
+ struct dm_mpath_io mpio;
+ struct dm_bio_details bio_details;
+};
+
typedef int (*action_fn) (struct pgpath *pgpath);
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -250,11 +257,16 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
static size_t multipath_per_bio_data_size(void)
{
- return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
+ return sizeof(struct dm_mpath_per_bio_data);
}
static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
{
+ struct dm_noclone *noclone = dm_get_noclone_from_bio(bio);
+
+ if (noclone)
+ return noclone->noclone_private;
+
return dm_per_bio_data(bio, multipath_per_bio_data_size());
}
@@ -265,16 +277,29 @@ static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio
return bio_details;
}
-static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
+static bool multipath_init_per_bio_data(struct dm_target *ti, struct bio *bio,
+ struct dm_mpath_io **mpio_p)
{
- struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
- struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
+ struct dm_mpath_io *mpio;
+ struct dm_bio_details *bio_details;
+ struct dm_noclone *noclone = dm_get_noclone_from_bio(bio);
+
+ if (noclone) {
+ noclone->noclone_private =
+ kmem_cache_alloc(_noclone_per_bio_data_cache, GFP_NOWAIT);
+ if (unlikely(!noclone->noclone_private))
+ return false;
+ }
+
+ mpio = get_mpio_from_bio(bio);
+ bio_details = get_bio_details_from_mpio(mpio);
mpio->nr_bytes = bio->bi_iter.bi_size;
mpio->pgpath = NULL;
*mpio_p = mpio;
dm_bio_record(bio_details, bio);
+ return true;
}
/*-----------------------------------------------
@@ -652,7 +677,9 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = NULL;
- multipath_init_per_bio_data(bio, &mpio);
+ if (!multipath_init_per_bio_data(ti, bio, &mpio))
+ return DM_MAPIO_NOCLONE_FAILED;
+
return __multipath_map_bio(m, bio, mpio);
}
@@ -1178,9 +1205,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ ti->no_clone = true;
ti->per_io_data_size = multipath_per_bio_data_size();
- else
+ } else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
return 0;
@@ -1584,12 +1612,13 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *bio,
blk_status_t *error)
{
struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
struct pgpath *pgpath = mpio->pgpath;
+ struct dm_noclone *noclone = NULL;
unsigned long flags;
int r = DM_ENDIO_DONE;
@@ -1611,7 +1640,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
}
spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_bios, clone);
+ bio_list_add(&m->queued_bios, bio);
spin_unlock_irqrestore(&m->lock, flags);
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
@@ -1625,6 +1654,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
+ if (r != DM_ENDIO_INCOMPLETE) {
+ noclone = dm_get_noclone_from_bio(bio);
+ if (noclone)
+ kmem_cache_free(_noclone_per_bio_data_cache, noclone->noclone_private);
+ }
+
return r;
}
@@ -2057,6 +2092,13 @@ static int __init dm_multipath_init(void)
goto bad_alloc_kmpath_handlerd;
}
+ _noclone_per_bio_data_cache = KMEM_CACHE(dm_mpath_per_bio_data, 0);
+ if (!_noclone_per_bio_data_cache) {
+ DMERR("failed to create noclone_per_bio_data_cache");
+ r = -ENOMEM;
+ goto bad_noclone_pbd;
+ }
+
r = dm_register_target(&multipath_target);
if (r < 0) {
DMERR("request-based register failed %d", r);
@@ -2067,6 +2109,8 @@ static int __init dm_multipath_init(void)
return 0;
bad_register_target:
+ kmem_cache_destroy(_noclone_per_bio_data_cache);
+bad_noclone_pbd:
destroy_workqueue(kmpath_handlerd);
bad_alloc_kmpath_handlerd:
destroy_workqueue(kmultipathd);
@@ -2078,6 +2122,7 @@ static void __exit dm_multipath_exit(void)
{
destroy_workqueue(kmpath_handlerd);
destroy_workqueue(kmultipathd);
+ kmem_cache_destroy(_noclone_per_bio_data_cache);
dm_unregister_target(&multipath_target);
}
--
2.15.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 7/8] dm: improve noclone_endio() to support multipath target
2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
@ 2019-02-20 15:08 ` Mikulas Patocka
2019-02-20 15:14 ` Mike Snitzer
0 siblings, 1 reply; 11+ messages in thread
From: Mikulas Patocka @ 2019-02-20 15:08 UTC (permalink / raw)
To: Mike Snitzer; +Cc: dm-devel
On Tue, 19 Feb 2019, Mike Snitzer wrote:
> noclone_endio() must call the target's ->endio method if it exists.
> Also must handle the various DM_ENDIO_* returns that are possible.
>
> Factor out dm_bio_pushback_needed() for both dec_pending() and
> noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
>
> Lastly, there is no need to conditionally set md->immutable_target in
> __bind(). If the device only uses a single immutable target then it
> should always be reflected in md->immutable_target. This is important
> because noclone_endio() benefits from this to get access to the
> multipath dm_target without needing to add another member to the
> 'dm_noclone' structure.
>
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
> drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 55 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 8ff0ced278d7..2299f946c175 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
> return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
> }
>
> +static bool dm_bio_pushback_needed(struct mapped_device *md,
> + struct bio *bio, blk_status_t *error)
> +{
> + unsigned long flags;
> +
> + /*
> + * Target requested pushing back the I/O.
> + */
> + if (__noflush_suspending(md)) {
> + spin_lock_irqsave(&md->deferred_lock, flags);
> + // FIXME: using bio_list_add_head() creates LIFO
> + bio_list_add_head(&md->deferred, bio);
> + spin_unlock_irqrestore(&md->deferred_lock, flags);
> + return true;
> + } else {
> + /* noflush suspend was interrupted. */
> + *error = BLK_STS_IOERR;
> + }
> +
> + return false;
> +}
> +
> /*
> * Decrements the number of outstanding ios that a bio has been
> * cloned into, completing the original io if necc.
> @@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
> }
>
> if (atomic_dec_and_test(&io->io_count)) {
> - if (io->status == BLK_STS_DM_REQUEUE) {
> - /*
> - * Target requested pushing back the I/O.
> - */
> - spin_lock_irqsave(&md->deferred_lock, flags);
> - if (__noflush_suspending(md))
> - /* NOTE early return due to BLK_STS_DM_REQUEUE below */
> - bio_list_add_head(&md->deferred, io->orig_bio);
> - else
> - /* noflush suspend was interrupted. */
> - io->status = BLK_STS_IOERR;
> - spin_unlock_irqrestore(&md->deferred_lock, flags);
> - }
> + if (unlikely(io->status == BLK_STS_DM_REQUEUE))
> + (void) dm_bio_pushback_needed(md, bio, &io->status);
This triggers a compiler warning because the variable bio is uninitialized
here.
Mikulas
>
> io_error = io->status;
> bio = io->orig_bio;
> @@ -1019,8 +1030,33 @@ static void clone_endio(struct bio *bio)
>
> static void noclone_endio(struct bio *bio)
> {
> + blk_status_t error = bio->bi_status;
> struct dm_noclone *noclone = bio->bi_private;
> struct mapped_device *md = noclone->md;
> + struct dm_target *ti = NULL;
> + dm_endio_fn endio = NULL;
> +
> + if (md->immutable_target) {
> + ti = md->immutable_target;
> + endio = ti->type->end_io;
> + }
> +
> + if (endio) {
> + int r = endio(ti, bio, &error);
> + switch (r) {
> + case DM_ENDIO_REQUEUE:
> + error = BLK_STS_DM_REQUEUE;
> + /*FALLTHRU*/
> + case DM_ENDIO_DONE:
> + break;
> + case DM_ENDIO_INCOMPLETE:
> + /* The target will handle the io */
> + return;
> + default:
> + DMWARN("unimplemented target endio return value: %d", r);
> + BUG();
> + }
> + }
>
> end_io_acct(md, bio, noclone->start_time);
>
> @@ -1028,6 +1064,11 @@ static void noclone_endio(struct bio *bio)
> bio->bi_private = noclone->orig_bi_private;
> kmem_cache_free(_noclone_cache, noclone);
>
> + if (unlikely(error == BLK_STS_DM_REQUEUE)) {
> + if (dm_bio_pushback_needed(md, bio, &bio->bi_status))
> + return;
> + }
> +
> bio_endio(bio);
> }
>
> @@ -2229,15 +2270,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
> if (request_based)
> dm_stop_queue(q);
>
> - if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
> - /*
> - * Leverage the fact that request-based DM targets and
> - * NVMe bio based targets are immutable singletons
> - * - used to optimize both dm_request_fn and dm_mq_queue_rq;
> - * and __process_bio.
> - */
> - md->immutable_target = dm_table_get_immutable_target(t);
> - }
> + md->immutable_target = dm_table_get_immutable_target(t);
>
> ret = __bind_mempools(md, t);
> if (ret) {
> --
> 2.15.0
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 7/8] dm: improve noclone_endio() to support multipath target
2019-02-20 15:08 ` Mikulas Patocka
@ 2019-02-20 15:14 ` Mike Snitzer
0 siblings, 0 replies; 11+ messages in thread
From: Mike Snitzer @ 2019-02-20 15:14 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: dm-devel
On Wed, Feb 20 2019 at 10:08am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Tue, 19 Feb 2019, Mike Snitzer wrote:
>
> > noclone_endio() must call the target's ->endio method if it exists.
> > Also must handle the various DM_ENDIO_* returns that are possible.
> >
> > Factor out dm_bio_pushback_needed() for both dec_pending() and
> > noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
> >
> > Lastly, there is no need to conditionally set md->immutable_target in
> > __bind(). If the device only uses a single immutable target then it
> > should always be reflected in md->immutable_target. This is important
> > because noclone_endio() benefits from this to get access to the
> > multipath dm_target without needing to add another member to the
> > 'dm_noclone' structure.
> >
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> > drivers/md/dm.c | 77 ++++++++++++++++++++++++++++++++++++++++-----------------
> > 1 file changed, 55 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > index 8ff0ced278d7..2299f946c175 100644
> > --- a/drivers/md/dm.c
> > +++ b/drivers/md/dm.c
> > @@ -897,6 +897,28 @@ static int __noflush_suspending(struct mapped_device *md)
> > return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
> > }
> >
> > +static bool dm_bio_pushback_needed(struct mapped_device *md,
> > + struct bio *bio, blk_status_t *error)
> > +{
> > + unsigned long flags;
> > +
> > + /*
> > + * Target requested pushing back the I/O.
> > + */
> > + if (__noflush_suspending(md)) {
> > + spin_lock_irqsave(&md->deferred_lock, flags);
> > + // FIXME: using bio_list_add_head() creates LIFO
> > + bio_list_add_head(&md->deferred, bio);
> > + spin_unlock_irqrestore(&md->deferred_lock, flags);
> > + return true;
> > + } else {
> > + /* noflush suspend was interrupted. */
> > + *error = BLK_STS_IOERR;
> > + }
> > +
> > + return false;
> > +}
> > +
> > /*
> > * Decrements the number of outstanding ios that a bio has been
> > * cloned into, completing the original io if necc.
> > @@ -917,19 +939,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
> > }
> >
> > if (atomic_dec_and_test(&io->io_count)) {
> > - if (io->status == BLK_STS_DM_REQUEUE) {
> > - /*
> > - * Target requested pushing back the I/O.
> > - */
> > - spin_lock_irqsave(&md->deferred_lock, flags);
> > - if (__noflush_suspending(md))
> > - /* NOTE early return due to BLK_STS_DM_REQUEUE below */
> > - bio_list_add_head(&md->deferred, io->orig_bio);
> > - else
> > - /* noflush suspend was interrupted. */
> > - io->status = BLK_STS_IOERR;
> > - spin_unlock_irqrestore(&md->deferred_lock, flags);
> > - }
> > + if (unlikely(io->status == BLK_STS_DM_REQUEUE))
> > + (void) dm_bio_pushback_needed(md, bio, &io->status);
>
> This triggers compiler warning because the variable bio is uninitialized
> here.
Right, I fixed it up in latest git, please see latest 'dm-5.1' branch:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/log/?h=dm-5.1
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2019-02-20 15:14 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-19 22:17 [PATCH 0/8] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
2019-02-19 22:17 ` [PATCH 1/8] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
2019-02-19 22:17 ` [PATCH 2/8] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
2019-02-19 22:17 ` [PATCH 3/8] dm: refactor start_io_acct and end_io_acct Mike Snitzer
2019-02-19 22:17 ` [PATCH 4/8] dm: implement noclone optimization for bio-based Mike Snitzer
2019-02-19 22:17 ` [PATCH 5/8] dm: improve noclone bio support Mike Snitzer
2019-02-19 22:17 ` [PATCH 6/8] dm: add the ability to attach per-bio-data to dm_noclone bio Mike Snitzer
2019-02-19 22:17 ` [PATCH 7/8] dm: improve noclone_endio() to support multipath target Mike Snitzer
2019-02-20 15:08 ` Mikulas Patocka
2019-02-20 15:14 ` Mike Snitzer
2019-02-19 22:17 ` [PATCH 8/8] dm mpath: add support for dm_noclone and its per-bio-data Mike Snitzer
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.