* [PATCH v2] md: add block tracing for bio_remapping
@ 2016-11-18 2:22 NeilBrown
2016-11-18 17:26 ` Shaohua Li
0 siblings, 1 reply; 4+ messages in thread
From: NeilBrown @ 2016-11-18 2:22 UTC (permalink / raw)
To: Shaohua Li; +Cc: linux-raid
[-- Attachment #1: Type: text/plain, Size: 10897 bytes --]
The block tracing infrastructure (accessed with blktrace/blkparse)
supports the tracing of mapping bios from one device to another.
This is currently used when a bio in a partition is mapped to the
whole device, when bios are mapped by dm, and for mapping in md/raid5.
Other md personalities do not include this tracing yet, so add it.
When a read-error is detected we redirect the request to a different device.
This could justifiably be seen as a new mapping for the original bio,
or a secondary mapping for the bio that errors. This patch uses
the second option.
When md is used under dm-raid, the mappings are not traced as we do
not have access to the block device number of the parent.
Signed-off-by: NeilBrown <neilb@suse.com>
---
This is the revised version based on discussions.
Now uses correct sector for linear and raid0, and code for raid1/raid10
rearranged a bit.
drivers/md/linear.c | 18 ++++++++++++------
drivers/md/raid0.c | 13 ++++++++++---
drivers/md/raid1.c | 26 ++++++++++++++++++++++++--
drivers/md/raid10.c | 29 +++++++++++++++++++++++++++--
4 files changed, 73 insertions(+), 13 deletions(-)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 9c7d4f5483ea..5975c9915684 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "linear.h"
@@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
}
do {
- tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
+ sector_t bio_sector = bio->bi_iter.bi_sector;
+ tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
bio->bi_bdev = tmp_dev->rdev->bdev;
- if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
- bio->bi_iter.bi_sector < start_sector))
+ if (unlikely(bio_sector >= end_sector ||
+ bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
- split = bio_split(bio, end_sector -
- bio->bi_iter.bi_sector,
+ split = bio_split(bio, end_sector - bio_sector,
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
@@ -256,8 +257,13 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
- } else
+ } else {
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
+ split, disk_devt(mddev->gendisk),
+ bio_sector);
generic_make_request(split);
+ }
} while (split != bio);
return;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b3ba77a3c3bc..e628f187e5ad 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "raid0.h"
#include "raid5.h"
@@ -463,7 +464,8 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
}
do {
- sector_t sector = bio->bi_iter.bi_sector;
+ sector_t bio_sector = bio->bi_iter.bi_sector;
+ sector_t sector = bio_sector;
unsigned chunk_sects = mddev->chunk_sectors;
unsigned sectors = chunk_sects -
@@ -472,7 +474,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
: sector_div(sector, chunk_sects));
/* Restore due to sector_div */
- sector = bio->bi_iter.bi_sector;
+ sector = bio_sector;
if (sectors < bio_sectors(bio)) {
split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
@@ -491,8 +493,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
- } else
+ } else {
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
+ split, disk_devt(mddev->gendisk),
+ bio_sector);
generic_make_request(split);
+ }
} while (split != bio);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9ac61cd85e5c..2dc1934925ec 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -37,6 +37,7 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
+#include <trace/events/block.h>
#include "md.h"
#include "raid1.h"
#include "bitmap.h"
@@ -1162,6 +1163,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
bio_set_op_attrs(read_bio, op, do_sync);
read_bio->bi_private = r1_bio;
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+ read_bio, disk_devt(mddev->gendisk),
+ r1_bio->sector);
+
if (max_sectors < r1_bio->sectors) {
/* could not read all from this device, so we will
* need another r1_bio.
@@ -1367,13 +1373,20 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
mbio->bi_iter.bi_sector = (r1_bio->sector +
conf->mirrors[i].rdev->data_offset);
- mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
+ mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
mbio->bi_end_io = raid1_end_write_request;
bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
mbio->bi_private = r1_bio;
atomic_inc(&r1_bio->remaining);
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
+ mbio, disk_devt(mddev->gendisk),
+ r1_bio->sector);
+ /* flush_pending_writes() needs access to the rdev so...*/
+ mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
+
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
if (cb)
plug = container_of(cb, struct raid1_plug_cb, cb);
@@ -2290,6 +2303,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
struct bio *bio;
char b[BDEVNAME_SIZE];
struct md_rdev *rdev;
+ dev_t bio_dev;
+ sector_t bio_sector;
clear_bit(R1BIO_ReadError, &r1_bio->state);
/* we got a read error. Maybe the drive is bad. Maybe just
@@ -2303,6 +2318,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
bio = r1_bio->bios[r1_bio->read_disk];
bdevname(bio->bi_bdev, b);
+ bio_dev = bio->bi_bdev->bd_dev;
+ bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
bio_put(bio);
r1_bio->bios[r1_bio->read_disk] = NULL;
@@ -2353,6 +2370,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
else
mbio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+ bio, bio_dev, bio_sector);
generic_make_request(bio);
bio = NULL;
@@ -2367,8 +2386,11 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
sectors_handled;
goto read_more;
- } else
+ } else {
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+ bio, bio_dev, bio_sector);
generic_make_request(bio);
+ }
}
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5290be3d5c26..c63041ec9415 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -25,6 +25,7 @@
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
+#include <trace/events/block.h>
#include "md.h"
#include "raid10.h"
#include "raid0.h"
@@ -1165,6 +1166,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
bio_set_op_attrs(read_bio, op, do_sync);
read_bio->bi_private = r10_bio;
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+ read_bio, disk_devt(mddev->gendisk),
+ r10_bio->sector);
if (max_sectors < r10_bio->sectors) {
/* Could not read all from this device, so we will
* need another r10_bio.
@@ -1367,11 +1372,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
choose_data_offset(r10_bio,
rdev));
- mbio->bi_bdev = (void*)rdev;
+ mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
mbio->bi_private = r10_bio;
+ if (conf->mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
+ mbio, disk_devt(conf->mddev->gendisk),
+ r10_bio->sector);
+ mbio->bi_bdev = (void*)rdev;
+
atomic_inc(&r10_bio->remaining);
cb = blk_check_plugged(raid10_unplug, mddev,
@@ -1409,11 +1420,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
choose_data_offset(
r10_bio, rdev));
- mbio->bi_bdev = (void*)rdev;
+ mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
mbio->bi_private = r10_bio;
+ if (conf->mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
+ mbio, disk_devt(conf->mddev->gendisk),
+ r10_bio->sector);
+ mbio->bi_bdev = (void*)rdev;
+
atomic_inc(&r10_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
@@ -2496,6 +2513,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
char b[BDEVNAME_SIZE];
unsigned long do_sync;
int max_sectors;
+ dev_t bio_dev;
+ sector_t bio_last_sector;
/* we got a read error. Maybe the drive is bad. Maybe just
* the block and we can fix it.
@@ -2507,6 +2526,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
*/
bio = r10_bio->devs[slot].bio;
bdevname(bio->bi_bdev, b);
+ bio_dev = bio->bi_bdev->bd_dev;
+ bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors;
bio_put(bio);
r10_bio->devs[slot].bio = NULL;
@@ -2546,6 +2567,10 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+ bio, bio_dev,
+ bio_last_sector - r10_bio->sectors);
+
if (max_sectors < r10_bio->sectors) {
/* Drat - have to split this up more */
struct bio *mbio = r10_bio->master_bio;
--
2.10.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 800 bytes --]
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2] md: add block tracing for bio_remapping
2016-11-18 2:22 [PATCH v2] md: add block tracing for bio_remapping NeilBrown
@ 2016-11-18 17:26 ` Shaohua Li
2016-11-18 17:50 ` Shaohua Li
0 siblings, 1 reply; 4+ messages in thread
From: Shaohua Li @ 2016-11-18 17:26 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid
On Fri, Nov 18, 2016 at 01:22:04PM +1100, Neil Brown wrote:
>
> The block tracing infrastructure (accessed with blktrace/blkparse)
> supports the tracing of mapping bios from one device to another.
> This is currently used when a bio in a partition is mapped to the
> whole device, when bios are mapped by dm, and for mapping in md/raid5.
> Other md personalities do not include this tracing yet, so add it.
>
> When a read-error is detected we redirect the request to a different device.
> This could justifiably be seen as a new mapping for the original bio,
> or a secondary mapping for the bio that errors. This patch uses
> the second option.
>
> When md is used under dm-raid, the mappings are not traced as we do
> not have access to the block device number of the parent.
thanks, applied patch 1, 3, 4.
Thanks,
Shaohua
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>
> This is the revised version based on discussions.
> Now uses correct sector for linear and raid0, and code for raid1/raid10
> rearranged a bit.
>
> drivers/md/linear.c | 18 ++++++++++++------
> drivers/md/raid0.c | 13 ++++++++++---
> drivers/md/raid1.c | 26 ++++++++++++++++++++++++--
> drivers/md/raid10.c | 29 +++++++++++++++++++++++++++--
> 4 files changed, 73 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index 9c7d4f5483ea..5975c9915684 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -21,6 +21,7 @@
> #include <linux/seq_file.h>
> #include <linux/module.h>
> #include <linux/slab.h>
> +#include <trace/events/block.h>
> #include "md.h"
> #include "linear.h"
>
> @@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
> }
>
> do {
> - tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
> + sector_t bio_sector = bio->bi_iter.bi_sector;
> + tmp_dev = which_dev(mddev, bio_sector);
> start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
> end_sector = tmp_dev->end_sector;
> data_offset = tmp_dev->rdev->data_offset;
> bio->bi_bdev = tmp_dev->rdev->bdev;
>
> - if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
> - bio->bi_iter.bi_sector < start_sector))
> + if (unlikely(bio_sector >= end_sector ||
> + bio_sector < start_sector))
> goto out_of_bounds;
>
> if (unlikely(bio_end_sector(bio) > end_sector)) {
> /* This bio crosses a device boundary, so we have to
> * split it.
> */
> - split = bio_split(bio, end_sector -
> - bio->bi_iter.bi_sector,
> + split = bio_split(bio, end_sector - bio_sector,
> GFP_NOIO, fs_bio_set);
> bio_chain(split, bio);
> } else {
> @@ -256,8 +257,13 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
> !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
> /* Just ignore it */
> bio_endio(split);
> - } else
> + } else {
> + if (mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
> + split, disk_devt(mddev->gendisk),
> + bio_sector);
> generic_make_request(split);
> + }
> } while (split != bio);
> return;
>
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index b3ba77a3c3bc..e628f187e5ad 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -21,6 +21,7 @@
> #include <linux/seq_file.h>
> #include <linux/module.h>
> #include <linux/slab.h>
> +#include <trace/events/block.h>
> #include "md.h"
> #include "raid0.h"
> #include "raid5.h"
> @@ -463,7 +464,8 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
> }
>
> do {
> - sector_t sector = bio->bi_iter.bi_sector;
> + sector_t bio_sector = bio->bi_iter.bi_sector;
> + sector_t sector = bio_sector;
> unsigned chunk_sects = mddev->chunk_sectors;
>
> unsigned sectors = chunk_sects -
> @@ -472,7 +474,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
> : sector_div(sector, chunk_sects));
>
> /* Restore due to sector_div */
> - sector = bio->bi_iter.bi_sector;
> + sector = bio_sector;
>
> if (sectors < bio_sectors(bio)) {
> split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
> @@ -491,8 +493,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
> !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
> /* Just ignore it */
> bio_endio(split);
> - } else
> + } else {
> + if (mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
> + split, disk_devt(mddev->gendisk),
> + bio_sector);
> generic_make_request(split);
> + }
> } while (split != bio);
> }
>
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 9ac61cd85e5c..2dc1934925ec 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -37,6 +37,7 @@
> #include <linux/module.h>
> #include <linux/seq_file.h>
> #include <linux/ratelimit.h>
> +#include <trace/events/block.h>
> #include "md.h"
> #include "raid1.h"
> #include "bitmap.h"
> @@ -1162,6 +1163,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
> bio_set_op_attrs(read_bio, op, do_sync);
> read_bio->bi_private = r1_bio;
>
> + if (mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
> + read_bio, disk_devt(mddev->gendisk),
> + r1_bio->sector);
> +
> if (max_sectors < r1_bio->sectors) {
> /* could not read all from this device, so we will
> * need another r1_bio.
> @@ -1367,13 +1373,20 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
>
> mbio->bi_iter.bi_sector = (r1_bio->sector +
> conf->mirrors[i].rdev->data_offset);
> - mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
> + mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
> mbio->bi_end_io = raid1_end_write_request;
> bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
> mbio->bi_private = r1_bio;
>
> atomic_inc(&r1_bio->remaining);
>
> + if (mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> + mbio, disk_devt(mddev->gendisk),
> + r1_bio->sector);
> + /* flush_pending_writes() needs access to the rdev so...*/
> + mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
> +
> cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
> if (cb)
> plug = container_of(cb, struct raid1_plug_cb, cb);
> @@ -2290,6 +2303,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
> struct bio *bio;
> char b[BDEVNAME_SIZE];
> struct md_rdev *rdev;
> + dev_t bio_dev;
> + sector_t bio_sector;
>
> clear_bit(R1BIO_ReadError, &r1_bio->state);
> /* we got a read error. Maybe the drive is bad. Maybe just
> @@ -2303,6 +2318,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>
> bio = r1_bio->bios[r1_bio->read_disk];
> bdevname(bio->bi_bdev, b);
> + bio_dev = bio->bi_bdev->bd_dev;
> + bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
> bio_put(bio);
> r1_bio->bios[r1_bio->read_disk] = NULL;
>
> @@ -2353,6 +2370,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
> else
> mbio->bi_phys_segments++;
> spin_unlock_irq(&conf->device_lock);
> + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> + bio, bio_dev, bio_sector);
> generic_make_request(bio);
> bio = NULL;
>
> @@ -2367,8 +2386,11 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
> sectors_handled;
>
> goto read_more;
> - } else
> + } else {
> + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> + bio, bio_dev, bio_sector);
> generic_make_request(bio);
> + }
> }
> }
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 5290be3d5c26..c63041ec9415 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -25,6 +25,7 @@
> #include <linux/seq_file.h>
> #include <linux/ratelimit.h>
> #include <linux/kthread.h>
> +#include <trace/events/block.h>
> #include "md.h"
> #include "raid10.h"
> #include "raid0.h"
> @@ -1165,6 +1166,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
> bio_set_op_attrs(read_bio, op, do_sync);
> read_bio->bi_private = r10_bio;
>
> + if (mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
> + read_bio, disk_devt(mddev->gendisk),
> + r10_bio->sector);
> if (max_sectors < r10_bio->sectors) {
> /* Could not read all from this device, so we will
> * need another r10_bio.
> @@ -1367,11 +1372,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
> mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
> choose_data_offset(r10_bio,
> rdev));
> - mbio->bi_bdev = (void*)rdev;
> + mbio->bi_bdev = rdev->bdev;
> mbio->bi_end_io = raid10_end_write_request;
> bio_set_op_attrs(mbio, op, do_sync | do_fua);
> mbio->bi_private = r10_bio;
>
> + if (conf->mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> + mbio, disk_devt(conf->mddev->gendisk),
> + r10_bio->sector);
> + mbio->bi_bdev = (void*)rdev;
> +
> atomic_inc(&r10_bio->remaining);
>
> cb = blk_check_plugged(raid10_unplug, mddev,
> @@ -1409,11 +1420,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
> mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
> choose_data_offset(
> r10_bio, rdev));
> - mbio->bi_bdev = (void*)rdev;
> + mbio->bi_bdev = rdev->bdev;
> mbio->bi_end_io = raid10_end_write_request;
> bio_set_op_attrs(mbio, op, do_sync | do_fua);
> mbio->bi_private = r10_bio;
>
> + if (conf->mddev->gendisk)
> + trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> + mbio, disk_devt(conf->mddev->gendisk),
> + r10_bio->sector);
> + mbio->bi_bdev = (void*)rdev;
> +
> atomic_inc(&r10_bio->remaining);
> spin_lock_irqsave(&conf->device_lock, flags);
> bio_list_add(&conf->pending_bio_list, mbio);
> @@ -2496,6 +2513,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
> char b[BDEVNAME_SIZE];
> unsigned long do_sync;
> int max_sectors;
> + dev_t bio_dev;
> + sector_t bio_last_sector;
>
> /* we got a read error. Maybe the drive is bad. Maybe just
> * the block and we can fix it.
> @@ -2507,6 +2526,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
> */
> bio = r10_bio->devs[slot].bio;
> bdevname(bio->bi_bdev, b);
> + bio_dev = bio->bi_bdev->bd_dev;
> + bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors;
> bio_put(bio);
> r10_bio->devs[slot].bio = NULL;
>
> @@ -2546,6 +2567,10 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
> bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
> bio->bi_private = r10_bio;
> bio->bi_end_io = raid10_end_read_request;
> + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> + bio, bio_dev,
> + bio_last_sector - r10_bio->sectors);
> +
> if (max_sectors < r10_bio->sectors) {
> /* Drat - have to split this up more */
> struct bio *mbio = r10_bio->master_bio;
> --
> 2.10.2
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] md: add block tracing for bio_remapping
2016-11-18 17:26 ` Shaohua Li
@ 2016-11-18 17:50 ` Shaohua Li
2016-11-21 4:00 ` NeilBrown
0 siblings, 1 reply; 4+ messages in thread
From: Shaohua Li @ 2016-11-18 17:50 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid
On Fri, Nov 18, 2016 at 09:26:22AM -0800, Shaohua Li wrote:
> On Fri, Nov 18, 2016 at 01:22:04PM +1100, Neil Brown wrote:
> >
> > The block tracing infrastructure (accessed with blktrace/blkparse)
> > supports the tracing of mapping bios from one device to another.
> > This is currently used when a bio in a partition is mapped to the
> > whole device, when bios are mapped by dm, and for mapping in md/raid5.
> > Other md personalities do not include this tracing yet, so add it.
> >
> > When a read-error is detected we redirect the request to a different device.
> > This could justifiably be seen as a new mapping for the original bio,
> > or a secondary mapping for the bio that errors. This patch uses
> > the second option.
> >
> > When md is used under dm-raid, the mappings are not traced as we do
> > not have access to the block device number of the parent.
>
> thanks, applied patch 1, 3, 4.
BTW, I added the patch below:
commit 504634f60f463e73e7d58c6810a04437da942dba
Author: Shaohua Li <shli@fb.com>
Date: Fri Nov 18 09:44:08 2016 -0800
md: add blktrace event for writes to superblock
Superblock write is an expensive operation. With raid5-cache, it can be called
regularly. Add tracing to help debug performance.
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: NeilBrown <neilb@suse.com>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1f1c7f0..d3cef77 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -64,6 +64,7 @@
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"
@@ -2403,6 +2404,8 @@ void md_update_sb(struct mddev *mddev, int force_change)
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev), mddev->in_sync);
+ if (mddev->queue)
+ blk_add_trace_msg(mddev->queue, "md md_update_sb");
bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2] md: add block tracing for bio_remapping
2016-11-18 17:50 ` Shaohua Li
@ 2016-11-21 4:00 ` NeilBrown
0 siblings, 0 replies; 4+ messages in thread
From: NeilBrown @ 2016-11-21 4:00 UTC (permalink / raw)
To: Shaohua Li; +Cc: linux-raid
[-- Attachment #1: Type: text/plain, Size: 2244 bytes --]
On Sat, Nov 19 2016, Shaohua Li wrote:
> On Fri, Nov 18, 2016 at 09:26:22AM -0800, Shaohua Li wrote:
>> On Fri, Nov 18, 2016 at 01:22:04PM +1100, Neil Brown wrote:
>> >
>> > The block tracing infrastructure (accessed with blktrace/blkparse)
>> > supports the tracing of mapping bios from one device to another.
>> > This is currently used when a bio in a partition is mapped to the
>> > whole device, when bios are mapped by dm, and for mapping in md/raid5.
>> > Other md personalities do not include this tracing yet, so add it.
>> >
>> > When a read-error is detected we redirect the request to a different device.
>> > This could justifiably be seen as a new mapping for the original bio,
>> > or a secondary mapping for the bio that errors. This patch uses
>> > the second option.
>> >
>> > When md is used under dm-raid, the mappings are not traced as we do
>> > not have access to the block device number of the parent.
>>
>> thanks, applied patch 1, 3, 4.
>
> BTW, I added below patch
>
Yes, that looks good. Thanks!
NeilBrown
>
> commit 504634f60f463e73e7d58c6810a04437da942dba
> Author: Shaohua Li <shli@fb.com>
> Date: Fri Nov 18 09:44:08 2016 -0800
>
> md: add blktrace event for writes to superblock
>
> superblock write is an expensive operation. With raid5-cache, it can be called
> regularly. Tracing to help performance debug.
>
> Signed-off-by: Shaohua Li <shli@fb.com>
> Cc: NeilBrown <neilb@suse.com>
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 1f1c7f0..d3cef77 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -64,6 +64,7 @@
> #include <linux/raid/md_p.h>
> #include <linux/raid/md_u.h>
> #include <linux/slab.h>
> +#include <trace/events/block.h>
> #include "md.h"
> #include "bitmap.h"
> #include "md-cluster.h"
> @@ -2403,6 +2404,8 @@ void md_update_sb(struct mddev *mddev, int force_change)
> pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
> mdname(mddev), mddev->in_sync);
>
> + if (mddev->queue)
> + blk_add_trace_msg(mddev->queue, "md md_update_sb");
> bitmap_update_sb(mddev->bitmap);
> rdev_for_each(rdev, mddev) {
> char b[BDEVNAME_SIZE];
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 800 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-11-21 4:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-18 2:22 [PATCH v2] md: add block tracing for bio_remapping NeilBrown
2016-11-18 17:26 ` Shaohua Li
2016-11-18 17:50 ` Shaohua Li
2016-11-21 4:00 ` NeilBrown
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.