* [PATCH 0/2] Reorganize raid*_make_request to clean up code
@ 2016-11-30 21:20 Robert LeBlanc
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
In response to Christoph, I've broken the read and writes into their own
functions to make the code even cleaner. Since it is such a big change, I broke
up the commits into this series instead of creating a v2 of the previous patch.
Robert LeBlanc (2):
md/raid1: Refactor raid1_make_request
md/raid10: Refactor raid10_make_request
drivers/md/raid1.c | 244 +++++++++++++++++++++++-----------------------
drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
2 files changed, 265 insertions(+), 250 deletions(-)
--
2.10.2
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/2] md/raid1: Refactor raid1_make_request
2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
@ 2016-11-30 21:20 ` Robert LeBlanc
2016-12-01 16:04 ` John Stoffel
2016-12-02 0:36 ` NeilBrown
2016-11-30 21:20 ` [PATCH 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2 siblings, 2 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
Refactor raid1_make_request to make read and write code in their own
functions to clean up the code.
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
drivers/md/raid1.c | 244 +++++++++++++++++++++++++++--------------------------
1 file changed, 126 insertions(+), 118 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 21dc00e..5aa0f89 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1032,17 +1032,96 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+ struct r1bio *r1_bio)
{
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
- struct r1bio *r1_bio;
struct bio *read_bio;
+ struct bitmap *bitmap = mddev->bitmap;
+ const int op = bio_op(bio);
+ const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ int sectors_handled;
+ int max_sectors;
+ int rdisk;
+
+read_again:
+ rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+ if (rdisk < 0) {
+ /* couldn't find anywhere to read from */
+ raid_end_bio_io(r1_bio);
+ return;
+ }
+ mirror = conf->mirrors + rdisk;
+
+ if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+ bitmap) {
+ /* Reading from a write-mostly device must
+ * take care not to over-take any writes
+ * that are 'behind'
+ */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
+ }
+ r1_bio->read_disk = rdisk;
+ r1_bio->start_next_window = 0;
+
+ read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
+
+ r1_bio->bios[rdisk] = read_bio;
+
+ read_bio->bi_iter.bi_sector = r1_bio->sector +
+ mirror->rdev->data_offset;
+ read_bio->bi_bdev = mirror->rdev->bdev;
+ read_bio->bi_end_io = raid1_end_read_request;
+ bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_private = r1_bio;
+
+ if (max_sectors < r1_bio->sectors) {
+ /* could not read all from this device, so we will
+ * need another r1_bio.
+ */
+
+ sectors_handled = (r1_bio->sector + max_sectors
+ - bio->bi_iter.bi_sector);
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __make_request
+ * and subsequent mempool_alloc might block waiting
+ * for it. So hand bio over to raid1d.
+ */
+ reschedule_retry(r1_bio);
+
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+ struct r1bio *r1_bio,
+ sector_t start_next_window)
+{
+ struct r1conf *conf = mddev->private;
int i, disks;
- struct bitmap *bitmap;
+ struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
const int op = bio_op(bio);
- const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_opf &
(REQ_PREFLUSH | REQ_FUA));
@@ -1052,7 +1131,6 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
int first_clone;
int sectors_handled;
int max_sectors;
- sector_t start_next_window;
/*
* Register the new request and wait if the reconstruction
@@ -1062,12 +1140,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */
- if (bio_data_dir(bio) == WRITE &&
- ((bio_end_sector(bio) > mddev->suspend_lo &&
+ if ((bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_iter.bi_sector < mddev->suspend_hi) ||
(mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
+ bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
* wait.
@@ -1081,119 +1158,14 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
bio->bi_iter.bi_sector >= mddev->suspend_hi ||
(mddev_is_clustered(mddev) &&
!md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector, bio_end_sector(bio))))
+ bio->bi_iter.bi_sector,
+ bio_end_sector(bio))))
break;
schedule();
}
finish_wait(&conf->wait_barrier, &w);
}
- start_next_window = wait_barrier(conf, bio);
-
- bitmap = mddev->bitmap;
-
- /*
- * make_request() can abort the operation when read-ahead is being
- * used and no empty request is available.
- *
- */
- r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
- r1_bio->master_bio = bio;
- r1_bio->sectors = bio_sectors(bio);
- r1_bio->state = 0;
- r1_bio->mddev = mddev;
- r1_bio->sector = bio->bi_iter.bi_sector;
-
- /* We might need to issue multiple reads to different
- * devices if there are bad blocks around, so we keep
- * track of the number of reads in bio->bi_phys_segments.
- * If this is 0, there is only one r1_bio and no locking
- * will be needed when requests complete. If it is
- * non-zero, then it is the number of not-completed requests.
- */
- bio->bi_phys_segments = 0;
- bio_clear_flag(bio, BIO_SEG_VALID);
-
- if (rw == READ) {
- /*
- * read balancing logic:
- */
- int rdisk;
-
-read_again:
- rdisk = read_balance(conf, r1_bio, &max_sectors);
-
- if (rdisk < 0) {
- /* couldn't find anywhere to read from */
- raid_end_bio_io(r1_bio);
- return;
- }
- mirror = conf->mirrors + rdisk;
-
- if (test_bit(WriteMostly, &mirror->rdev->flags) &&
- bitmap) {
- /* Reading from a write-mostly device must
- * take care not to over-take any writes
- * that are 'behind'
- */
- wait_event(bitmap->behind_wait,
- atomic_read(&bitmap->behind_writes) == 0);
- }
- r1_bio->read_disk = rdisk;
- r1_bio->start_next_window = 0;
-
- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
- max_sectors);
-
- r1_bio->bios[rdisk] = read_bio;
-
- read_bio->bi_iter.bi_sector = r1_bio->sector +
- mirror->rdev->data_offset;
- read_bio->bi_bdev = mirror->rdev->bdev;
- read_bio->bi_end_io = raid1_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
- read_bio->bi_private = r1_bio;
-
- if (max_sectors < r1_bio->sectors) {
- /* could not read all from this device, so we will
- * need another r1_bio.
- */
-
- sectors_handled = (r1_bio->sector + max_sectors
- - bio->bi_iter.bi_sector);
- r1_bio->sectors = max_sectors;
- spin_lock_irq(&conf->device_lock);
- if (bio->bi_phys_segments == 0)
- bio->bi_phys_segments = 2;
- else
- bio->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- /* Cannot call generic_make_request directly
- * as that will be queued in __make_request
- * and subsequent mempool_alloc might block waiting
- * for it. So hand bio over to raid1d.
- */
- reschedule_retry(r1_bio);
-
- r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
- r1_bio->master_bio = bio;
- r1_bio->sectors = bio_sectors(bio) - sectors_handled;
- r1_bio->state = 0;
- r1_bio->mddev = mddev;
- r1_bio->sector = bio->bi_iter.bi_sector +
- sectors_handled;
- goto read_again;
- } else
- generic_make_request(read_bio);
- return;
- }
-
- /*
- * WRITE:
- */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
@@ -1236,8 +1208,7 @@ read_again:
int bad_sectors;
int is_bad;
- is_bad = is_badblock(rdev, r1_bio->sector,
- max_sectors,
+ is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
&first_bad, &bad_sectors);
if (is_bad < 0) {
/* mustn't write here until the bad block is
@@ -1325,7 +1296,8 @@ read_again:
continue;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+ bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
if (first_clone) {
/* do behind I/O ?
@@ -1408,6 +1380,42 @@ read_again:
wake_up(&conf->wait_barrier);
}
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+ struct r1conf *conf = mddev->private;
+ struct r1bio *r1_bio;
+ sector_t start_next_window = wait_barrier(conf, bio);
+
+ /*
+ * make_request() can abort the operation when read-ahead is being
+ * used and no empty request is available.
+ *
+ */
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = bio_sectors(bio);
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_iter.bi_sector;
+
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r1_bio and no locking
+ * will be needed when requests complete. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ bio_clear_flag(bio, BIO_SEG_VALID);
+
+ if (bio_data_dir(bio) == READ) {
+ raid1_read_request(mddev, bio, r1_bio);
+ return;
+ }
+ raid1_write_request(mddev, bio, r1_bio, start_next_window);
+}
+
static void raid1_status(struct seq_file *seq, struct mddev *mddev)
{
struct r1conf *conf = mddev->private;
--
2.10.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/2] md/raid10: Refactor raid10_make_request
2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-11-30 21:20 ` Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2 siblings, 0 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
Refactor raid10_make_request into separate read and write functions to
clean up the code.
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
1 file changed, 139 insertions(+), 132 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index be1a9fc..d3bd756 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1046,150 +1046,89 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+ struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
- struct r10bio *r10_bio;
struct bio *read_bio;
- int i;
const int op = bio_op(bio);
- const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
- const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
- unsigned long flags;
- struct md_rdev *blocked_rdev;
- struct blk_plug_cb *cb;
- struct raid10_plug_cb *plug = NULL;
int sectors_handled;
int max_sectors;
- int sectors;
-
- md_write_start(mddev, bio);
-
- /*
- * Register the new request and wait if the reconstruction
- * thread has put up a bar for new requests.
- * Continue immediately if no resync is active currently.
- */
- wait_barrier(conf);
-
- sectors = bio_sectors(bio);
- while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- bio->bi_iter.bi_sector < conf->reshape_progress &&
- bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
- /* IO spans the reshape position. Need to wait for
- * reshape to pass
- */
- allow_barrier(conf);
- wait_event(conf->wait_barrier,
- conf->reshape_progress <= bio->bi_iter.bi_sector ||
- conf->reshape_progress >= bio->bi_iter.bi_sector +
- sectors);
- wait_barrier(conf);
- }
- if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- bio_data_dir(bio) == WRITE &&
- (mddev->reshape_backwards
- ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
- bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
- : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
- bio->bi_iter.bi_sector < conf->reshape_progress))) {
- /* Need to update reshape_position in metadata */
- mddev->reshape_position = conf->reshape_progress;
- set_mask_bits(&mddev->flags, 0,
- BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
- md_wakeup_thread(mddev->thread);
- wait_event(mddev->sb_wait,
- !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+ struct md_rdev *rdev;
+ int slot;
- conf->reshape_safe = mddev->reshape_position;
+read_again:
+ rdev = read_balance(conf, r10_bio, &max_sectors);
+ if (!rdev) {
+ raid_end_bio_io(r10_bio);
+ return;
}
+ slot = r10_bio->read_slot;
- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
- r10_bio->master_bio = bio;
- r10_bio->sectors = sectors;
+ read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
- r10_bio->mddev = mddev;
- r10_bio->sector = bio->bi_iter.bi_sector;
- r10_bio->state = 0;
+ r10_bio->devs[slot].bio = read_bio;
+ r10_bio->devs[slot].rdev = rdev;
- /* We might need to issue multiple reads to different
- * devices if there are bad blocks around, so we keep
- * track of the number of reads in bio->bi_phys_segments.
- * If this is 0, there is only one r10_bio and no locking
- * will be needed when the request completes. If it is
- * non-zero, then it is the number of not-completed requests.
- */
- bio->bi_phys_segments = 0;
- bio_clear_flag(bio, BIO_SEG_VALID);
+ read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+ choose_data_offset(r10_bio, rdev);
+ read_bio->bi_bdev = rdev->bdev;
+ read_bio->bi_end_io = raid10_end_read_request;
+ bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_private = r10_bio;
- if (rw == READ) {
- /*
- * read balancing logic:
+ if (max_sectors < r10_bio->sectors) {
+ /* Could not read all from this device, so we will
+ * need another r10_bio.
*/
- struct md_rdev *rdev;
- int slot;
-
-read_again:
- rdev = read_balance(conf, r10_bio, &max_sectors);
- if (!rdev) {
- raid_end_bio_io(r10_bio);
- return;
- }
- slot = r10_bio->read_slot;
-
- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
- max_sectors);
-
- r10_bio->devs[slot].bio = read_bio;
- r10_bio->devs[slot].rdev = rdev;
-
- read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
- choose_data_offset(r10_bio, rdev);
- read_bio->bi_bdev = rdev->bdev;
- read_bio->bi_end_io = raid10_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
- read_bio->bi_private = r10_bio;
+ sectors_handled = (r10_bio->sector + max_sectors
+ - bio->bi_iter.bi_sector);
+ r10_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __generic_make_request
+ * and subsequent mempool_alloc might block
+ * waiting for it. so hand bio over to raid10d.
+ */
+ reschedule_retry(r10_bio);
- if (max_sectors < r10_bio->sectors) {
- /* Could not read all from this device, so we will
- * need another r10_bio.
- */
- sectors_handled = (r10_bio->sector + max_sectors
- - bio->bi_iter.bi_sector);
- r10_bio->sectors = max_sectors;
- spin_lock_irq(&conf->device_lock);
- if (bio->bi_phys_segments == 0)
- bio->bi_phys_segments = 2;
- else
- bio->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- /* Cannot call generic_make_request directly
- * as that will be queued in __generic_make_request
- * and subsequent mempool_alloc might block
- * waiting for it. so hand bio over to raid10d.
- */
- reschedule_retry(r10_bio);
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+ r10_bio->state = 0;
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
+ return;
+}
- r10_bio->master_bio = bio;
- r10_bio->sectors = bio_sectors(bio) - sectors_handled;
- r10_bio->state = 0;
- r10_bio->mddev = mddev;
- r10_bio->sector = bio->bi_iter.bi_sector +
- sectors_handled;
- goto read_again;
- } else
- generic_make_request(read_bio);
- return;
- }
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ struct r10bio *r10_bio)
+{
+ struct r10conf *conf = mddev->private;
+ int i;
+ const int op = bio_op(bio);
+ const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+ unsigned long flags;
+ struct md_rdev *blocked_rdev;
+ struct blk_plug_cb *cb;
+ struct raid10_plug_cb *plug = NULL;
+ int sectors_handled;
+ int max_sectors;
+ md_write_start(mddev, bio);
- /*
- * WRITE:
- */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
@@ -1249,8 +1188,7 @@ retry_write:
int bad_sectors;
int is_bad;
- is_bad = is_badblock(rdev, dev_sector,
- max_sectors,
+ is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
if (is_bad < 0) {
/* Mustn't write here until the bad block
@@ -1353,8 +1291,7 @@ retry_write:
r10_bio->devs[i].bio = mbio;
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
- choose_data_offset(r10_bio,
- rdev));
+ choose_data_offset(r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1395,8 +1332,7 @@ retry_write:
r10_bio->devs[i].repl_bio = mbio;
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
- choose_data_offset(
- r10_bio, rdev));
+ choose_data_offset(r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1434,6 +1370,77 @@ retry_write:
one_write_done(r10_bio);
}
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+ struct r10conf *conf = mddev->private;
+ struct r10bio *r10_bio;
+ int sectors;
+
+ /*
+ * Register the new request and wait if the reconstruction
+ * thread has put up a bar for new requests.
+ * Continue immediately if no resync is active currently.
+ */
+ wait_barrier(conf);
+
+ sectors = bio_sectors(bio);
+ while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ bio->bi_iter.bi_sector < conf->reshape_progress &&
+ bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+ /* IO spans the reshape position. Need to wait for
+ * reshape to pass
+ */
+ allow_barrier(conf);
+ wait_event(conf->wait_barrier,
+ conf->reshape_progress <= bio->bi_iter.bi_sector ||
+ conf->reshape_progress >= bio->bi_iter.bi_sector +
+ sectors);
+ wait_barrier(conf);
+ }
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ bio_data_dir(bio) == WRITE &&
+ (mddev->reshape_backwards
+ ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+ bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+ : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+ bio->bi_iter.bi_sector < conf->reshape_progress))) {
+ /* Need to update reshape_position in metadata */
+ mddev->reshape_position = conf->reshape_progress;
+ set_mask_bits(&mddev->flags, 0,
+ BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+ md_wakeup_thread(mddev->thread);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+
+ conf->reshape_safe = mddev->reshape_position;
+ }
+
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = sectors;
+
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_iter.bi_sector;
+ r10_bio->state = 0;
+
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r10_bio and no locking
+ * will be needed when the request completes. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ bio_clear_flag(bio, BIO_SEG_VALID);
+
+ if (bio_data_dir(bio) == READ) {
+ raid10_read_request(mddev, bio, r10_bio);
+ return;
+ }
+ raid10_write_request(mddev, bio, r10_bio);
+}
+
static void raid10_make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
--
2.10.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-12-01 16:04 ` John Stoffel
2016-12-01 17:40 ` Robert LeBlanc
2016-12-02 0:36 ` NeilBrown
1 sibling, 1 reply; 11+ messages in thread
From: John Stoffel @ 2016-12-01 16:04 UTC (permalink / raw)
To: Robert LeBlanc; +Cc: linux-raid
>>>>> "Robert" == Robert LeBlanc <robert@leblancnet.us> writes:
Robert> Refactor raid1_make_request to make read and write code in their own
Robert> functions to clean up the code.
Robert> Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Robert> ---
Robert> drivers/md/raid1.c | 244 +++++++++++++++++++++++++++--------------------------
Robert> 1 file changed, 126 insertions(+), 118 deletions(-)
Robert> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
Robert> index 21dc00e..5aa0f89 100644
Robert> --- a/drivers/md/raid1.c
Robert> +++ b/drivers/md/raid1.c
Robert> @@ -1032,17 +1032,96 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
Robert> kfree(plug);
Robert> }
Robert> -static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert> +static void raid1_read_request(struct mddev *mddev, struct bio *bio,
Robert> + struct r1bio *r1_bio)
Robert> {
Robert> struct r1conf *conf = mddev->private;
Robert> struct raid1_info *mirror;
Robert> - struct r1bio *r1_bio;
Robert> struct bio *read_bio;
Robert> + struct bitmap *bitmap = mddev->bitmap;
Robert> + const int op = bio_op(bio);
Robert> + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
Robert> + int sectors_handled;
Robert> + int max_sectors;
Robert> + int rdisk;
Robert> +
Robert> +read_again:
Robert> + rdisk = read_balance(conf, r1_bio, &max_sectors);
Robert> +
Robert> + if (rdisk < 0) {
Robert> + /* couldn't find anywhere to read from */
Robert> + raid_end_bio_io(r1_bio);
Robert> + return;
Robert> + }
Robert> + mirror = conf->mirrors + rdisk;
Robert> +
Robert> + if (test_bit(WriteMostly, &mirror->rdev->flags) &&
Robert> + bitmap) {
Robert> + /* Reading from a write-mostly device must
Robert> + * take care not to over-take any writes
Robert> + * that are 'behind'
Robert> + */
Robert> + wait_event(bitmap->behind_wait,
Robert> + atomic_read(&bitmap->behind_writes) == 0);
Robert> + }
Robert> + r1_bio->read_disk = rdisk;
Robert> + r1_bio->start_next_window = 0;
Robert> +
Robert> + read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> + bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> + max_sectors);
Robert> +
Robert> + r1_bio->bios[rdisk] = read_bio;
Robert> +
Robert> + read_bio->bi_iter.bi_sector = r1_bio->sector +
Robert> + mirror->rdev->data_offset;
Robert> + read_bio->bi_bdev = mirror->rdev->bdev;
Robert> + read_bio->bi_end_io = raid1_end_read_request;
Robert> + bio_set_op_attrs(read_bio, op, do_sync);
Robert> + read_bio->bi_private = r1_bio;
Robert> +
Robert> + if (max_sectors < r1_bio->sectors) {
Robert> + /* could not read all from this device, so we will
Robert> + * need another r1_bio.
Robert> + */
Robert> +
Robert> + sectors_handled = (r1_bio->sector + max_sectors
Robert> + - bio->bi_iter.bi_sector);
Robert> + r1_bio->sectors = max_sectors;
Robert> + spin_lock_irq(&conf->device_lock);
Robert> + if (bio->bi_phys_segments == 0)
Robert> + bio->bi_phys_segments = 2;
Robert> + else
Robert> + bio->bi_phys_segments++;
Robert> + spin_unlock_irq(&conf->device_lock);
Robert> + /* Cannot call generic_make_request directly
Robert> + * as that will be queued in __make_request
Robert> + * and subsequent mempool_alloc might block waiting
Robert> + * for it. So hand bio over to raid1d.
Robert> + */
Robert> + reschedule_retry(r1_bio);
Robert> +
Robert> + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> +
Robert> + r1_bio->master_bio = bio;
Robert> + r1_bio->sectors = bio_sectors(bio) - sectors_handled;
Robert> + r1_bio->state = 0;
Robert> + r1_bio->mddev = mddev;
Robert> + r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
Robert> + goto read_again;
Robert> + } else
Robert> + generic_make_request(read_bio);
Robert> +}
Robert> +
Robert> +static void raid1_write_request(struct mddev *mddev, struct bio *bio,
Robert> + struct r1bio *r1_bio,
Robert> + sector_t start_next_window)
Robert> +{
Robert> + struct r1conf *conf = mddev->private;
Robert> int i, disks;
Robert> - struct bitmap *bitmap;
Robert> + struct bitmap *bitmap = mddev->bitmap;
Robert> unsigned long flags;
Robert> const int op = bio_op(bio);
Robert> - const int rw = bio_data_dir(bio);
Robert> const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
Robert> const unsigned long do_flush_fua = (bio->bi_opf &
Robert> (REQ_PREFLUSH | REQ_FUA));
Robert> @@ -1052,7 +1131,6 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert> int first_clone;
Robert> int sectors_handled;
Robert> int max_sectors;
Robert> - sector_t start_next_window;
Robert> /*
Robert> * Register the new request and wait if the reconstruction
Robert> @@ -1062,12 +1140,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert> md_write_start(mddev, bio); /* wait on superblock update early */
Robert> - if (bio_data_dir(bio) == WRITE &&
Robert> - ((bio_end_sector(bio) > mddev->suspend_lo &&
Robert> + if ((bio_end_sector(bio) > mddev->suspend_lo &&
Robert> bio->bi_iter.bi_sector < mddev->suspend_hi) ||
Robert> (mddev_is_clustered(mddev) &&
Robert> md_cluster_ops->area_resyncing(mddev, WRITE,
Robert> - bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
Robert> + bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
Robert> /* As the suspend_* range is controlled by
Robert> * userspace, we want an interruptible
Robert> * wait.
Robert> @@ -1081,119 +1158,14 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert> bio->bi_iter.bi_sector >= mddev->suspend_hi ||
Robert> (mddev_is_clustered(mddev) &&
Robert> !md_cluster_ops->area_resyncing(mddev, WRITE,
Robert> - bio->bi_iter.bi_sector, bio_end_sector(bio))))
Robert> + bio->bi_iter.bi_sector,
Robert> + bio_end_sector(bio))))
Robert> break;
Robert> schedule();
Robert> }
Robert> finish_wait(&conf->wait_barrier, &w);
Robert> }
Robert> - start_next_window = wait_barrier(conf, bio);
Robert> -
Robert> - bitmap = mddev->bitmap;
Robert> -
Robert> - /*
Robert> - * make_request() can abort the operation when read-ahead is being
Robert> - * used and no empty request is available.
Robert> - *
Robert> - */
Robert> - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> -
Robert> - r1_bio->master_bio = bio;
Robert> - r1_bio->sectors = bio_sectors(bio);
Robert> - r1_bio->state = 0;
Robert> - r1_bio->mddev = mddev;
Robert> - r1_bio->sector = bio->bi_iter.bi_sector;
Robert> -
Robert> - /* We might need to issue multiple reads to different
Robert> - * devices if there are bad blocks around, so we keep
Robert> - * track of the number of reads in bio->bi_phys_segments.
Robert> - * If this is 0, there is only one r1_bio and no locking
Robert> - * will be needed when requests complete. If it is
Robert> - * non-zero, then it is the number of not-completed requests.
Robert> - */
Robert> - bio->bi_phys_segments = 0;
Robert> - bio_clear_flag(bio, BIO_SEG_VALID);
Robert> -
Robert> - if (rw == READ) {
Robert> - /*
Robert> - * read balancing logic:
Robert> - */
Robert> - int rdisk;
Robert> -
Robert> -read_again:
Robert> - rdisk = read_balance(conf, r1_bio, &max_sectors);
Robert> -
Robert> - if (rdisk < 0) {
Robert> - /* couldn't find anywhere to read from */
Robert> - raid_end_bio_io(r1_bio);
Robert> - return;
Robert> - }
Robert> - mirror = conf->mirrors + rdisk;
Robert> -
Robert> - if (test_bit(WriteMostly, &mirror->rdev->flags) &&
Robert> - bitmap) {
Robert> - /* Reading from a write-mostly device must
Robert> - * take care not to over-take any writes
Robert> - * that are 'behind'
Robert> - */
Robert> - wait_event(bitmap->behind_wait,
Robert> - atomic_read(&bitmap->behind_writes) == 0);
Robert> - }
Robert> - r1_bio->read_disk = rdisk;
Robert> - r1_bio->start_next_window = 0;
Robert> -
Robert> - read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> - bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> - max_sectors);
Robert> -
Robert> - r1_bio->bios[rdisk] = read_bio;
Robert> -
Robert> - read_bio->bi_iter.bi_sector = r1_bio->sector +
Robert> - mirror->rdev->data_offset;
Robert> - read_bio->bi_bdev = mirror->rdev->bdev;
Robert> - read_bio->bi_end_io = raid1_end_read_request;
Robert> - bio_set_op_attrs(read_bio, op, do_sync);
Robert> - read_bio->bi_private = r1_bio;
Robert> -
Robert> - if (max_sectors < r1_bio->sectors) {
Robert> - /* could not read all from this device, so we will
Robert> - * need another r1_bio.
Robert> - */
Robert> -
Robert> - sectors_handled = (r1_bio->sector + max_sectors
Robert> - - bio->bi_iter.bi_sector);
Robert> - r1_bio->sectors = max_sectors;
Robert> - spin_lock_irq(&conf->device_lock);
Robert> - if (bio->bi_phys_segments == 0)
Robert> - bio->bi_phys_segments = 2;
Robert> - else
Robert> - bio->bi_phys_segments++;
Robert> - spin_unlock_irq(&conf->device_lock);
Robert> - /* Cannot call generic_make_request directly
Robert> - * as that will be queued in __make_request
Robert> - * and subsequent mempool_alloc might block waiting
Robert> - * for it. So hand bio over to raid1d.
Robert> - */
Robert> - reschedule_retry(r1_bio);
Robert> -
Robert> - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> -
Robert> - r1_bio->master_bio = bio;
Robert> - r1_bio->sectors = bio_sectors(bio) - sectors_handled;
Robert> - r1_bio->state = 0;
Robert> - r1_bio->mddev = mddev;
Robert> - r1_bio->sector = bio->bi_iter.bi_sector +
Robert> - sectors_handled;
Robert> - goto read_again;
Robert> - } else
Robert> - generic_make_request(read_bio);
Robert> - return;
Robert> - }
Robert> -
Robert> - /*
Robert> - * WRITE:
Robert> - */
Robert> if (conf->pending_count >= max_queued_requests) {
Robert> md_wakeup_thread(mddev->thread);
Robert> wait_event(conf->wait_barrier,
Robert> @@ -1236,8 +1208,7 @@ read_again:
Robert> int bad_sectors;
Robert> int is_bad;
Robert> - is_bad = is_badblock(rdev, r1_bio->sector,
Robert> - max_sectors,
Robert> + is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
Robert> &first_bad, &bad_sectors);
Robert> if (is_bad < 0) {
Robert> /* mustn't write here until the bad block is
Robert> @@ -1325,7 +1296,8 @@ read_again:
Robert> continue;
Robert> mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> - bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
Robert> + bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> + max_sectors);
Robert> if (first_clone) {
Robert> /* do behind I/O ?
Robert> @@ -1408,6 +1380,42 @@ read_again:
Robert> wake_up(&conf->wait_barrier);
Robert> }
Robert> +static void raid1_make_request(struct mddev *mddev, struct bio *bio)
Robert> +{
Robert> + struct r1conf *conf = mddev->private;
Robert> + struct r1bio *r1_bio;
Robert> + sector_t start_next_window = wait_barrier(conf, bio);
Robert> +
Robert> + /*
Robert> + * make_request() can abort the operation when read-ahead is being
Robert> + * used and no empty request is available.
Robert> + *
Robert> + */
Robert> + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> +
Robert> + r1_bio->master_bio = bio;
Robert> + r1_bio->sectors = bio_sectors(bio);
Robert> + r1_bio->state = 0;
Robert> + r1_bio->mddev = mddev;
Robert> + r1_bio->sector = bio->bi_iter.bi_sector;
Robert> +
Robert> + /* We might need to issue multiple reads to different
Robert> + * devices if there are bad blocks around, so we keep
Robert> + * track of the number of reads in bio->bi_phys_segments.
Robert> + * If this is 0, there is only one r1_bio and no locking
Robert> + * will be needed when requests complete. If it is
Robert> + * non-zero, then it is the number of not-completed requests.
Robert> + */
Robert> + bio->bi_phys_segments = 0;
Robert> + bio_clear_flag(bio, BIO_SEG_VALID);
Robert> +
Robert> + if (bio_data_dir(bio) == READ) {
Robert> + raid1_read_request(mddev, bio, r1_bio);
Robert> + return;
Robert> + }
Robert> + raid1_write_request(mddev, bio, r1_bio, start_next_window);
Robert> +}
Maybe this is pedantic, but why not have an 'else' instead of the
return to make it more clear? If someone (like me :-) missed the
return on the quick read through, it's going to be some confusion.
if (bio_data_dir(bio) == READ)
raid1_read_request(mddev, bio, r1_bio);
else
raid1_write_request(mddev, bio, r1_bio, start_next_window);
seems cleaner.
Also, why are the calls different with the start_next_window parameter
added in? Sorry for being dense, trying to understand the code
better...
John
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
2016-12-01 16:04 ` John Stoffel
@ 2016-12-01 17:40 ` Robert LeBlanc
2016-12-01 22:00 ` John Stoffel
0 siblings, 1 reply; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-01 17:40 UTC (permalink / raw)
To: John Stoffel; +Cc: linux-raid
On Thu, Dec 1, 2016 at 9:04 AM, John Stoffel <john@stoffel.org> wrote:
> Maybe this is pedantic, but why not have an 'else' instead of the
> return to make it more clear? If someone (like me :-) missed the
> return on the quick read through, it's going to be some confusion.
>
> if (bio_data_dir(bio) == READ)
> raid1_read_request(mddev, bio, r1_bio);
> else
> raid1_write_request(mddev, bio, r1_bio, start_next_window);
>
> seems cleaner.
Yeah, I agree that is a bit cleaner, I was just following the previous
style. I'll change it to what you propose.
> Also, why are the calls different with the start_next_window parameter
> added in? Sorry for being dense, trying to understand the code
> better...
start_next_window is set by wait_barrier() and is only non-zero for
writes and the return value of zero is never used for reads. I tried
to move that code into the write branch, but I could not unload the
module due to conf->nr_pending == conf->queued+extra failing in
wait_event_lock_irq_cmd() in freeze_array(). Each read was
decrementing conf->nr_pending and since wait_barrier() was not being
called on reads, conf->nr_pending was very negative and would not
return to zero. In order to refactor that out completely would require
a lot more work and changes. I'd need to do more research into how
much the spin_locks are needed for reading when there is recovery
going on before I could start to try and refactor that. The other
option is to break wait_barrier() into two functions, one that gets
called for both read and write and the other for just the writes. I
can do the latter pretty easily if that is a desired direction.
----------------
Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
2016-12-01 17:40 ` Robert LeBlanc
@ 2016-12-01 22:00 ` John Stoffel
0 siblings, 0 replies; 11+ messages in thread
From: John Stoffel @ 2016-12-01 22:00 UTC (permalink / raw)
To: Robert LeBlanc; +Cc: John Stoffel, linux-raid
>>>>> "Robert" == Robert LeBlanc <robert@leblancnet.us> writes:
Robert> On Thu, Dec 1, 2016 at 9:04 AM, John Stoffel <john@stoffel.org> wrote:
>> Maybe this is pedantic, but why not have an 'else' instead of the
>> return to make it more clear? If someone (like me :-) missed the
>> return on the quick read through, it's going to be some confusion.
>>
>> if (bio_data_dir(bio) == READ)
>> raid1_read_request(mddev, bio, r1_bio);
>> else
>> raid1_write_request(mddev, bio, r1_bio, start_next_window);
>>
>> seems cleaner.
Robert> Yeah, I agree that is a bit cleaner, I was just following the
Robert> previous style. I'll change it to what you propose.
It's not a big deal, whatever makes you happy.
>> Also, why are the calls different with the start_next_window parameter
>> added in? Sorry for being dense, trying to understand the code
>> better...
Robert> start_next_window is set by wait_barrier() and is only
Robert> non-zero for writes and the return value of zero is never used
Robert> for reads. I tried to move that code into the write branch,
Robert> but I could not unload the module due to conf->nr_pending ==
Robert> conf->queued+extra failing in wait_event_lock_irq_cmd() in
Robert> freeze_array(). Each read was decrementing conf->nr_pending
Robert> and since wait_barrier() was not being called on reads,
Robert> conf->nr_pending was very negative and would not return to
Robert> zero. In order to refactor that out completely would require a
Robert> lot more work and changes. I'd need to do more research into
Robert> how much the spin_locks are needed for reading when there is
Robert> recovery going on before I could start to try and refactor
Robert> that. The other option is to break wait_barrier() into two
Robert> functions, one that gets called for both read and write and
Robert> the other for just the writes. I can do the latter pretty
Robert> easily if that is a desired direction.
That second option might be the way to move forward. But it sounds
like a bigger refactoring than is really needed now.
John
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
2016-12-01 16:04 ` John Stoffel
@ 2016-12-02 0:36 ` NeilBrown
1 sibling, 0 replies; 11+ messages in thread
From: NeilBrown @ 2016-12-02 0:36 UTC (permalink / raw)
To: Robert LeBlanc, linux-raid
[-- Attachment #1: Type: text/plain, Size: 829 bytes --]
On Thu, Dec 01 2016, Robert LeBlanc wrote:
> Refactor raid1_make_request to make read and write code in their own
> functions to clean up the code.
There is a change in here that is more than just a refactoring.
You have moved the wait_barrier() call from after the checks on
->suspend_{lo,hi} to before.
If a write was sent to a suspended region, this will block any pending
resync until the suspended region is moved.
The previous code won't block resync in this case.
I cannot see that this would actually have any practical significance,
as I don't think the suspend_hi/lo settings are used at all for raid1.
I'm less certain about the ->area_resyncing() test, though that probably
isn't a problem.
But it should at least be noted in the change-log that there is
potentially a functional change here.
Thanks,
NeilBrown
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 0/2] Reorganize raid*_make_request to clean up code
2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
2016-11-30 21:20 ` [PATCH 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
@ 2016-12-02 3:30 ` Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
2 siblings, 2 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02 3:30 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
In response to Christoph, I've broken the read and writes into their own
functions to make the code even cleaner. Since it is such a big change, I broke
up the commits into this series instead of creating a v2 of the previous patch.
Changes since v1:
John Stoffel
* Changed to if/then instead of return in raid1_make_request
Neil Brown
* Moved wait_barrier into raid1_{read,write}_request so that it could be after
->suspend_{hi,lo}. This prevents a write blocking a resync until the suspend
region is moved.
Robert LeBlanc (2):
md/raid1: Refactor raid1_make_request
md/raid10: Refactor raid10_make_request
drivers/md/raid1.c | 241 ++++++++++++++++++++++++----------------------
drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
2 files changed, 264 insertions(+), 248 deletions(-)
--
2.10.2
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 1/2] md/raid1: Refactor raid1_make_request
2016-12-02 3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
@ 2016-12-02 3:30 ` Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
1 sibling, 0 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02 3:30 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
Refactor raid1_make_request to make read and write code in their own
functions to clean up the code.
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
drivers/md/raid1.c | 241 +++++++++++++++++++++++++++--------------------------
1 file changed, 125 insertions(+), 116 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 21dc00e..0ea6541 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1032,17 +1032,97 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+ struct r1bio *r1_bio)
{
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
- struct r1bio *r1_bio;
struct bio *read_bio;
+ struct bitmap *bitmap = mddev->bitmap;
+ const int op = bio_op(bio);
+ const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ int sectors_handled;
+ int max_sectors;
+ int rdisk;
+
+ wait_barrier(conf, bio);
+
+read_again:
+ rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+ if (rdisk < 0) {
+ /* couldn't find anywhere to read from */
+ raid_end_bio_io(r1_bio);
+ return;
+ }
+ mirror = conf->mirrors + rdisk;
+
+ if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+ bitmap) {
+ /* Reading from a write-mostly device must
+ * take care not to over-take any writes
+ * that are 'behind'
+ */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
+ }
+ r1_bio->read_disk = rdisk;
+ r1_bio->start_next_window = 0;
+
+ read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
+
+ r1_bio->bios[rdisk] = read_bio;
+
+ read_bio->bi_iter.bi_sector = r1_bio->sector +
+ mirror->rdev->data_offset;
+ read_bio->bi_bdev = mirror->rdev->bdev;
+ read_bio->bi_end_io = raid1_end_read_request;
+ bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_private = r1_bio;
+
+ if (max_sectors < r1_bio->sectors) {
+ /* could not read all from this device, so we will
+ * need another r1_bio.
+ */
+
+ sectors_handled = (r1_bio->sector + max_sectors
+ - bio->bi_iter.bi_sector);
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __make_request
+ * and subsequent mempool_alloc might block waiting
+ * for it. So hand bio over to raid1d.
+ */
+ reschedule_retry(r1_bio);
+
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+ struct r1bio *r1_bio)
+{
+ struct r1conf *conf = mddev->private;
int i, disks;
- struct bitmap *bitmap;
+ struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
const int op = bio_op(bio);
- const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_opf &
(REQ_PREFLUSH | REQ_FUA));
@@ -1062,12 +1142,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */
- if (bio_data_dir(bio) == WRITE &&
- ((bio_end_sector(bio) > mddev->suspend_lo &&
+ if ((bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_iter.bi_sector < mddev->suspend_hi) ||
(mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
+ bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
* wait.
@@ -1081,119 +1160,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
bio->bi_iter.bi_sector >= mddev->suspend_hi ||
(mddev_is_clustered(mddev) &&
!md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector, bio_end_sector(bio))))
+ bio->bi_iter.bi_sector,
+ bio_end_sector(bio))))
break;
schedule();
}
finish_wait(&conf->wait_barrier, &w);
}
-
start_next_window = wait_barrier(conf, bio);
- bitmap = mddev->bitmap;
-
- /*
- * make_request() can abort the operation when read-ahead is being
- * used and no empty request is available.
- *
- */
- r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
- r1_bio->master_bio = bio;
- r1_bio->sectors = bio_sectors(bio);
- r1_bio->state = 0;
- r1_bio->mddev = mddev;
- r1_bio->sector = bio->bi_iter.bi_sector;
-
- /* We might need to issue multiple reads to different
- * devices if there are bad blocks around, so we keep
- * track of the number of reads in bio->bi_phys_segments.
- * If this is 0, there is only one r1_bio and no locking
- * will be needed when requests complete. If it is
- * non-zero, then it is the number of not-completed requests.
- */
- bio->bi_phys_segments = 0;
- bio_clear_flag(bio, BIO_SEG_VALID);
-
- if (rw == READ) {
- /*
- * read balancing logic:
- */
- int rdisk;
-
-read_again:
- rdisk = read_balance(conf, r1_bio, &max_sectors);
-
- if (rdisk < 0) {
- /* couldn't find anywhere to read from */
- raid_end_bio_io(r1_bio);
- return;
- }
- mirror = conf->mirrors + rdisk;
-
- if (test_bit(WriteMostly, &mirror->rdev->flags) &&
- bitmap) {
- /* Reading from a write-mostly device must
- * take care not to over-take any writes
- * that are 'behind'
- */
- wait_event(bitmap->behind_wait,
- atomic_read(&bitmap->behind_writes) == 0);
- }
- r1_bio->read_disk = rdisk;
- r1_bio->start_next_window = 0;
-
- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
- max_sectors);
-
- r1_bio->bios[rdisk] = read_bio;
-
- read_bio->bi_iter.bi_sector = r1_bio->sector +
- mirror->rdev->data_offset;
- read_bio->bi_bdev = mirror->rdev->bdev;
- read_bio->bi_end_io = raid1_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
- read_bio->bi_private = r1_bio;
-
- if (max_sectors < r1_bio->sectors) {
- /* could not read all from this device, so we will
- * need another r1_bio.
- */
-
- sectors_handled = (r1_bio->sector + max_sectors
- - bio->bi_iter.bi_sector);
- r1_bio->sectors = max_sectors;
- spin_lock_irq(&conf->device_lock);
- if (bio->bi_phys_segments == 0)
- bio->bi_phys_segments = 2;
- else
- bio->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- /* Cannot call generic_make_request directly
- * as that will be queued in __make_request
- * and subsequent mempool_alloc might block waiting
- * for it. So hand bio over to raid1d.
- */
- reschedule_retry(r1_bio);
-
- r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
- r1_bio->master_bio = bio;
- r1_bio->sectors = bio_sectors(bio) - sectors_handled;
- r1_bio->state = 0;
- r1_bio->mddev = mddev;
- r1_bio->sector = bio->bi_iter.bi_sector +
- sectors_handled;
- goto read_again;
- } else
- generic_make_request(read_bio);
- return;
- }
-
- /*
- * WRITE:
- */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
@@ -1236,8 +1211,7 @@ read_again:
int bad_sectors;
int is_bad;
- is_bad = is_badblock(rdev, r1_bio->sector,
- max_sectors,
+ is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
&first_bad, &bad_sectors);
if (is_bad < 0) {
/* mustn't write here until the bad block is
@@ -1325,7 +1299,8 @@ read_again:
continue;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+ bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
if (first_clone) {
/* do behind I/O ?
@@ -1408,6 +1383,40 @@ read_again:
wake_up(&conf->wait_barrier);
}
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+ struct r1conf *conf = mddev->private;
+ struct r1bio *r1_bio;
+
+ /*
+ * make_request() can abort the operation when read-ahead is being
+ * used and no empty request is available.
+ *
+ */
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = bio_sectors(bio);
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_iter.bi_sector;
+
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r1_bio and no locking
+ * will be needed when requests complete. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ bio_clear_flag(bio, BIO_SEG_VALID);
+
+ if (bio_data_dir(bio) == READ)
+ raid1_read_request(mddev, bio, r1_bio);
+ else
+ raid1_write_request(mddev, bio, r1_bio);
+}
+
static void raid1_status(struct seq_file *seq, struct mddev *mddev)
{
struct r1conf *conf = mddev->private;
--
2.10.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 2/2] md/raid10: Refactor raid10_make_request
2016-12-02 3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-12-02 3:30 ` Robert LeBlanc
2016-12-02 22:02 ` Shaohua Li
1 sibling, 1 reply; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02 3:30 UTC (permalink / raw)
To: linux-raid; +Cc: Robert LeBlanc
Refactor raid10_make_request into separate read and write functions to
clean up the code.
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
1 file changed, 139 insertions(+), 132 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index be1a9fc..d3bd756 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1046,150 +1046,89 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug);
}
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+ struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
- struct r10bio *r10_bio;
struct bio *read_bio;
- int i;
const int op = bio_op(bio);
- const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
- const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
- unsigned long flags;
- struct md_rdev *blocked_rdev;
- struct blk_plug_cb *cb;
- struct raid10_plug_cb *plug = NULL;
int sectors_handled;
int max_sectors;
- int sectors;
-
- md_write_start(mddev, bio);
-
- /*
- * Register the new request and wait if the reconstruction
- * thread has put up a bar for new requests.
- * Continue immediately if no resync is active currently.
- */
- wait_barrier(conf);
-
- sectors = bio_sectors(bio);
- while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- bio->bi_iter.bi_sector < conf->reshape_progress &&
- bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
- /* IO spans the reshape position. Need to wait for
- * reshape to pass
- */
- allow_barrier(conf);
- wait_event(conf->wait_barrier,
- conf->reshape_progress <= bio->bi_iter.bi_sector ||
- conf->reshape_progress >= bio->bi_iter.bi_sector +
- sectors);
- wait_barrier(conf);
- }
- if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- bio_data_dir(bio) == WRITE &&
- (mddev->reshape_backwards
- ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
- bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
- : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
- bio->bi_iter.bi_sector < conf->reshape_progress))) {
- /* Need to update reshape_position in metadata */
- mddev->reshape_position = conf->reshape_progress;
- set_mask_bits(&mddev->flags, 0,
- BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
- md_wakeup_thread(mddev->thread);
- wait_event(mddev->sb_wait,
- !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+ struct md_rdev *rdev;
+ int slot;
- conf->reshape_safe = mddev->reshape_position;
+read_again:
+ rdev = read_balance(conf, r10_bio, &max_sectors);
+ if (!rdev) {
+ raid_end_bio_io(r10_bio);
+ return;
}
+ slot = r10_bio->read_slot;
- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
- r10_bio->master_bio = bio;
- r10_bio->sectors = sectors;
+ read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+ max_sectors);
- r10_bio->mddev = mddev;
- r10_bio->sector = bio->bi_iter.bi_sector;
- r10_bio->state = 0;
+ r10_bio->devs[slot].bio = read_bio;
+ r10_bio->devs[slot].rdev = rdev;
- /* We might need to issue multiple reads to different
- * devices if there are bad blocks around, so we keep
- * track of the number of reads in bio->bi_phys_segments.
- * If this is 0, there is only one r10_bio and no locking
- * will be needed when the request completes. If it is
- * non-zero, then it is the number of not-completed requests.
- */
- bio->bi_phys_segments = 0;
- bio_clear_flag(bio, BIO_SEG_VALID);
+ read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+ choose_data_offset(r10_bio, rdev);
+ read_bio->bi_bdev = rdev->bdev;
+ read_bio->bi_end_io = raid10_end_read_request;
+ bio_set_op_attrs(read_bio, op, do_sync);
+ read_bio->bi_private = r10_bio;
- if (rw == READ) {
- /*
- * read balancing logic:
+ if (max_sectors < r10_bio->sectors) {
+ /* Could not read all from this device, so we will
+ * need another r10_bio.
*/
- struct md_rdev *rdev;
- int slot;
-
-read_again:
- rdev = read_balance(conf, r10_bio, &max_sectors);
- if (!rdev) {
- raid_end_bio_io(r10_bio);
- return;
- }
- slot = r10_bio->read_slot;
-
- read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
- max_sectors);
-
- r10_bio->devs[slot].bio = read_bio;
- r10_bio->devs[slot].rdev = rdev;
-
- read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
- choose_data_offset(r10_bio, rdev);
- read_bio->bi_bdev = rdev->bdev;
- read_bio->bi_end_io = raid10_end_read_request;
- bio_set_op_attrs(read_bio, op, do_sync);
- read_bio->bi_private = r10_bio;
+ sectors_handled = (r10_bio->sector + max_sectors
+ - bio->bi_iter.bi_sector);
+ r10_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __generic_make_request
+ * and subsequent mempool_alloc might block
+ * waiting for it. so hand bio over to raid10d.
+ */
+ reschedule_retry(r10_bio);
- if (max_sectors < r10_bio->sectors) {
- /* Could not read all from this device, so we will
- * need another r10_bio.
- */
- sectors_handled = (r10_bio->sector + max_sectors
- - bio->bi_iter.bi_sector);
- r10_bio->sectors = max_sectors;
- spin_lock_irq(&conf->device_lock);
- if (bio->bi_phys_segments == 0)
- bio->bi_phys_segments = 2;
- else
- bio->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- /* Cannot call generic_make_request directly
- * as that will be queued in __generic_make_request
- * and subsequent mempool_alloc might block
- * waiting for it. so hand bio over to raid10d.
- */
- reschedule_retry(r10_bio);
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
- r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+ r10_bio->state = 0;
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
+ return;
+}
- r10_bio->master_bio = bio;
- r10_bio->sectors = bio_sectors(bio) - sectors_handled;
- r10_bio->state = 0;
- r10_bio->mddev = mddev;
- r10_bio->sector = bio->bi_iter.bi_sector +
- sectors_handled;
- goto read_again;
- } else
- generic_make_request(read_bio);
- return;
- }
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+ struct r10bio *r10_bio)
+{
+ struct r10conf *conf = mddev->private;
+ int i;
+ const int op = bio_op(bio);
+ const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+ const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+ unsigned long flags;
+ struct md_rdev *blocked_rdev;
+ struct blk_plug_cb *cb;
+ struct raid10_plug_cb *plug = NULL;
+ int sectors_handled;
+ int max_sectors;
+ md_write_start(mddev, bio);
- /*
- * WRITE:
- */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
@@ -1249,8 +1188,7 @@ retry_write:
int bad_sectors;
int is_bad;
- is_bad = is_badblock(rdev, dev_sector,
- max_sectors,
+ is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
if (is_bad < 0) {
/* Mustn't write here until the bad block
@@ -1353,8 +1291,7 @@ retry_write:
r10_bio->devs[i].bio = mbio;
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
- choose_data_offset(r10_bio,
- rdev));
+ choose_data_offset(r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1395,8 +1332,7 @@ retry_write:
r10_bio->devs[i].repl_bio = mbio;
mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
- choose_data_offset(
- r10_bio, rdev));
+ choose_data_offset(r10_bio, rdev));
mbio->bi_bdev = rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1434,6 +1370,77 @@ retry_write:
one_write_done(r10_bio);
}
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+ struct r10conf *conf = mddev->private;
+ struct r10bio *r10_bio;
+ int sectors;
+
+ /*
+ * Register the new request and wait if the reconstruction
+ * thread has put up a bar for new requests.
+ * Continue immediately if no resync is active currently.
+ */
+ wait_barrier(conf);
+
+ sectors = bio_sectors(bio);
+ while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ bio->bi_iter.bi_sector < conf->reshape_progress &&
+ bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+ /* IO spans the reshape position. Need to wait for
+ * reshape to pass
+ */
+ allow_barrier(conf);
+ wait_event(conf->wait_barrier,
+ conf->reshape_progress <= bio->bi_iter.bi_sector ||
+ conf->reshape_progress >= bio->bi_iter.bi_sector +
+ sectors);
+ wait_barrier(conf);
+ }
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ bio_data_dir(bio) == WRITE &&
+ (mddev->reshape_backwards
+ ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+ bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+ : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+ bio->bi_iter.bi_sector < conf->reshape_progress))) {
+ /* Need to update reshape_position in metadata */
+ mddev->reshape_position = conf->reshape_progress;
+ set_mask_bits(&mddev->flags, 0,
+ BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+ md_wakeup_thread(mddev->thread);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+
+ conf->reshape_safe = mddev->reshape_position;
+ }
+
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = sectors;
+
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_iter.bi_sector;
+ r10_bio->state = 0;
+
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r10_bio and no locking
+ * will be needed when the request completes. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ bio_clear_flag(bio, BIO_SEG_VALID);
+
+ if (bio_data_dir(bio) == READ) {
+ raid10_read_request(mddev, bio, r10_bio);
+ return;
+ }
+ raid10_write_request(mddev, bio, r10_bio);
+}
+
static void raid10_make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
--
2.10.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] md/raid10: Refactor raid10_make_request
2016-12-02 3:30 ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
@ 2016-12-02 22:02 ` Shaohua Li
0 siblings, 0 replies; 11+ messages in thread
From: Shaohua Li @ 2016-12-02 22:02 UTC (permalink / raw)
To: Robert LeBlanc; +Cc: linux-raid
On Thu, Dec 01, 2016 at 08:30:08PM -0700, Robert LeBlanc wrote:
> Refactor raid10_make_request into separate read and write functions to
> clean up the code.
>
> Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
> ---
Hi,
could you please resend the patches against my for-next branch? The two patches
don't apply.
> int bad_sectors;
> int is_bad;
>
> - is_bad = is_badblock(rdev, dev_sector,
> - max_sectors,
> + is_bad = is_badblock(rdev, dev_sector, max_sectors,
> &first_bad, &bad_sectors);
> if (is_bad < 0) {
> /* Mustn't write here until the bad block
> @@ -1353,8 +1291,7 @@ retry_write:
> r10_bio->devs[i].bio = mbio;
>
> mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
> - choose_data_offset(r10_bio,
> - rdev));
> + choose_data_offset(r10_bio, rdev));
> mbio->bi_bdev = rdev->bdev;
> mbio->bi_end_io = raid10_end_write_request;
> bio_set_op_attrs(mbio, op, do_sync | do_fua);
> @@ -1395,8 +1332,7 @@ retry_write:
> r10_bio->devs[i].repl_bio = mbio;
>
> mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
> - choose_data_offset(
> - r10_bio, rdev));
> + choose_data_offset(r10_bio, rdev));
> mbio->bi_bdev = rdev->bdev;
> mbio->bi_end_io = raid10_end_write_request;
> bio_set_op_attrs(mbio, op, do_sync | do_fua);
> @@ -1434,6 +1370,77 @@ retry_write:
> one_write_done(r10_bio);
> }
>
> +static void __make_request(struct mddev *mddev, struct bio *bio)
> +{
> + struct r10conf *conf = mddev->private;
> + struct r10bio *r10_bio;
> + int sectors;
> +
we do wait_barrier before md_write_start now. I'm not comfortable with this.
Could you please add md_write_start here? A single line of code for write here
doesn't matter.
> + /*
> + * Register the new request and wait if the reconstruction
> + * thread has put up a bar for new requests.
> + * Continue immediately if no resync is active currently.
> + */
> + wait_barrier(conf);
> +
> + sectors = bio_sectors(bio);
> + while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> + bio->bi_iter.bi_sector < conf->reshape_progress &&
> + bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
> + /* IO spans the reshape position. Need to wait for
> + * reshape to pass
> + */
> + allow_barrier(conf);
> + wait_event(conf->wait_barrier,
> + conf->reshape_progress <= bio->bi_iter.bi_sector ||
> + conf->reshape_progress >= bio->bi_iter.bi_sector +
> + sectors);
> + wait_barrier(conf);
> + }
> + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> + bio_data_dir(bio) == WRITE &&
> + (mddev->reshape_backwards
> + ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
> + bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
> + : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
> + bio->bi_iter.bi_sector < conf->reshape_progress))) {
> + /* Need to update reshape_position in metadata */
> + mddev->reshape_position = conf->reshape_progress;
> + set_mask_bits(&mddev->flags, 0,
> + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
> + md_wakeup_thread(mddev->thread);
> + wait_event(mddev->sb_wait,
> + !test_bit(MD_CHANGE_PENDING, &mddev->flags));
> +
> + conf->reshape_safe = mddev->reshape_position;
> + }
this is write only and could be moved to write path.
> +
> + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
> +
> + r10_bio->master_bio = bio;
> + r10_bio->sectors = sectors;
> +
> + r10_bio->mddev = mddev;
> + r10_bio->sector = bio->bi_iter.bi_sector;
> + r10_bio->state = 0;
> +
> + /* We might need to issue multiple reads to different
> + * devices if there are bad blocks around, so we keep
> + * track of the number of reads in bio->bi_phys_segments.
> + * If this is 0, there is only one r10_bio and no locking
> + * will be needed when the request completes. If it is
> + * non-zero, then it is the number of not-completed requests.
> + */
> + bio->bi_phys_segments = 0;
> + bio_clear_flag(bio, BIO_SEG_VALID);
> +
> + if (bio_data_dir(bio) == READ) {
> + raid10_read_request(mddev, bio, r10_bio);
> + return;
> + }
Better do the same as raid1 here.
> + raid10_write_request(mddev, bio, r10_bio);
> +}
Thanks,
Shaohua
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2016-12-02 22:02 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
2016-12-01 16:04 ` John Stoffel
2016-12-01 17:40 ` Robert LeBlanc
2016-12-01 22:00 ` John Stoffel
2016-12-02 0:36 ` NeilBrown
2016-11-30 21:20 ` [PATCH 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
2016-12-02 3:30 ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
2016-12-02 22:02 ` Shaohua Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.