* [PATCH 0/2] Reorganize raid*_make_request to clean up code
@ 2016-11-30 21:20 Robert LeBlanc
  2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

In response to Christoph's feedback, I've broken the read and write paths
into their own functions to make the code even cleaner. Since it is such a
big change, I broke the commits up into this series instead of creating a v2
of the previous patch.

Robert LeBlanc (2):
  md/raid1: Refactor raid1_make_request
  md/raid10: Refactor raid10_make_request

 drivers/md/raid1.c  | 244 +++++++++++++++++++++++-----------------------
 drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
 2 files changed, 265 insertions(+), 250 deletions(-)

-- 
2.10.2



* [PATCH 1/2] md/raid1: Refactor raid1_make_request
  2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
@ 2016-11-30 21:20 ` Robert LeBlanc
  2016-12-01 16:04   ` John Stoffel
  2016-12-02  0:36   ` NeilBrown
  2016-11-30 21:20 ` [PATCH 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
  2016-12-02  3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
  2 siblings, 2 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

Refactor raid1_make_request to move the read and write code into their
own functions to clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
 drivers/md/raid1.c | 244 +++++++++++++++++++++++++++--------------------------
 1 file changed, 126 insertions(+), 118 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 21dc00e..5aa0f89 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1032,17 +1032,96 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+				 struct r1bio *r1_bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
-	struct r1bio *r1_bio;
 	struct bio *read_bio;
+	struct bitmap *bitmap = mddev->bitmap;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	int rdisk;
+
+read_again:
+	rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+	if (rdisk < 0) {
+		/* couldn't find anywhere to read from */
+		raid_end_bio_io(r1_bio);
+		return;
+	}
+	mirror = conf->mirrors + rdisk;
+
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    bitmap) {
+		/* Reading from a write-mostly device must
+		 * take care not to over-take any writes
+		 * that are 'behind'
+		 */
+		wait_event(bitmap->behind_wait,
+			   atomic_read(&bitmap->behind_writes) == 0);
+	}
+	r1_bio->read_disk = rdisk;
+	r1_bio->start_next_window = 0;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r1_bio->bios[rdisk] = read_bio;
+
+	read_bio->bi_iter.bi_sector = r1_bio->sector +
+		mirror->rdev->data_offset;
+	read_bio->bi_bdev = mirror->rdev->bdev;
+	read_bio->bi_end_io = raid1_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	read_bio->bi_private = r1_bio;
+
+	if (max_sectors < r1_bio->sectors) {
+		/* could not read all from this device, so we will
+		 * need another r1_bio.
+		 */
+
+		sectors_handled = (r1_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/* Cannot call generic_make_request directly
+		 * as that will be queued in __make_request
+		 * and subsequent mempool_alloc might block waiting
+		 * for it.  So hand bio over to raid1d.
+		 */
+		reschedule_retry(r1_bio);
+
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				struct r1bio *r1_bio,
+				sector_t start_next_window)
+{
+	struct r1conf *conf = mddev->private;
 	int i, disks;
-	struct bitmap *bitmap;
+	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_opf &
 						(REQ_PREFLUSH | REQ_FUA));
@@ -1052,7 +1131,6 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
-	sector_t start_next_window;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -1062,12 +1140,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (bio_data_dir(bio) == WRITE &&
-	    ((bio_end_sector(bio) > mddev->suspend_lo &&
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
 	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
 	    (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
+		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
 		/* As the suspend_* range is controlled by
 		 * userspace, we want an interruptible
 		 * wait.
@@ -1081,119 +1158,14 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
 			    (mddev_is_clustered(mddev) &&
 			     !md_cluster_ops->area_resyncing(mddev, WRITE,
-				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
+				     bio->bi_iter.bi_sector,
+				     bio_end_sector(bio))))
 				break;
 			schedule();
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
 
-	start_next_window = wait_barrier(conf, bio);
-
-	bitmap = mddev->bitmap;
-
-	/*
-	 * make_request() can abort the operation when read-ahead is being
-	 * used and no empty request is available.
-	 *
-	 */
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-	r1_bio->master_bio = bio;
-	r1_bio->sectors = bio_sectors(bio);
-	r1_bio->state = 0;
-	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_iter.bi_sector;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r1_bio and no locking
-	 * will be needed when requests complete.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		int rdisk;
-
-read_again:
-		rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-		if (rdisk < 0) {
-			/* couldn't find anywhere to read from */
-			raid_end_bio_io(r1_bio);
-			return;
-		}
-		mirror = conf->mirrors + rdisk;
-
-		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-		    bitmap) {
-			/* Reading from a write-mostly device must
-			 * take care not to over-take any writes
-			 * that are 'behind'
-			 */
-			wait_event(bitmap->behind_wait,
-				   atomic_read(&bitmap->behind_writes) == 0);
-		}
-		r1_bio->read_disk = rdisk;
-		r1_bio->start_next_window = 0;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r1_bio->bios[rdisk] = read_bio;
-
-		read_bio->bi_iter.bi_sector = r1_bio->sector +
-			mirror->rdev->data_offset;
-		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		read_bio->bi_private = r1_bio;
-
-		if (max_sectors < r1_bio->sectors) {
-			/* could not read all from this device, so we will
-			 * need another r1_bio.
-			 */
-
-			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __make_request
-			 * and subsequent mempool_alloc might block waiting
-			 * for it.  So hand bio over to raid1d.
-			 */
-			reschedule_retry(r1_bio);
-
-			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-			r1_bio->master_bio = bio;
-			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r1_bio->state = 0;
-			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		wait_event(conf->wait_barrier,
@@ -1236,8 +1208,7 @@ read_again:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, r1_bio->sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* mustn't write here until the bad block is
@@ -1325,7 +1296,8 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1408,6 +1380,42 @@ read_again:
 	wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	struct r1bio *r1_bio;
+	sector_t start_next_window = wait_barrier(conf, bio);
+
+	/*
+	 * make_request() can abort the operation when read-ahead is being
+	 * used and no empty request is available.
+	 *
+	 */
+	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+	r1_bio->master_bio = bio;
+	r1_bio->sectors = bio_sectors(bio);
+	r1_bio->state = 0;
+	r1_bio->mddev = mddev;
+	r1_bio->sector = bio->bi_iter.bi_sector;
+
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r1_bio and no locking
+	 * will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ) {
+		raid1_read_request(mddev, bio, r1_bio);
+		return;
+	}
+	raid1_write_request(mddev, bio, r1_bio, start_next_window);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;
-- 
2.10.2



* [PATCH 2/2] md/raid10: Refactor raid10_make_request
  2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
  2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-11-30 21:20 ` Robert LeBlanc
  2016-12-02  3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
  2 siblings, 0 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-11-30 21:20 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

Refactor raid10_make_request into separate read and write functions to
clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
 drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
 1 file changed, 139 insertions(+), 132 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index be1a9fc..d3bd756 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1046,150 +1046,89 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
-	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
-	unsigned long flags;
-	struct md_rdev *blocked_rdev;
-	struct blk_plug_cb *cb;
-	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
-
-	md_write_start(mddev, bio);
-
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
-	sectors = bio_sectors(bio);
-	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_iter.bi_sector < conf->reshape_progress &&
-	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
-		 */
-		allow_barrier(conf);
-		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
-			   conf->reshape_progress >= bio->bi_iter.bi_sector +
-			   sectors);
-		wait_barrier(conf);
-	}
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
-	    (mddev->reshape_backwards
-	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
-		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_iter.bi_sector < conf->reshape_progress))) {
-		/* Need to update reshape_position in metadata */
-		mddev->reshape_position = conf->reshape_progress;
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+	struct md_rdev *rdev;
+	int slot;
 
-		conf->reshape_safe = mddev->reshape_position;
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
 	}
+	slot = r10_bio->read_slot;
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
 
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
 
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	read_bio->bi_private = r10_bio;
 
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
+	if (max_sectors < r10_bio->sectors) {
+		/* Could not read all from this device, so we will
+		 * need another r10_bio.
 		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		read_bio->bi_private = r10_bio;
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/* Cannot call generic_make_request directly
+		 * as that will be queued in __generic_make_request
+		 * and subsequent mempool_alloc might block
+		 * waiting for it.  so hand bio over to raid10d.
+		 */
+		reschedule_retry(r10_bio);
 
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
 
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
+	int i;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+	unsigned long flags;
+	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
+	int sectors_handled;
+	int max_sectors;
+	md_write_start(mddev, bio);
 
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		wait_event(conf->wait_barrier,
@@ -1249,8 +1188,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1353,8 +1291,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1395,8 +1332,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						   r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1434,6 +1370,77 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+	int sectors;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/* IO spans the reshape position.  Need to wait for
+		 * reshape to pass
+		 */
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio_data_dir(bio) == WRITE &&
+	    (mddev->reshape_backwards
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
+		/* Need to update reshape_position in metadata */
+		mddev->reshape_position = conf->reshape_progress;
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+
+		conf->reshape_safe = mddev->reshape_position;
+	}
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = sectors;
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r10_bio and no locking
+	 * will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ) {
+		raid10_read_request(mddev, bio, r10_bio);
+		return;
+	}
+	raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
-- 
2.10.2



* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
  2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-12-01 16:04   ` John Stoffel
  2016-12-01 17:40     ` Robert LeBlanc
  2016-12-02  0:36   ` NeilBrown
  1 sibling, 1 reply; 11+ messages in thread
From: John Stoffel @ 2016-12-01 16:04 UTC (permalink / raw)
  To: Robert LeBlanc; +Cc: linux-raid

>>>>> "Robert" == Robert LeBlanc <robert@leblancnet.us> writes:

Robert> Refactor raid1_make_request to move the read and write code into
Robert> their own functions to clean up the code.

Robert> Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Robert> ---
Robert>  drivers/md/raid1.c | 244 +++++++++++++++++++++++++++--------------------------
Robert>  1 file changed, 126 insertions(+), 118 deletions(-)

Robert> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
Robert> index 21dc00e..5aa0f89 100644
Robert> --- a/drivers/md/raid1.c
Robert> +++ b/drivers/md/raid1.c
Robert> @@ -1032,17 +1032,96 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
Robert>  	kfree(plug);
Robert>  }
 
Robert> -static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert> +static void raid1_read_request(struct mddev *mddev, struct bio *bio,
Robert> +				 struct r1bio *r1_bio)
Robert>  {
Robert>  	struct r1conf *conf = mddev->private;
Robert>  	struct raid1_info *mirror;
Robert> -	struct r1bio *r1_bio;
Robert>  	struct bio *read_bio;
Robert> +	struct bitmap *bitmap = mddev->bitmap;
Robert> +	const int op = bio_op(bio);
Robert> +	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
Robert> +	int sectors_handled;
Robert> +	int max_sectors;
Robert> +	int rdisk;
Robert> +
Robert> +read_again:
Robert> +	rdisk = read_balance(conf, r1_bio, &max_sectors);
Robert> +
Robert> +	if (rdisk < 0) {
Robert> +		/* couldn't find anywhere to read from */
Robert> +		raid_end_bio_io(r1_bio);
Robert> +		return;
Robert> +	}
Robert> +	mirror = conf->mirrors + rdisk;
Robert> +
Robert> +	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
Robert> +	    bitmap) {
Robert> +		/* Reading from a write-mostly device must
Robert> +		 * take care not to over-take any writes
Robert> +		 * that are 'behind'
Robert> +		 */
Robert> +		wait_event(bitmap->behind_wait,
Robert> +			   atomic_read(&bitmap->behind_writes) == 0);
Robert> +	}
Robert> +	r1_bio->read_disk = rdisk;
Robert> +	r1_bio->start_next_window = 0;
Robert> +
Robert> +	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> +	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> +		 max_sectors);
Robert> +
Robert> +	r1_bio->bios[rdisk] = read_bio;
Robert> +
Robert> +	read_bio->bi_iter.bi_sector = r1_bio->sector +
Robert> +		mirror->rdev->data_offset;
Robert> +	read_bio->bi_bdev = mirror->rdev->bdev;
Robert> +	read_bio->bi_end_io = raid1_end_read_request;
Robert> +	bio_set_op_attrs(read_bio, op, do_sync);
Robert> +	read_bio->bi_private = r1_bio;
Robert> +
Robert> +	if (max_sectors < r1_bio->sectors) {
Robert> +		/* could not read all from this device, so we will
Robert> +		 * need another r1_bio.
Robert> +		 */
Robert> +
Robert> +		sectors_handled = (r1_bio->sector + max_sectors
Robert> +				   - bio->bi_iter.bi_sector);
Robert> +		r1_bio->sectors = max_sectors;
Robert> +		spin_lock_irq(&conf->device_lock);
Robert> +		if (bio->bi_phys_segments == 0)
Robert> +			bio->bi_phys_segments = 2;
Robert> +		else
Robert> +			bio->bi_phys_segments++;
Robert> +		spin_unlock_irq(&conf->device_lock);
Robert> +		/* Cannot call generic_make_request directly
Robert> +		 * as that will be queued in __make_request
Robert> +		 * and subsequent mempool_alloc might block waiting
Robert> +		 * for it.  So hand bio over to raid1d.
Robert> +		 */
Robert> +		reschedule_retry(r1_bio);
Robert> +
Robert> +		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> +
Robert> +		r1_bio->master_bio = bio;
Robert> +		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
Robert> +		r1_bio->state = 0;
Robert> +		r1_bio->mddev = mddev;
Robert> +		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
Robert> +		goto read_again;
Robert> +	} else
Robert> +		generic_make_request(read_bio);
Robert> +}
Robert> +
Robert> +static void raid1_write_request(struct mddev *mddev, struct bio *bio,
Robert> +				struct r1bio *r1_bio,
Robert> +				sector_t start_next_window)
Robert> +{
Robert> +	struct r1conf *conf = mddev->private;
Robert>  	int i, disks;
Robert> -	struct bitmap *bitmap;
Robert> +	struct bitmap *bitmap = mddev->bitmap;
Robert>  	unsigned long flags;
Robert>  	const int op = bio_op(bio);
Robert> -	const int rw = bio_data_dir(bio);
Robert>  	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
Robert>  	const unsigned long do_flush_fua = (bio->bi_opf &
Robert>  						(REQ_PREFLUSH | REQ_FUA));
Robert> @@ -1052,7 +1131,6 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert>  	int first_clone;
Robert>  	int sectors_handled;
Robert>  	int max_sectors;
Robert> -	sector_t start_next_window;
 
Robert>  	/*
Robert>  	 * Register the new request and wait if the reconstruction
Robert> @@ -1062,12 +1140,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
Robert>  	md_write_start(mddev, bio); /* wait on superblock update early */
 
Robert> -	if (bio_data_dir(bio) == WRITE &&
Robert> -	    ((bio_end_sector(bio) > mddev->suspend_lo &&
Robert> +	if ((bio_end_sector(bio) > mddev->suspend_lo &&
Robert>  	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
Robert>  	    (mddev_is_clustered(mddev) &&
Robert>  	     md_cluster_ops->area_resyncing(mddev, WRITE,
Robert> -		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
Robert> +		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
Robert>  		/* As the suspend_* range is controlled by
Robert>  		 * userspace, we want an interruptible
Robert>  		 * wait.
Robert> @@ -1081,119 +1158,14 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
Robert>  			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
Robert>  			    (mddev_is_clustered(mddev) &&
Robert>  			     !md_cluster_ops->area_resyncing(mddev, WRITE,
Robert> -				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
Robert> +				     bio->bi_iter.bi_sector,
Robert> +				     bio_end_sector(bio))))
Robert>  				break;
Robert>  			schedule();
Robert>  		}
Robert>  		finish_wait(&conf->wait_barrier, &w);
Robert>  	}
 
Robert> -	start_next_window = wait_barrier(conf, bio);
Robert> -
Robert> -	bitmap = mddev->bitmap;
Robert> -
Robert> -	/*
Robert> -	 * make_request() can abort the operation when read-ahead is being
Robert> -	 * used and no empty request is available.
Robert> -	 *
Robert> -	 */
Robert> -	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> -
Robert> -	r1_bio->master_bio = bio;
Robert> -	r1_bio->sectors = bio_sectors(bio);
Robert> -	r1_bio->state = 0;
Robert> -	r1_bio->mddev = mddev;
Robert> -	r1_bio->sector = bio->bi_iter.bi_sector;
Robert> -
Robert> -	/* We might need to issue multiple reads to different
Robert> -	 * devices if there are bad blocks around, so we keep
Robert> -	 * track of the number of reads in bio->bi_phys_segments.
Robert> -	 * If this is 0, there is only one r1_bio and no locking
Robert> -	 * will be needed when requests complete.  If it is
Robert> -	 * non-zero, then it is the number of not-completed requests.
Robert> -	 */
Robert> -	bio->bi_phys_segments = 0;
Robert> -	bio_clear_flag(bio, BIO_SEG_VALID);
Robert> -
Robert> -	if (rw == READ) {
Robert> -		/*
Robert> -		 * read balancing logic:
Robert> -		 */
Robert> -		int rdisk;
Robert> -
Robert> -read_again:
Robert> -		rdisk = read_balance(conf, r1_bio, &max_sectors);
Robert> -
Robert> -		if (rdisk < 0) {
Robert> -			/* couldn't find anywhere to read from */
Robert> -			raid_end_bio_io(r1_bio);
Robert> -			return;
Robert> -		}
Robert> -		mirror = conf->mirrors + rdisk;
Robert> -
Robert> -		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
Robert> -		    bitmap) {
Robert> -			/* Reading from a write-mostly device must
Robert> -			 * take care not to over-take any writes
Robert> -			 * that are 'behind'
Robert> -			 */
Robert> -			wait_event(bitmap->behind_wait,
Robert> -				   atomic_read(&bitmap->behind_writes) == 0);
Robert> -		}
Robert> -		r1_bio->read_disk = rdisk;
Robert> -		r1_bio->start_next_window = 0;
Robert> -
Robert> -		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> -		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> -			 max_sectors);
Robert> -
Robert> -		r1_bio->bios[rdisk] = read_bio;
Robert> -
Robert> -		read_bio->bi_iter.bi_sector = r1_bio->sector +
Robert> -			mirror->rdev->data_offset;
Robert> -		read_bio->bi_bdev = mirror->rdev->bdev;
Robert> -		read_bio->bi_end_io = raid1_end_read_request;
Robert> -		bio_set_op_attrs(read_bio, op, do_sync);
Robert> -		read_bio->bi_private = r1_bio;
Robert> -
Robert> -		if (max_sectors < r1_bio->sectors) {
Robert> -			/* could not read all from this device, so we will
Robert> -			 * need another r1_bio.
Robert> -			 */
Robert> -
Robert> -			sectors_handled = (r1_bio->sector + max_sectors
Robert> -					   - bio->bi_iter.bi_sector);
Robert> -			r1_bio->sectors = max_sectors;
Robert> -			spin_lock_irq(&conf->device_lock);
Robert> -			if (bio->bi_phys_segments == 0)
Robert> -				bio->bi_phys_segments = 2;
Robert> -			else
Robert> -				bio->bi_phys_segments++;
Robert> -			spin_unlock_irq(&conf->device_lock);
Robert> -			/* Cannot call generic_make_request directly
Robert> -			 * as that will be queued in __make_request
Robert> -			 * and subsequent mempool_alloc might block waiting
Robert> -			 * for it.  So hand bio over to raid1d.
Robert> -			 */
Robert> -			reschedule_retry(r1_bio);
Robert> -
Robert> -			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> -
Robert> -			r1_bio->master_bio = bio;
Robert> -			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
Robert> -			r1_bio->state = 0;
Robert> -			r1_bio->mddev = mddev;
Robert> -			r1_bio->sector = bio->bi_iter.bi_sector +
Robert> -				sectors_handled;
Robert> -			goto read_again;
Robert> -		} else
Robert> -			generic_make_request(read_bio);
Robert> -		return;
Robert> -	}
Robert> -
Robert> -	/*
Robert> -	 * WRITE:
Robert> -	 */
Robert>  	if (conf->pending_count >= max_queued_requests) {
Robert>  		md_wakeup_thread(mddev->thread);
Robert>  		wait_event(conf->wait_barrier,
Robert> @@ -1236,8 +1208,7 @@ read_again:
Robert>  			int bad_sectors;
Robert>  			int is_bad;
 
Robert> -			is_bad = is_badblock(rdev, r1_bio->sector,
Robert> -					     max_sectors,
Robert> +			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
Robert>  					     &first_bad, &bad_sectors);
Robert>  			if (is_bad < 0) {
Robert>  				/* mustn't write here until the bad block is
Robert> @@ -1325,7 +1296,8 @@ read_again:
Robert>  			continue;
 
Robert>  		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
Robert> -		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
Robert> +		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
Robert> +			 max_sectors);
 
Robert>  		if (first_clone) {
Robert>  			/* do behind I/O ?
Robert> @@ -1408,6 +1380,42 @@ read_again:
Robert>  	wake_up(&conf->wait_barrier);
Robert>  }
 
Robert> +static void raid1_make_request(struct mddev *mddev, struct bio *bio)
Robert> +{
Robert> +	struct r1conf *conf = mddev->private;
Robert> +	struct r1bio *r1_bio;
Robert> +	sector_t start_next_window = wait_barrier(conf, bio);
Robert> +
Robert> +	/*
Robert> +	 * make_request() can abort the operation when read-ahead is being
Robert> +	 * used and no empty request is available.
Robert> +	 *
Robert> +	 */
Robert> +	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
Robert> +
Robert> +	r1_bio->master_bio = bio;
Robert> +	r1_bio->sectors = bio_sectors(bio);
Robert> +	r1_bio->state = 0;
Robert> +	r1_bio->mddev = mddev;
Robert> +	r1_bio->sector = bio->bi_iter.bi_sector;
Robert> +
Robert> +	/* We might need to issue multiple reads to different
Robert> +	 * devices if there are bad blocks around, so we keep
Robert> +	 * track of the number of reads in bio->bi_phys_segments.
Robert> +	 * If this is 0, there is only one r1_bio and no locking
Robert> +	 * will be needed when requests complete.  If it is
Robert> +	 * non-zero, then it is the number of not-completed requests.
Robert> +	 */
Robert> +	bio->bi_phys_segments = 0;
Robert> +	bio_clear_flag(bio, BIO_SEG_VALID);
Robert> +
Robert> +	if (bio_data_dir(bio) == READ) {
Robert> +		raid1_read_request(mddev, bio, r1_bio);
Robert> +		return;
Robert> +	}
Robert> +	raid1_write_request(mddev, bio, r1_bio, start_next_window);
Robert> +}

Maybe this is pedantic, but why not have an 'else' instead of the
return to make it more clear?  If someone (like me :-) missed the
return on the quick read-through, it's going to cause some confusion.

if (bio_data_dir(bio) == READ)
   raid1_read_request(mddev, bio, r1_bio);
else
   raid1_write_request(mddev, bio, r1_bio, start_next_window);

seems cleaner.

Also, why are the calls different with the start_next_window parameter
added in?  Sorry for being dense, trying to understand the code
better...

John


* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
  2016-12-01 16:04   ` John Stoffel
@ 2016-12-01 17:40     ` Robert LeBlanc
  2016-12-01 22:00       ` John Stoffel
  0 siblings, 1 reply; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-01 17:40 UTC (permalink / raw)
  To: John Stoffel; +Cc: linux-raid

On Thu, Dec 1, 2016 at 9:04 AM, John Stoffel <john@stoffel.org> wrote:
> Maybe this is pedantic, but why not have an 'else' instead of the
> return to make it more clear?  If someone (like me :-) missed the
> return on the quick read-through, it's going to cause some confusion.
>
> if (bio_data_dir(bio) == READ)
>    raid1_read_request(mddev, bio, r1_bio);
> else
>    raid1_write_request(mddev, bio, r1_bio, start_next_window);
>
> seems cleaner.

Yeah, I agree that is a bit cleaner; I was just following the previous
style. I'll change it to what you propose.

> Also, why are the calls different with the start_next_window parameter
> added in?  Sorry for being dense, trying to understand the code
> better...

start_next_window is set by wait_barrier() and is only non-zero for
writes; its return value is never used for reads. I tried to move that
code into the write branch, but then I could not unload the module
because conf->nr_pending == conf->queued+extra kept failing in
wait_event_lock_irq_cmd() in freeze_array(). Each read was decrementing
conf->nr_pending, and since wait_barrier() was no longer being called
on reads, conf->nr_pending went very negative and never returned to
zero. Refactoring that out completely would take a lot more work and
changes; I'd need to research how much the spin locks are needed for
reads while recovery is going on before I could try to refactor that.
The other option is to break wait_barrier() into two functions, one
called for both reads and writes and the other for just the writes. I
can do the latter pretty easily if that is a desired direction.
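
Roughly, the split I have in mind might look like this (untested
sketch; _wait_barrier_common() and wait_write_barrier() are made-up
names, and the real window bookkeeping in wait_barrier() is elided):

static void _wait_barrier_common(struct r1conf *conf)
{
	spin_lock_irq(&conf->resync_lock);
	/* Wait out any active resync barrier, then account this I/O so
	 * that the nr_pending == queued+extra check in freeze_array()
	 * balances for reads as well as writes. */
	wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
			    conf->resync_lock);
	conf->nr_pending++;
	spin_unlock_irq(&conf->resync_lock);
}

static sector_t wait_write_barrier(struct r1conf *conf, struct bio *bio)
{
	sector_t window;

	_wait_barrier_common(conf);
	spin_lock_irq(&conf->resync_lock);
	/* Write-only part: pick the window for this bio; the real
	 * logic from wait_barrier(), which looks at bio->bi_iter,
	 * would move here. */
	window = conf->start_next_window;
	spin_unlock_irq(&conf->resync_lock);
	return window;
}

Reads would call _wait_barrier_common() directly and writes would call
wait_write_barrier(), so conf->nr_pending stays balanced either way.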

----------------
Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904  C70E E654 3BB2 FA62 B9F1


* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
  2016-12-01 17:40     ` Robert LeBlanc
@ 2016-12-01 22:00       ` John Stoffel
  0 siblings, 0 replies; 11+ messages in thread
From: John Stoffel @ 2016-12-01 22:00 UTC (permalink / raw)
  To: Robert LeBlanc; +Cc: John Stoffel, linux-raid

>>>>> "Robert" == Robert LeBlanc <robert@leblancnet.us> writes:

Robert> On Thu, Dec 1, 2016 at 9:04 AM, John Stoffel <john@stoffel.org> wrote:
>> Maybe this is pedantic, but why not have an 'else' instead of the
>> return to make it more clear?  If someone (like me :-) missed the
>> return on the quick read-through, it's going to cause some confusion.
>> 
>> if (bio_data_dir(bio) == READ)
>>    raid1_read_request(mddev, bio, r1_bio);
>> else
>>    raid1_write_request(mddev, bio, r1_bio, start_next_window);
>> 
>> seems cleaner.

Robert> Yeah, I agree that is a bit cleaner; I was just following the
Robert> previous style. I'll change it to what you propose.

It's not a big deal; whatever makes you happy.

>> Also, why are the calls different with the start_next_window parameter
>> added in?  Sorry for being dense, trying to understand the code
>> better...

Robert> start_next_window is set by wait_barrier() and is only
Robert> non-zero for writes; its return value is never used for reads.
Robert> I tried to move that code into the write branch, but then I
Robert> could not unload the module because conf->nr_pending ==
Robert> conf->queued+extra kept failing in wait_event_lock_irq_cmd()
Robert> in freeze_array(). Each read was decrementing
Robert> conf->nr_pending, and since wait_barrier() was no longer being
Robert> called on reads, conf->nr_pending went very negative and never
Robert> returned to zero. Refactoring that out completely would take a
Robert> lot more work and changes; I'd need to research how much the
Robert> spin locks are needed for reads while recovery is going on
Robert> before I could try to refactor that. The other option is to
Robert> break wait_barrier() into two functions, one called for both
Robert> reads and writes and the other for just the writes. I can do
Robert> the latter pretty easily if that is a desired direction.

That second option might be the way to move forward.  But it sounds
like a bigger refactoring than is really needed now.

John


* Re: [PATCH 1/2] md/raid1: Refactor raid1_make_request
  2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
  2016-12-01 16:04   ` John Stoffel
@ 2016-12-02  0:36   ` NeilBrown
  1 sibling, 0 replies; 11+ messages in thread
From: NeilBrown @ 2016-12-02  0:36 UTC (permalink / raw)
  To: Robert LeBlanc, linux-raid


On Thu, Dec 01 2016, Robert LeBlanc wrote:

> Refactor raid1_make_request to move the read and write code into their
> own functions to clean up the code.

There is a change in here that is more than just a refactoring.

You have moved the wait_barrier() call from after the checks on
->suspend_{lo,hi} to before.
If a write was sent to a suspended region, this will block any pending
resync until the suspended region is moved.
The previous code won't block resync in this case.
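
To spell the ordering out (a schematic only, not the literal code):

	/* before this patch, in raid1_make_request(): */
	md_write_start(mddev, bio);
	/* ... wait here while the bio overlaps suspend_lo/suspend_hi ... */
	start_next_window = wait_barrier(conf, bio);

	/* with this patch: */
	start_next_window = wait_barrier(conf, bio);	/* resync held off */
	md_write_start(mddev, bio);
	/* ... wait here while the bio overlaps suspend_lo/suspend_hi,
	 * now with the barrier already held, so resync is blocked. */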

I cannot see that this would actually have any practical significance,
as I don't think the suspend_hi/lo settings are used at all for raid1.
I'm less certain about the ->area_resyncing() test, though that probably
isn't a problem.

But it should at least be noted in the change-log that there is
potentially a functional change here.

Thanks,
NeilBrown



* [PATCH v2 0/2] Reorganize raid*_make_request to clean up code
  2016-11-30 21:20 [PATCH 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
  2016-11-30 21:20 ` [PATCH 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
  2016-11-30 21:20 ` [PATCH 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
@ 2016-12-02  3:30 ` Robert LeBlanc
  2016-12-02  3:30   ` [PATCH v2 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
  2016-12-02  3:30   ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
  2 siblings, 2 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02  3:30 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

In response to Christoph's feedback, I've broken the read and write paths
into their own functions to make the code even cleaner. Since it is such a
big change, I broke the commits up into this series instead of creating a v2
of the previous patch.

Changes since v1:
 
 John Stoffel
 * Changed to if/else instead of an early return in raid1_make_request

 Neil Brown
 * Moved wait_barrier into raid1_{read,write}_request so that it comes after
   the ->suspend_{hi,lo} checks. This prevents a write from blocking a resync
   until the suspended region is moved.

Robert LeBlanc (2):
  md/raid1: Refactor raid1_make_request
  md/raid10: Refactor raid10_make_request

 drivers/md/raid1.c  | 241 ++++++++++++++++++++++++----------------------
 drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
 2 files changed, 264 insertions(+), 248 deletions(-)

-- 
2.10.2



* [PATCH v2 1/2] md/raid1: Refactor raid1_make_request
  2016-12-02  3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
@ 2016-12-02  3:30   ` Robert LeBlanc
  2016-12-02  3:30   ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
  1 sibling, 0 replies; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02  3:30 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

Refactor raid1_make_request to move the read and write code into their
own functions to clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
 drivers/md/raid1.c | 241 +++++++++++++++++++++++++++--------------------------
 1 file changed, 125 insertions(+), 116 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 21dc00e..0ea6541 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1032,17 +1032,97 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void raid1_make_request(struct mddev *mddev, struct bio * bio)
+static void raid1_read_request(struct mddev *mddev, struct bio *bio,
+				 struct r1bio *r1_bio)
 {
 	struct r1conf *conf = mddev->private;
 	struct raid1_info *mirror;
-	struct r1bio *r1_bio;
 	struct bio *read_bio;
+	struct bitmap *bitmap = mddev->bitmap;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	int sectors_handled;
+	int max_sectors;
+	int rdisk;
+
+	wait_barrier(conf, bio);
+
+read_again:
+	rdisk = read_balance(conf, r1_bio, &max_sectors);
+
+	if (rdisk < 0) {
+		/* couldn't find anywhere to read from */
+		raid_end_bio_io(r1_bio);
+		return;
+	}
+	mirror = conf->mirrors + rdisk;
+
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    bitmap) {
+		/* Reading from a write-mostly device must
+		 * take care not to over-take any writes
+		 * that are 'behind'
+		 */
+		wait_event(bitmap->behind_wait,
+			   atomic_read(&bitmap->behind_writes) == 0);
+	}
+	r1_bio->read_disk = rdisk;
+	r1_bio->start_next_window = 0;
+
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
+
+	r1_bio->bios[rdisk] = read_bio;
+
+	read_bio->bi_iter.bi_sector = r1_bio->sector +
+		mirror->rdev->data_offset;
+	read_bio->bi_bdev = mirror->rdev->bdev;
+	read_bio->bi_end_io = raid1_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	read_bio->bi_private = r1_bio;
+
+	if (max_sectors < r1_bio->sectors) {
+		/* could not read all from this device, so we will
+		 * need another r1_bio.
+		 */
+
+		sectors_handled = (r1_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r1_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/* Cannot call generic_make_request directly
+		 * as that will be queued in __make_request
+		 * and subsequent mempool_alloc might block waiting
+		 * for it.  So hand bio over to raid1d.
+		 */
+		reschedule_retry(r1_bio);
+
+		r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+		r1_bio->master_bio = bio;
+		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r1_bio->state = 0;
+		r1_bio->mddev = mddev;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+}
+
+static void raid1_write_request(struct mddev *mddev, struct bio *bio,
+				struct r1bio *r1_bio)
+{
+	struct r1conf *conf = mddev->private;
 	int i, disks;
-	struct bitmap *bitmap;
+	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_opf &
 						(REQ_PREFLUSH | REQ_FUA));
@@ -1062,12 +1142,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (bio_data_dir(bio) == WRITE &&
-	    ((bio_end_sector(bio) > mddev->suspend_lo &&
+	if ((bio_end_sector(bio) > mddev->suspend_lo &&
 	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
 	    (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
+		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
 		/* As the suspend_* range is controlled by
 		 * userspace, we want an interruptible
 		 * wait.
@@ -1081,119 +1160,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
 			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
 			    (mddev_is_clustered(mddev) &&
 			     !md_cluster_ops->area_resyncing(mddev, WRITE,
-				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
+				     bio->bi_iter.bi_sector,
+				     bio_end_sector(bio))))
 				break;
 			schedule();
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
-
 	start_next_window = wait_barrier(conf, bio);
 
-	bitmap = mddev->bitmap;
-
-	/*
-	 * make_request() can abort the operation when read-ahead is being
-	 * used and no empty request is available.
-	 *
-	 */
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-	r1_bio->master_bio = bio;
-	r1_bio->sectors = bio_sectors(bio);
-	r1_bio->state = 0;
-	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_iter.bi_sector;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r1_bio and no locking
-	 * will be needed when requests complete.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		int rdisk;
-
-read_again:
-		rdisk = read_balance(conf, r1_bio, &max_sectors);
-
-		if (rdisk < 0) {
-			/* couldn't find anywhere to read from */
-			raid_end_bio_io(r1_bio);
-			return;
-		}
-		mirror = conf->mirrors + rdisk;
-
-		if (test_bit(WriteMostly, &mirror->rdev->flags) &&
-		    bitmap) {
-			/* Reading from a write-mostly device must
-			 * take care not to over-take any writes
-			 * that are 'behind'
-			 */
-			wait_event(bitmap->behind_wait,
-				   atomic_read(&bitmap->behind_writes) == 0);
-		}
-		r1_bio->read_disk = rdisk;
-		r1_bio->start_next_window = 0;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r1_bio->bios[rdisk] = read_bio;
-
-		read_bio->bi_iter.bi_sector = r1_bio->sector +
-			mirror->rdev->data_offset;
-		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		read_bio->bi_private = r1_bio;
-
-		if (max_sectors < r1_bio->sectors) {
-			/* could not read all from this device, so we will
-			 * need another r1_bio.
-			 */
-
-			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r1_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __make_request
-			 * and subsequent mempool_alloc might block waiting
-			 * for it.  So hand bio over to raid1d.
-			 */
-			reschedule_retry(r1_bio);
-
-			r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
-
-			r1_bio->master_bio = bio;
-			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r1_bio->state = 0;
-			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
-
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		wait_event(conf->wait_barrier,
@@ -1236,8 +1211,7 @@ read_again:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, r1_bio->sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* mustn't write here until the bad block is
@@ -1325,7 +1299,8 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1408,6 +1383,40 @@ read_again:
 	wake_up(&conf->wait_barrier);
 }
 
+static void raid1_make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	struct r1bio *r1_bio;
+
+	/*
+	 * make_request() can abort the operation when read-ahead is being
+	 * used and no empty request is available.
+	 *
+	 */
+	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+	r1_bio->master_bio = bio;
+	r1_bio->sectors = bio_sectors(bio);
+	r1_bio->state = 0;
+	r1_bio->mddev = mddev;
+	r1_bio->sector = bio->bi_iter.bi_sector;
+
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r1_bio and no locking
+	 * will be needed when requests complete.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid1_read_request(mddev, bio, r1_bio);
+	else
+		raid1_write_request(mddev, bio, r1_bio);
+}
+
 static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct r1conf *conf = mddev->private;
-- 
2.10.2



* [PATCH v2 2/2] md/raid10: Refactor raid10_make_request
  2016-12-02  3:30 ` [PATCH v2 0/2] Reorganize raid*_make_request to clean up code Robert LeBlanc
  2016-12-02  3:30   ` [PATCH v2 1/2] md/raid1: Refactor raid1_make_request Robert LeBlanc
@ 2016-12-02  3:30   ` Robert LeBlanc
  2016-12-02 22:02     ` Shaohua Li
  1 sibling, 1 reply; 11+ messages in thread
From: Robert LeBlanc @ 2016-12-02  3:30 UTC (permalink / raw)
  To: linux-raid; +Cc: Robert LeBlanc

Refactor raid10_make_request into separate read and write functions to
clean up the code.

Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
---
 drivers/md/raid10.c | 271 +++++++++++++++++++++++++++-------------------------
 1 file changed, 139 insertions(+), 132 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index be1a9fc..d3bd756 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1046,150 +1046,89 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
-	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
-	unsigned long flags;
-	struct md_rdev *blocked_rdev;
-	struct blk_plug_cb *cb;
-	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
-
-	md_write_start(mddev, bio);
-
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
-	sectors = bio_sectors(bio);
-	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_iter.bi_sector < conf->reshape_progress &&
-	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
-		 */
-		allow_barrier(conf);
-		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
-			   conf->reshape_progress >= bio->bi_iter.bi_sector +
-			   sectors);
-		wait_barrier(conf);
-	}
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
-	    (mddev->reshape_backwards
-	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
-		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_iter.bi_sector < conf->reshape_progress))) {
-		/* Need to update reshape_position in metadata */
-		mddev->reshape_position = conf->reshape_progress;
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+	struct md_rdev *rdev;
+	int slot;
 
-		conf->reshape_safe = mddev->reshape_position;
+read_again:
+	rdev = read_balance(conf, r10_bio, &max_sectors);
+	if (!rdev) {
+		raid_end_bio_io(r10_bio);
+		return;
 	}
+	slot = r10_bio->read_slot;
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
+	read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+	bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
+		 max_sectors);
 
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
+	r10_bio->devs[slot].bio = read_bio;
+	r10_bio->devs[slot].rdev = rdev;
 
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
+	read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
+		choose_data_offset(r10_bio, rdev);
+	read_bio->bi_bdev = rdev->bdev;
+	read_bio->bi_end_io = raid10_end_read_request;
+	bio_set_op_attrs(read_bio, op, do_sync);
+	read_bio->bi_private = r10_bio;
 
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
+	if (max_sectors < r10_bio->sectors) {
+		/* Could not read all from this device, so we will
+		 * need another r10_bio.
 		 */
-		struct md_rdev *rdev;
-		int slot;
-
-read_again:
-		rdev = read_balance(conf, r10_bio, &max_sectors);
-		if (!rdev) {
-			raid_end_bio_io(r10_bio);
-			return;
-		}
-		slot = r10_bio->read_slot;
-
-		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
-			 max_sectors);
-
-		r10_bio->devs[slot].bio = read_bio;
-		r10_bio->devs[slot].rdev = rdev;
-
-		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
-			choose_data_offset(r10_bio, rdev);
-		read_bio->bi_bdev = rdev->bdev;
-		read_bio->bi_end_io = raid10_end_read_request;
-		bio_set_op_attrs(read_bio, op, do_sync);
-		read_bio->bi_private = r10_bio;
+		sectors_handled = (r10_bio->sector + max_sectors
+				   - bio->bi_iter.bi_sector);
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+		/* Cannot call generic_make_request directly
+		 * as that will be queued in __generic_make_request
+		 * and subsequent mempool_alloc might block
+		 * waiting for it.  so hand bio over to raid10d.
+		 */
+		reschedule_retry(r10_bio);
 
-		if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
-			 */
-			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_iter.bi_sector);
-			r10_bio->sectors = max_sectors;
-			spin_lock_irq(&conf->device_lock);
-			if (bio->bi_phys_segments == 0)
-				bio->bi_phys_segments = 2;
-			else
-				bio->bi_phys_segments++;
-			spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
-			 */
-			reschedule_retry(r10_bio);
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
-			r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
+		r10_bio->state = 0;
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
+		goto read_again;
+	} else
+		generic_make_request(read_bio);
+	return;
+}
 
-			r10_bio->master_bio = bio;
-			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
-			r10_bio->state = 0;
-			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
-			goto read_again;
-		} else
-			generic_make_request(read_bio);
-		return;
-	}
+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
+	int i;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+	unsigned long flags;
+	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
+	int sectors_handled;
+	int max_sectors;
+	md_write_start(mddev, bio);
 
-	/*
-	 * WRITE:
-	 */
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		wait_event(conf->wait_barrier,
@@ -1249,8 +1188,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;
 
-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@@ -1353,8 +1291,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1395,8 +1332,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;
 
 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						   r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1434,6 +1370,77 @@ retry_write:
 	one_write_done(r10_bio);
 }
 
+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+	int sectors;
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/* IO spans the reshape position.  Need to wait for
+		 * reshape to pass
+		 */
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio_data_dir(bio) == WRITE &&
+	    (mddev->reshape_backwards
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
+		/* Need to update reshape_position in metadata */
+		mddev->reshape_position = conf->reshape_progress;
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
+
+		conf->reshape_safe = mddev->reshape_position;
+	}
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = sectors;
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/* We might need to issue multiple reads to different
+	 * devices if there are bad blocks around, so we keep
+	 * track of the number of reads in bio->bi_phys_segments.
+	 * If this is 0, there is only one r10_bio and no locking
+	 * will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ) {
+		raid10_read_request(mddev, bio, r10_bio);
+		return;
+	}
+	raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
-- 
2.10.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 2/2] md/raid10: Refactor raid10_make_request
  2016-12-02  3:30   ` [PATCH v2 2/2] md/raid10: Refactor raid10_make_request Robert LeBlanc
@ 2016-12-02 22:02     ` Shaohua Li
  0 siblings, 0 replies; 11+ messages in thread
From: Shaohua Li @ 2016-12-02 22:02 UTC (permalink / raw)
  To: Robert LeBlanc; +Cc: linux-raid

On Thu, Dec 01, 2016 at 08:30:08PM -0700, Robert LeBlanc wrote:
> Refactor raid10_make_request into separate read and write functions to
> clean up the code.
> 
> Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
> ---

Hi,

Could you please resend the patches against my for-next branch? The two
patches don't apply.

>  			int bad_sectors;
>  			int is_bad;
>  
> -			is_bad = is_badblock(rdev, dev_sector,
> -					     max_sectors,
> +			is_bad = is_badblock(rdev, dev_sector, max_sectors,
>  					     &first_bad, &bad_sectors);
>  			if (is_bad < 0) {
>  				/* Mustn't write here until the bad block
> @@ -1353,8 +1291,7 @@ retry_write:
>  			r10_bio->devs[i].bio = mbio;
>  
>  			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
> -					   choose_data_offset(r10_bio,
> -							      rdev));
> +					   choose_data_offset(r10_bio, rdev));
>  			mbio->bi_bdev = rdev->bdev;
>  			mbio->bi_end_io	= raid10_end_write_request;
>  			bio_set_op_attrs(mbio, op, do_sync | do_fua);
> @@ -1395,8 +1332,7 @@ retry_write:
>  			r10_bio->devs[i].repl_bio = mbio;
>  
>  			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
> -					   choose_data_offset(
> -						   r10_bio, rdev));
> +					   choose_data_offset(r10_bio, rdev));
>  			mbio->bi_bdev = rdev->bdev;
>  			mbio->bi_end_io	= raid10_end_write_request;
>  			bio_set_op_attrs(mbio, op, do_sync | do_fua);
> @@ -1434,6 +1370,77 @@ retry_write:
>  	one_write_done(r10_bio);
>  }
>  
> +static void __make_request(struct mddev *mddev, struct bio *bio)
> +{
> +	struct r10conf *conf = mddev->private;
> +	struct r10bio *r10_bio;
> +	int sectors;
> +

We do wait_barrier() before md_write_start() now; I'm not comfortable with
this. Could you please add md_write_start() here? A single line of code for
the write case here doesn't matter.
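
Something like this (an untested sketch of the ordering I mean;
md_write_start() is a no-op for reads, so the unconditional call is
harmless):

	static void __make_request(struct mddev *mddev, struct bio *bio)
	{
		struct r10conf *conf = mddev->private;
		struct r10bio *r10_bio;
		int sectors;

		md_write_start(mddev, bio);	/* returns early for reads */

		/* only then register with the resync barrier */
		wait_barrier(conf);
		...
	}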

> +	/*
> +	 * Register the new request and wait if the reconstruction
> +	 * thread has put up a bar for new requests.
> +	 * Continue immediately if no resync is active currently.
> +	 */
> +	wait_barrier(conf);
> +
> +	sectors = bio_sectors(bio);
> +	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> +	    bio->bi_iter.bi_sector < conf->reshape_progress &&
> +	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
> +		/* IO spans the reshape position.  Need to wait for
> +		 * reshape to pass
> +		 */
> +		allow_barrier(conf);
> +		wait_event(conf->wait_barrier,
> +			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
> +			   conf->reshape_progress >= bio->bi_iter.bi_sector +
> +			   sectors);
> +		wait_barrier(conf);
> +	}
> +	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> +	    bio_data_dir(bio) == WRITE &&
> +	    (mddev->reshape_backwards
> +	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
> +		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
> +	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
> +		bio->bi_iter.bi_sector < conf->reshape_progress))) {
> +		/* Need to update reshape_position in metadata */
> +		mddev->reshape_position = conf->reshape_progress;
> +		set_mask_bits(&mddev->flags, 0,
> +			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
> +		md_wakeup_thread(mddev->thread);
> +		wait_event(mddev->sb_wait,
> +			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
> +
> +		conf->reshape_safe = mddev->reshape_position;
> +	}

This is write-only and could be moved to the write path.
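
As an untested sketch (inside raid10_write_request() the
bio_data_dir() == WRITE test becomes redundant, and the sector count is
already available as r10_bio->sectors):

	static void raid10_write_request(struct mddev *mddev, struct bio *bio,
					 struct r10bio *r10_bio)
	{
		...
		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    (mddev->reshape_backwards
		     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
			bio->bi_iter.bi_sector + r10_bio->sectors >
			conf->reshape_progress)
		     : (bio->bi_iter.bi_sector + r10_bio->sectors >
			conf->reshape_safe &&
			bio->bi_iter.bi_sector < conf->reshape_progress))) {
			/* Need to update reshape_position in metadata */
			mddev->reshape_position = conf->reshape_progress;
			...
		}
		...
	}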

> +
> +	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
> +
> +	r10_bio->master_bio = bio;
> +	r10_bio->sectors = sectors;
> +
> +	r10_bio->mddev = mddev;
> +	r10_bio->sector = bio->bi_iter.bi_sector;
> +	r10_bio->state = 0;
> +
> +	/* We might need to issue multiple reads to different
> +	 * devices if there are bad blocks around, so we keep
> +	 * track of the number of reads in bio->bi_phys_segments.
> +	 * If this is 0, there is only one r10_bio and no locking
> +	 * will be needed when the request completes.  If it is
> +	 * non-zero, then it is the number of not-completed requests.
> +	 */
> +	bio->bi_phys_segments = 0;
> +	bio_clear_flag(bio, BIO_SEG_VALID);
> +
> +	if (bio_data_dir(bio) == READ) {
> +		raid10_read_request(mddev, bio, r10_bio);
> +		return;
> +	}
Better do the same as raid1 here.
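
I.e., mirroring the raid1 patch:

	if (bio_data_dir(bio) == READ)
		raid10_read_request(mddev, bio, r10_bio);
	else
		raid10_write_request(mddev, bio, r10_bio);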

> +	raid10_write_request(mddev, bio, r10_bio);
> +}


Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 11+ messages in thread

