All of lore.kernel.org
 help / color / mirror / Atom feed
* Subject:[PATCH 010:013]: raid0: reshape core code
@ 2009-06-16 21:58 raz ben yehuda
  0 siblings, 0 replies; only message in thread
From: raz ben yehuda @ 2009-06-16 21:58 UTC (permalink / raw)
  To: linux raid, Neil Brown

reshape core code. it includes:
. online reshape
. resume reshape
. reverse mapping (from disk to raid) is done by saving the raid0 offset in
  a raid0_reshape_bio record allocated per bio.
. start_reshape is added to support resume reshape ( as a flag ).

Algorithm basics:
	. create a new temporary mapping.
	. raid0d starts the reshape process.
	. in raid0_sync, I read a full zone stripe, wait, and write this stripe
		to its new raid position. Once done, I update the superblocks.
	. reshape is complete when find_zone returns NULL.
	. raid0d calls spare_active to finish.
	. incoming ios are redirected and never handled in raid0_make_request context.
	. incoming ios are routed over a sliding window.
	. incoming ios have higher priority than reshape ios.

 raid0.c |  685 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 675 insertions(+), 10 deletions(-)

Signed-off-by: razb <raziebe@gmail.com>
---
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 984d603..0b2c2e5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -23,6 +23,47 @@
 #include "md.h"
 #include "raid0.h"
 
+#define RAID0_RESHAPE_START	0x01	/* bit number in reshape->flags */
+#define RAID0_RESHAPE_END	0x02	/* bit number in reshape->flags */
+
+static int reshape_init(mddev_t *mddev);	/* called by raid0_run, defined later */
+/*
+ * raid0d, the md thread body: idle unless a reshape record exists with its
+ * START or END flag set, in which case md_check_recovery() is invoked.
+ */
+static void raid0d(mddev_t *mddev)
+{
+	raid0_conf_t *conf = mddev->private;
+	if (!conf->reshape)
+		return;	/* no reshape record: nothing to do */
+	if (test_bit(RAID0_RESHAPE_START, &conf->reshape->flags)
+		|| test_bit(RAID0_RESHAPE_END, &conf->reshape->flags)) {
+		if (mddev->sync_thread)
+			conf->reshape->flags = 0;	/* sync thread exists: drop both flags */
+		md_check_recovery(mddev);
+	}
+}
+
+/*
+ * Make sure the raid0d md thread exists (registering it on first use) and
+ * wake it up. Returns 0 on success, -1 if the thread cannot be registered.
+ */
+static int start_raid0d(mddev_t *mddev)
+{
+	if (!mddev->thread) {
+		mddev->thread = md_register_thread(raid0d, mddev,
+							"%s_raid0d");
+		if (!mddev->thread) {
+			printk(KERN_ERR
+				"raid0: couldn't allocate thread for %s\n",
+				mdname(mddev));
+			return -1;
+		}
+	}
+	/* common tail: an existing or a freshly registered thread is woken */
+	md_wakeup_thread(mddev->thread);
+	return 0;
+}
 static void raid0_unplug(struct request_queue *q)
 {
 	mddev_t *mddev = q->queuedata;
@@ -372,7 +413,18 @@ static int raid0_run(mddev_t *mddev)
 	if (!conf)
 		goto abort;
 	mddev->private = conf;
-
+	/*
+	 *  I am doing it only to eliminate the
+	 *  resync=PENDING in mdstats with sb ver= 1.
+	*/
+	if (mddev->recovery_cp == 0)
+		mddev->recovery_cp = MaxSector;
+	if (mddev->reshape_position != MaxSector) {
+		mddev->recovery_cp = mddev->reshape_position;
+		printk(KERN_INFO "raid0: %s detected reshape "
+			"recovery. ended at=%lld\n", mdname(mddev),
+			(unsigned long long)mddev->recovery_cp);
+	}
 	if (create_strip_zones(conf, &mddev->disks,
 			mddev->chunk_sectors,  mddev->raid_disks))
 		goto abort;
@@ -394,6 +446,12 @@ static int raid0_run(mddev_t *mddev)
 	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
 	print_conf(mddev->private, mddev->raid_disks, mdname(mddev));
 	list_splice(&new_disks, &mddev->disks);
+	mutex_init(&conf->reshape_lock);
+	if (mddev->reshape_position != MaxSector) {
+		if (reshape_init(mddev))
+			goto abort;
+		start_raid0d(mddev);
+	}
 	return 0;
 abort:
 	{
@@ -410,6 +468,20 @@ static int raid0_stop(mddev_t *mddev)
 {
 	raid0_conf_t *conf = mddev->private;
 
+	if (mddev->thread) {
+		md_unregister_thread(mddev->thread);
+		mddev->thread = 0;
+	}
+	if (conf->reshape) {
+		struct raid0_reshape *reshape = conf->reshape;
+		/* a reshape process is going on */
+		printk(KERN_INFO "raid0: %s, stopping while reshape\n",
+				mdname(mddev));
+		kfree(reshape->conf->strip_zone);
+		kfree(reshape->conf->devlist);
+		kfree(reshape->conf);
+		kfree(reshape);
+	}
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	kfree(conf->strip_zone);
 	kfree(conf->devlist);
@@ -502,7 +574,6 @@ static int make_request(struct request_queue *q,
 			unsigned int chunk_sectors,
 			struct bio *bio)
 {
-	mddev_t *mddev = q->queuedata;
 	sector_t sector_offset;
 	struct strip_zone *zone;
 	mdk_rdev_t *tmp_dev;
@@ -539,15 +610,11 @@ static int make_request(struct request_queue *q,
 	}
 
 	sector_offset = bio->bi_sector;
-	zone =  find_zone(mddev->private, &sector_offset);
+	zone =  find_zone(conf , &sector_offset);
 	if (!zone)
 		BUG();
-	tmp_dev = map_sector(mddev->private,
-				chunk_sectors,
-				raid_disks,
-				zone,
-				bio->bi_sector,
-				&sector_offset);
+	tmp_dev = map_sector(conf, chunk_sectors, raid_disks, zone,
+				bio->bi_sector,	&sector_offset);
 	bio->bi_bdev = tmp_dev->bdev;
 	bio->bi_sector = sector_offset + zone->dev_start +
 		tmp_dev->data_offset;
@@ -581,7 +648,17 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
-
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
+		unsigned long flags;
+		/*
+		* IO must moves to reshape context,
+		*/
+		struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+		spin_lock_irqsave(&reshape->lock, flags);
+		bio_list_add(&reshape->incoming_ios, bio);
+		spin_unlock_irqrestore(&reshape->lock, flags);
+		return 0;
+	}
 	return make_request(q, mddev->private,
 				mddev->raid_disks,
 				mddev->chunk_sectors, bio);
@@ -620,6 +697,581 @@ static void raid0_status(struct seq_file *seq, mddev_t *mddev)
 }
 

+/*
+ * Reshape read completion: on success queue the bio on reshape->ios for the
+ * write phase; on error release its pages, the bio and its per-bio record.
+ */
+static void reshape_read_endio(struct bio *bi, int error)
+{
+	int i;
+	struct raid0_reshape_bio *r = bi->bi_private;
+	struct raid0_reshape *reshape = r->reshape;
+
+	if (!error && test_bit(BIO_UPTODATE, &bi->bi_flags)) {
+		unsigned long flags;
+		spin_lock_irqsave(&reshape->lock, flags);
+		bio_list_add(&reshape->ios, bi);
+		spin_unlock_irqrestore(&reshape->lock, flags);
+		return;
+	}
+	printk(KERN_ERR "raid0: reshape read end io: io error sector=%llu\n",
+				(unsigned long long)bi->bi_sector);
+	for (i = 0; i < bi->bi_vcnt; i++)
+		safe_put_page(bi->bi_io_vec[i].bv_page);
+	bio_put(bi);
+	kfree(r);	/* the write endio, which normally frees r, never sees this bio */
+	atomic_dec(&reshape->active_ios);	/* NOTE(review): no md_done_sync() on this path */
+}
+
+/*
+ * Reshape write completion. An error is only logged: the pages, the bio and
+ * its record are always released and the sectors reported via md_done_sync.
+ */
+static void reshape_write_endio(struct bio *bi, int error)
+{
+	struct raid0_reshape_bio *rec = bi->bi_private;
+	struct raid0_reshape *reshape = rec->reshape;
+	int failed = error || !test_bit(BIO_UPTODATE, &bi->bi_flags);
+	int pg;
+
+	if (failed)
+		printk(KERN_ERR "raid0: reshape write endio:"
+				" io error sector=%llu\n",
+			(unsigned long long)bi->bi_sector);
+
+	for (pg = 0; pg < bi->bi_vcnt; pg++)
+		safe_put_page(bi->bi_io_vec[pg].bv_page);
+	bio_put(bi);
+	atomic_dec(&reshape->active_ios);
+	md_done_sync(reshape->mddev_src, rec->bi_size>>9, 1);
+	kfree(rec);
+}
+
+static sector_t real_to_virtual(struct bio *bi)
+{
+	/* the array sector was saved in the bio's reshape record */
+	return ((struct raid0_reshape_bio *)bi->bi_private)->array_sector;
+}
+/*
+ * Turn a completed reshape read into a write: remap the bio's saved array
+ * sector through the target (new) layout and submit it.
+*/
+static void process_reshape_writes(mddev_t *mddev, struct bio *bi)
+{
+	mdk_rdev_t *tmp_dev;
+	sector_t sector_offset;
+	struct strip_zone *zone;
+	struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+	raid0_conf_t *conf_tgt  = reshape->conf;
+	/*
+	* go back to the array-relative sector saved in the bio's record
+	*/
+	bi->bi_sector = real_to_virtual(bi);
+	bi->bi_rw = WRITE;	/* the bio was built as a READ; it is reused */
+	bi->bi_idx = 0;
+	sector_offset = bi->bi_sector;
+	zone    =  find_zone(conf_tgt, &sector_offset);
+	if (!zone)
+		BUG();	/* the saved sector maps to no zone of the target conf */
+
+	tmp_dev = map_sector(reshape->conf,
+				mddev->chunk_sectors,
+				reshape->raid_disks,
+				zone, bi->bi_sector,
+				&sector_offset);
+
+	bi->bi_bdev   = tmp_dev->bdev;
+	bi->bi_sector = sector_offset + zone->dev_start +
+					tmp_dev->data_offset;
+	bi->bi_end_io  = reshape_write_endio;
+	bi->bi_size    = ((struct raid0_reshape_bio *)bi->bi_private)->bi_size;
+	generic_make_request(bi);
+}
+
+/*
+ * Build the target (post-reshape) raid0 conf in reshape->conf, counting every
+ * not-In_sync disk as a new member. Returns 0, -ENOMEM or -EINVAL.
+*/
+static int create_temp_target(mddev_t *mddev)
+{
+	int nraid_disks;
+	mdk_rdev_t *rdev = NULL;
+	raid0_conf_t *conf_src = mddev->private;
+	struct raid0_reshape *reshape = conf_src->reshape;
+
+	/*
+	* Every disk that is not In_sync is counted as a new member; one
+	* with neither raid_disk nor desc_nr set gets the next free id.
+	*/
+	nraid_disks = mddev->raid_disks;
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		if (!test_bit(In_sync, &rdev->flags)) {
+			if (rdev->raid_disk == -1
+				&& rdev->desc_nr == -1)
+					rdev->desc_nr = nraid_disks;
+			nraid_disks++;
+			rdev->raid_disk = rdev->desc_nr;
+			rdev->saved_raid_disk = rdev->raid_disk;
+		}
+	}
+	reshape->conf = kzalloc(sizeof(*reshape->conf), GFP_KERNEL);
+	if (!reshape->conf)
+		return -ENOMEM;
+	if (create_strip_zones(reshape->conf, &mddev->disks,
+				mddev->chunk_sectors, nraid_disks))
+		return -ENOMEM;	/* NOTE(review): reshape->conf is not freed here */
+	if (calc_zones(reshape->conf, &mddev->disks, nraid_disks)) {
+		kfree(reshape->conf->strip_zone);
+		kfree(reshape->conf->devlist);
+		kfree(reshape->conf);	/* NOTE(review): pointer is left dangling */
+		return -EINVAL;
+	}
+	/*
+	* record the new geometry and recalc the queue dimensions if needed.
+	*/
+	reshape->raid_disks = nraid_disks;
+	mddev->delta_disks = nraid_disks - mddev->raid_disks;
+	set_queues(&mddev->disks, mddev->queue);
+	print_conf(reshape->conf, reshape->raid_disks, "new mappings");
+	return 0;
+}
+
+/*
+ *  Dispatch the ios that raid0_make_request queued on reshape->incoming_ios.
+ *
+ *  The reshape window starts at recovery_cp and spans reshape->window sectors:
+ *   --------------[READ **************] ---------------
+ *     area A         reshape window            area B
+ *
+ *   area A (below the window): submitted through the new mappings.
+ *   area B (above the window): submitted through the original mappings;
+ *   seeing such an io also clears *go_faster. Window ios are queued again.
+*/
+static void process_incomings(mddev_t *mddev, int *go_faster)
+{
+	struct bio_list resched_bios;
+	struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+	struct bio *bi;
+	unsigned long flags;
+
+	/*
+	* The shared list keeps growing while we work, so its current
+	* content is moved to a private list under the lock and that
+	* private list is then walked without holding the lock.
+	*/
+	bio_list_init(&resched_bios);
+	spin_lock_irqsave(&reshape->lock, flags);
+	bio_list_merge(&resched_bios, &reshape->incoming_ios);
+	bio_list_init(&reshape->incoming_ios);
+	spin_unlock_irqrestore(&reshape->lock, flags);
+
+	while (!bio_list_empty(&resched_bios)) {
+		/*
+		 *  IO ends below the window (area A): submit it on the new
+		 *  raid mappings. If make_request == 1 the IO still has to
+		 *  be transferred here, else it was split and moved to the
+		 *  incoming ios list. Very much the same for area B.
+		*/
+		bi = bio_list_pop(&resched_bios);
+		if ((bi->bi_sector + bio_sectors(bi)) < mddev->recovery_cp) {
+			if (make_request(mddev->gendisk->queue,
+				reshape->conf,
+				reshape->raid_disks,
+				mddev->chunk_sectors,
+				bi) == 1)
+				generic_make_request(bi);
+			continue;
+		}
+		if ((bi->bi_sector >
+			(mddev->recovery_cp + reshape->window))) {
+			*go_faster = 0;
+			/*
+			*  IO starts above the window (area B): old mappings.
+			*/
+			if (make_request(mddev->gendisk->queue,
+				mddev->private,
+				mddev->raid_disks,
+				mddev->chunk_sectors,
+				bi) == 1)
+				generic_make_request(bi);
+			continue;
+		}
+		/* IO touches the reshape window: queue it again for a later pass */
+		spin_lock_irqsave(&reshape->lock, flags);
+		bio_list_add(&reshape->incoming_ios, bi);
+		spin_unlock_irqrestore(&reshape->lock, flags);
+	}
+}
+
+/*
+ * Work out how many bios, and of what size, are needed to cover a single
+ * chunk.
+ * Reshape works by reading an entire chunk from the old raid and writing it
+ * to the new raid; a chunk may be as small as 1024 bytes or as large as 2^30.
+ * Three factors bound the bio size (all in bytes):
+ * 1. the queue's transfer size (both the tunable max_sectors
+ *    and the hardware constraint max_hw_sectors)
+ * 2. the predefined maximum bio size, BIO_MAX_PAGES * PAGE_SIZE
+ * 3. the chunk size
+ * We take the minimum of the three.
+ * nr_bios is the number of bios needed to cover one chunk; when the chunk
+ * size is not a multiple of bio_size the last bio is shorter and its size
+ * is kept in last_bio_size (0 otherwise).
+ * This procedure can never fail.
+*/
+static void reshape_iosize(mddev_t *mddev)
+{
+	int bio_max_size = BIO_MAX_PAGES*PAGE_SIZE;
+	raid0_conf_t *conf = mddev->private;
+	struct raid0_reshape *reshape = conf->reshape;
+	int chunk_size = mddev->chunk_sectors<<9;	/* sectors -> bytes */
+
+	reshape->nr_bios = 0;
+	reshape->last_bio_size = 0;
+	reshape->bio_size =  0;
+	reshape->bio_size =  min(chunk_size, bio_max_size);
+	reshape->bio_size =  min((int)queue_max_hw_sectors(mddev->queue)<<9,
+				reshape->bio_size);
+	reshape->bio_size =  min((int)queue_max_sectors(mddev->queue)<<9,
+				reshape->bio_size);
+
+	if ((mddev->chunk_sectors<<9) > reshape->bio_size) {
+		reshape->nr_bios = chunk_size/reshape->bio_size;
+		reshape->last_bio_size = chunk_size -
+			(reshape->nr_bios * reshape->bio_size);
+		if (reshape->last_bio_size)
+			reshape->nr_bios++;	/* one extra, shorter bio for the tail */
+	} else{
+		reshape->nr_bios = 1;	/* the whole chunk fits in a single bio */
+	}
+	printk(KERN_INFO "raid0: using reshape transfer"
+			" size of %ubytes.. \nraid0: tailed with %ubytes,"
+			" covered with %d bios\n",
+			reshape->bio_size,
+			reshape->last_bio_size,
+			reshape->nr_bios);
+}
+
+/*
+ * Allocate the reshape record, size its bios, build the temporary target and
+ * flag the array for reshape. Returns 0, or -1 with conf->reshape left NULL.
+ */
+static int reshape_init(mddev_t *mddev)
+{
+	raid0_conf_t *conf = mddev->private;
+	conf->reshape = kzalloc(sizeof(*conf->reshape), GFP_NOIO);
+	if (!conf->reshape) {
+		printk(KERN_INFO "%s: failed to allocate memory for reshape\n",
+			mdname(mddev));
+		return -1;
+	}
+	reshape_iosize(mddev);
+	conf->reshape->mddev_src = mddev;
+	printk(KERN_INFO "raid0: %s reshape, create a temporary mappings\n",
+			mdname(mddev));
+	if (create_temp_target(mddev)) {
+		printk(KERN_INFO "raid0: failed to setup temporary mappings\n");
+		kfree(conf->reshape);	/* its conf pointer may already be stale */
+		conf->reshape = NULL;	/* raid0d and raid0_stop test this pointer */
+		return -1;
+	}
+	mddev->resync_max_sectors = mddev->array_sectors;
+	mddev->resync_max = mddev->array_sectors;
+	spin_lock_init(&conf->reshape->lock);
+	bio_list_init(&conf->reshape->ios);
+	bio_list_init(&conf->reshape->incoming_ios);
+	atomic_set(&conf->reshape->active_ios, 0);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+	conf->reshape->flags = 0 ;
+	set_bit(RAID0_RESHAPE_START, &conf->reshape->flags);
+	return 0;
+}
+
+/* Allocate a reshape READ bio and its per-bio record; NULL if either fails. */
+struct bio *reshape_get_bio(struct raid0_reshape *reshape, int vcnt,
+				int bio_size)
+{
+	int i;
+	struct bio *bi = bio_alloc(GFP_NOIO, vcnt);
+	if (!bi) {
+		printk(KERN_ERR "raid0: failed too alloc bio for"
+			" reshaping.rejecting vcnt=%d\n", vcnt);
+		return NULL;
+	}
+	bi->bi_private = kmalloc(sizeof(struct raid0_reshape_bio), GFP_NOIO);
+	if (!bi->bi_private) {
+		bio_put(bi);	/* no pages attached yet, only the bio goes */
+		return NULL;
+	}
+	bi->bi_rw = READ;
+	bi->bi_size = 0;
+	bi->bi_vcnt = 0;
+	for (i = 0; i < vcnt; i++, bi->bi_vcnt++) {
+		bi->bi_io_vec[i].bv_len = min_t(int, bio_size, PAGE_SIZE);
+		bio_size -= bi->bi_io_vec[i].bv_len;
+		bi->bi_io_vec[i].bv_offset = 0;
+		bi->bi_io_vec[i].bv_page = alloc_page(GFP_NOIO);
+		if (!bi->bi_io_vec[i].bv_page)
+			break;	/* out of pages: hand back a shorter bio */
+		bi->bi_size += bi->bi_io_vec[i].bv_len;
+	}
+	bi->bi_next = NULL;
+	bi->bi_end_io = reshape_read_endio;
+	bi->bi_idx = 0;
+	return bi;
+}
+static inline int is_last_bio_in_chunk(struct raid0_reshape *reshape, int idx)
+{	/* true only for the final bio of a chunk that has a short tail */
+	return reshape->last_bio_size && (idx + 1) == reshape->nr_bios;
+}
+
+/* Record in the bio's reshape handle its owner, byte size and array sector. */
+static void set_reshape_handle(sector_t sector, struct bio *bi,
+				struct raid0_reshape *reshape)
+{
+	struct raid0_reshape_bio *handle = bi->bi_private;
+	handle->array_sector = sector;
+	handle->bi_size = bi->bi_size;
+	handle->reshape = reshape;
+}
+
+/*
+ * Drain reshape->ios, rewriting each completed read into the new raid.
+*/
+void do_reshape_writes(mddev_t *mddev)
+{
+	struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+	unsigned long flags;
+	struct bio *bi;
+
+	for (;;) {
+		/* emptiness is tested outside the lock, the pop is locked */
+		if (bio_list_empty(&reshape->ios))
+			break;
+		spin_lock_irqsave(&reshape->lock, flags);
+		bi = bio_list_pop(&reshape->ios);
+		spin_unlock_irqrestore(&reshape->lock, flags);
+		process_reshape_writes(mddev, bi);
+	}
+}
+
+/*
+ * sync_request hook: reshape the stripe of the zone that holds 'sector'.
+ * 1.   dispatch queued incoming ios; sleep 100ms unless go_faster
+ * 2.   read every chunk of the stripe through the old mapping
+ * 3.   while ios are in flight keep serving incoming ios, and turn each
+ *      completed read into a write through the new mapping
+ * 4.   once active_ios drops to 0, record reshape_position and let
+ *      md_check_recovery() update the super blocks
+ * Returns the sectors submitted; 0 if no zone is left or the range is insane.
+*/
+sector_t raid0_sync(mddev_t *mddev, sector_t sector, int *skipped,
+			 int go_faster)
+{
+	struct bio *bi;
+	struct strip_zone *zone;
+	sector_t sector_offset;
+	mdk_rdev_t *tmp_dev;
+	int i = 0, chunk, chunks;
+	sector_t sectors = 0;
+	raid0_conf_t *conf = mddev->private;
+	struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+
+	process_incomings(mddev, &go_faster);
+	if (!go_faster)
+		msleep(100);
+
+	/*
+	 * each zone has its own width, take it here.
+	*/
+	sector_offset = sector;
+	zone = find_zone(mddev->private, &sector_offset);
+	if (!zone) {
+		mdk_rdev_t *rdev;
+		struct strip_zone *z =
+			&conf->strip_zone[conf->nr_strip_zones-1];
+		int last_stripe = (mddev->chunk_sectors)*z->nb_dev;
+		/*
+		* no zone holds 'sector': this is the last call, did we finish ?
+		*/
+		if ((mddev->recovery_cp + last_stripe) ==
+					mddev->array_sectors) {
+			mddev->recovery_cp = MaxSector;
+			printk(KERN_INFO "raid0: %s, reshape "
+					 "ended succefully at %lld\n",
+				mdname(mddev),
+				(unsigned long long)sector);
+		} else{
+			printk(KERN_INFO "raid0: %s, reshape was "
+					"interrupted at %lld\n",
+				mdname(mddev),
+				(unsigned long long)mddev->curr_resync);
+		}
+		/*
+		* in either case, mark all disks In_sync
+		*/
+		list_for_each_entry(rdev, &mddev->disks, same_set)
+			set_bit(In_sync, &rdev->flags);
+		set_bit(RAID0_RESHAPE_END, &reshape->flags);
+		return 0;
+	}
+	chunks = zone->nb_dev;
+	if ((sector + chunks*(mddev->chunk_sectors)) >
+					mddev->array_sectors) {
+		printk(KERN_ERR "raid0: %s insane , %lld aborting reshape\n",
+			mdname(mddev),
+			(unsigned long long)sector);
+		return 0;
+	}
+	mutex_lock(&conf->reshape_lock);
+	/*
+	* mark the current position. this way we have a window defined.
+	*/
+	mddev->recovery_cp = sector;
+	/*
+	* generate a series of reads over the current stripe
+	*/
+	for (chunk = 0 ; chunk < chunks; chunk++) {
+		int bio_size = reshape->bio_size;
+		int vcnt     = (bio_size + PAGE_SIZE - 1)/PAGE_SIZE;
+		for (i = 0; i < reshape->nr_bios; i++) {
+			sector_offset = sector;
+			if (is_last_bio_in_chunk(reshape, i)) {
+				bio_size = reshape->last_bio_size;
+				vcnt = (bio_size + PAGE_SIZE - 1)/PAGE_SIZE;
+			}
+			bi = reshape_get_bio(reshape, vcnt, bio_size);
+			if (!bi) {
+				mutex_unlock(&conf->reshape_lock);
+				return sectors;
+			}
+			set_reshape_handle(sector, bi, reshape);
+			/*
+			 * map the bio through the old (source) mappings
+			*/
+			zone =  find_zone(mddev->private, &sector_offset);
+			if (!zone)
+				BUG();
+
+			tmp_dev = map_sector(mddev->private,
+						mddev->chunk_sectors,
+						mddev->raid_disks,
+						zone, sector,
+						&sector_offset);
+			bi->bi_bdev = tmp_dev->bdev;
+			bi->bi_sector = sector_offset + zone->dev_start +
+							tmp_dev->data_offset;
+			atomic_inc(&reshape->active_ios);
+			generic_make_request(bi);
+			sectors += (bi->bi_size>>9);
+			sector  += (bi->bi_size>>9);
+		}
+	}
+	mutex_unlock(&conf->reshape_lock);
+	/* save last window size */
+	reshape->window = sectors;
+	/*
+	 * now wait on ios, serving incoming ios and reshape writes meanwhile.
+	*/
+	do {
+		msleep(10);
+		process_incomings(mddev, &go_faster);
+		do_reshape_writes(mddev);
+	} while (atomic_read(&reshape->active_ios) > 0);
+
+	list_for_each_entry(tmp_dev, &mddev->disks, same_set) {
+		tmp_dev->sb_loaded = 1;
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	}
+	/* for resume reshape; the loop above already moved 'sector' past the stripe */
+	mddev->reshape_position = sector;
+	/* will update the super blocks */
+	md_check_recovery(mddev);
+	return sectors;
+}
+
+/*
+ * check_reshape hook. A change of chunk size, layout or level is not
+ * supported: the new_* fields are reset and -EINVAL is returned. Otherwise
+ * the reshape state is set up and the raid0d thread is started.
+ */
+static int raid0_check_reshape(mddev_t *mddev)
+{
+	int rc;
+	/* a reshape is already flagged: nothing more to set up */
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+		return 0;
+	if (mddev->new_chunk_sectors != mddev->chunk_sectors ||
+	    mddev->new_layout != mddev->layout ||
+	    mddev->new_level != mddev->level) {
+		mddev->new_chunk_sectors = mddev->chunk_sectors;
+		mddev->new_layout = mddev->layout;
+		mddev->new_level = mddev->level;
+		return -EINVAL;
+	}
+	rc = md_allow_write(mddev);
+	if (rc)
+		return rc;
+	if (reshape_init(mddev)) {
+		printk(KERN_ERR "raid0: failed to start reshape\n");
+		return -1;
+	}
+	mddev->recovery_cp = 0;
+	return start_raid0d(mddev);
+}
+
+/*
+ * spare_active hook, used to finish a reshape: leave reshape mode, adopt
+ * the new disk count, rebuild the conf through raid0_run(), resize the
+ * gendisk and free the reshape record, the target conf and the old conf.
+ */
+static int raid0_spare_active(mddev_t *mddev)
+{
+	int go_faster;	/* only written by process_incomings(); never read here */
+	struct raid0_reshape *reshape = mddev_to_reshape(mddev);
+	raid0_conf_t *conf = mddev->private;
+
+	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+	/*
+	* ios may still be queued on incoming_ios; dispatch them now.
+	* NOTE(review): process_incomings() queues again any io it places
+	* in the window, and the record holding that list is freed below.
+	*/
+	process_incomings(mddev, &go_faster);
+	mutex_lock(&conf->reshape_lock);
+	conf->reshape = NULL;
+	mutex_unlock(&conf->reshape_lock);
+	blk_plug_device_unlocked(mddev->queue);
+	/*
+	* recompute the raid's conf.
+	*/
+	mddev->raid_disks = reshape->raid_disks;
+	mddev->in_sync = 1;
+	mddev->delta_disks = 0;
+	mddev->recovery_cp = MaxSector;
+	mddev->reshape_position  = MaxSector;
+	raid0_run(mddev);	/* NOTE(review): return value is not checked */
+	/* report media change */
+	set_capacity(mddev->gendisk, mddev->array_sectors);
+	mddev->changed = 1;
+	blk_unplug(mddev->queue);
+	md_allow_write(mddev);
+	/*
+	* now free the reshape state and the conf saved before raid0_run()
+	*/
+	kfree(reshape->conf->strip_zone);
+	kfree(reshape->conf->devlist);
+	kfree(reshape->conf);
+	kfree(reshape);
+	kfree(conf->strip_zone);
+	kfree(conf->devlist);
+	kfree(conf);
+	return 0;
+}
+
+
+
 static int raid0_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 {
 	char b[BDEVNAME_SIZE];
@@ -682,6 +1334,15 @@ static int raid0_remove_disk(mddev_t *mddev, int number)
 	return 0;
 }
 
+/*
+ * start_reshape hook: a stub that always returns -1. Reshape state is set
+ * up by raid0_check_reshape(), or by raid0_run() when resuming a reshape.
+*/
+int raid0_start_reshape(mddev_t *mddev)
+{
+	return -1;
+}
+
 static struct mdk_personality raid0_personality=
 {
 	.name		= "raid0",
@@ -694,6 +1355,10 @@ static struct mdk_personality raid0_personality=
 	.size		= raid0_size,
 	.hot_add_disk	= raid0_add_disk,
 	.hot_remove_disk = raid0_remove_disk,
+	.check_reshape	= raid0_check_reshape,
+	.spare_active	= raid0_spare_active,
+	.sync_request   = raid0_sync,
+	.start_reshape = raid0_start_reshape,
 };
 
 static int __init raid0_init (void)




^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2009-06-16 21:58 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-16 21:58 Subject:[PATCH 010:013]: raid0: reshape core code raz ben yehuda

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.