All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] md: Add support for Raid0->Raid10 takeover
@ 2010-01-29 14:54 Trela, Maciej
  2010-02-01  0:08 ` Neil Brown
  0 siblings, 1 reply; 7+ messages in thread
From: Trela, Maciej @ 2010-01-29 14:54 UTC (permalink / raw)
  To: linux-raid, NeilBrown; +Cc: Williams, Dan J, Ciechanowski, Ed


Signed-off-by: Maciej Trela <maciej.trela@intel.com>
---
 drivers/md/raid10.c |   96 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d119b7b..5b7cda6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2177,13 +2177,20 @@ static int run(mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in run(),
 	 * should be freed in stop()]
 	 */
-	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
-	mddev->private = conf;
-	if (!conf) {
-		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
-			mdname(mddev));
-		goto out;
-	}
+
+	if (mddev->private == NULL)
+	{
+		conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
+		mddev->private = conf;
+		if (!conf) {
+			printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
+				   mdname(mddev));
+			goto out;
+		}
+	} else 
+	    conf = mddev->private;
+	
+	
 	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				 GFP_KERNEL);
 	if (!conf->mirrors) {
@@ -2377,6 +2384,80 @@ static void raid10_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static conf_t *setup_conf(mddev_t *mddev)
+{
+	conf_t *conf;
+	
+	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
+	if (conf == NULL)
+		goto abort;
+	
+	return conf;
+	
+ abort:
+	if (conf) {
+		kfree(conf);
+		return ERR_PTR(-EIO);
+	} else
+		return ERR_PTR(-ENOMEM);
+}
+
+static void *raid10_takeover_raid0(mddev_t *mddev)
+{
+	mdk_rdev_t *rdev;
+
+	if (mddev->degraded > 0)
+	{
+	    printk(KERN_ERR "error: degraded raid0!\n");
+		return ERR_PTR(-EINVAL);
+	}
+	
+	/* Update slot numbers to obtain 
+	 * degraded raid10 with missing mirrors
+	 */
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		rdev->raid_disk *= 2;
+	}
+	
+	/* Set new parameters */
+	mddev->new_level = 10;
+	/* new layout: far_copies = 1, 
+       near_copies = raid0->raid_disks */
+	mddev->new_layout = (1<<8) + mddev->raid_disks;
+	mddev->delta_disks = mddev->raid_disks;
+	mddev->degraded = mddev->raid_disks;
+	mddev->raid_disks *= 2;
+	/* make sure it will be not marked as dirty */
+	mddev->recovery_cp = MaxSector;
+
+	return setup_conf(mddev);
+}
+
+static void *raid10_takeover(mddev_t *mddev)
+{
+	mdk_rdev_t *rdev;
+	sector_t sectors, dev_sectors;
+
+	/* raid10 can take over:
+	 *  raid0 - providing it has only two drives
+	 */
+	if (mddev->level == 0) {
+		/* make sure all devices are the same (only one zone is supported) */
+		sectors = mddev->dev_sectors;
+		sector_div(sectors, mddev->chunk_sectors);
+		list_for_each_entry(rdev, &mddev->disks, same_set) {
+			dev_sectors = rdev->sectors;
+			sector_div(dev_sectors, mddev->chunk_sectors);
+			if (dev_sectors != sectors) {
+				printk("error: cannot takeover raid 0 with different dev sizes.\n");
+				return ERR_PTR(-EINVAL);
+			}
+		}
+	    return raid10_takeover_raid0(mddev);
+	}	
+	return ERR_PTR(-EINVAL);
+}
+
 static struct mdk_personality raid10_personality =
 {
 	.name		= "raid10",
@@ -2393,6 +2474,7 @@ static struct mdk_personality raid10_personality =
 	.sync_request	= sync_request,
 	.quiesce	= raid10_quiesce,
 	.size		= raid10_size,
+	.takeover	= raid10_takeover,
 };
 
 static int __init raid_init(void)
-- 
1.6.3.3




^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] md: Add support for Raid0->Raid10 takeover
  2010-01-29 14:54 [PATCH 3/3] md: Add support for Raid0->Raid10 takeover Trela, Maciej
@ 2010-02-01  0:08 ` Neil Brown
  2010-02-03 12:16   ` Trela, Maciej
  2010-02-03 12:29   ` [PATCH 4/4] md: Enable takeover for external metadata Trela, Maciej
  0 siblings, 2 replies; 7+ messages in thread
From: Neil Brown @ 2010-02-01  0:08 UTC (permalink / raw)
  To: Trela, Maciej; +Cc: linux-raid, Williams, Dan J, Ciechanowski, Ed

On Fri, 29 Jan 2010 14:54:21 +0000
"Trela, Maciej" <Maciej.Trela@intel.com> wrote:


> +static void *raid10_takeover_raid0(mddev_t *mddev)
> +{
> +	mdk_rdev_t *rdev;
> +
> +	if (mddev->degraded > 0)
> +	{
> +	    printk(KERN_ERR "error: degraded raid0!\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +	
> +	/* Update slot numbers to obtain 
> +	 * degraded raid10 with missing mirrors
> +	 */
> +	list_for_each_entry(rdev, &mddev->disks, same_set) {
> +		rdev->raid_disk *= 2;
> +	}
> +	
> +	/* Set new parameters */
> +	mddev->new_level = 10;
> +	/* new layout: far_copies = 1, 
> +       near_copies = raid0->raid_disks */
> +	mddev->new_layout = (1<<8) + mddev->raid_disks;
> +	mddev->delta_disks = mddev->raid_disks;
> +	mddev->degraded = mddev->raid_disks;
> +	mddev->raid_disks *= 2;

You are confusing raid_disk with near_copies.  They are very different things.

Presumably you want to make 'near_copies' always be 2.

NeilBrown


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 3/3] md: Add support for Raid0->Raid10 takeover
  2010-02-01  0:08 ` Neil Brown
@ 2010-02-03 12:16   ` Trela, Maciej
  2010-02-03 12:29   ` [PATCH 4/4] md: Enable takeover for external metadata Trela, Maciej
  1 sibling, 0 replies; 7+ messages in thread
From: Trela, Maciej @ 2010-02-03 12:16 UTC (permalink / raw)
  To: Neil Brown; +Cc: linux-raid, Williams, Dan J, Ciechanowski, Ed

> 
> You are confusing raid_disk with near_copies.  They are very different
> things.
> 
> Presumably you want to make 'near_copies' always be 2.
> 

Fixed.
Also, I've changed the check for multiple zones, as in the raid5 patch.

Maciek Trela.


Signed-off-by: Maciej Trela <maciej.trela@intel.com>
---
 drivers/md/raid10.c |   91 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d119b7b..9e2068c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -23,6 +23,7 @@
 #include <linux/seq_file.h>
 #include "md.h"
 #include "raid10.h"
+#include "raid0.h"
 #include "bitmap.h"
 
 /*
@@ -2177,13 +2178,20 @@ static int run(mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in run(),
 	 * should be freed in stop()]
 	 */
-	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
-	mddev->private = conf;
-	if (!conf) {
-		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
-			mdname(mddev));
-		goto out;
-	}
+
+	if (mddev->private == NULL)
+	{
+		conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
+		mddev->private = conf;
+		if (!conf) {
+			printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
+				   mdname(mddev));
+			goto out;
+		}
+	} else 
+	    conf = mddev->private;
+	
+	
 	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				 GFP_KERNEL);
 	if (!conf->mirrors) {
@@ -2377,6 +2385,74 @@ static void raid10_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static conf_t *setup_conf(mddev_t *mddev)
+{
+	conf_t *conf;
+	
+	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
+	if (conf == NULL)
+		goto abort;
+	
+	return conf;
+	
+ abort:
+	if (conf) {
+		kfree(conf);
+		return ERR_PTR(-EIO);
+	} else
+		return ERR_PTR(-ENOMEM);
+}
+
+static void *raid10_takeover_raid0(mddev_t *mddev)
+{
+	mdk_rdev_t *rdev;
+
+	if (mddev->degraded > 0)
+	{
+	    printk(KERN_ERR "error: degraded raid0!\n");
+		return ERR_PTR(-EINVAL);
+	}
+	
+	/* Update slot numbers to obtain 
+	 * degraded raid10 with missing mirrors
+	 */
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		rdev->raid_disk *= 2;
+	}
+	
+	/* Set new parameters */
+	mddev->new_level = 10;
+	/* new layout: far_copies = 1, near_copies = 2 */
+	mddev->new_layout = (1<<8) + 2;
+	mddev->new_chunk_sectors = mddev->chunk_sectors;
+	mddev->delta_disks = mddev->raid_disks;
+	mddev->degraded = mddev->raid_disks;
+	mddev->raid_disks *= 2;
+	/* make sure it will be not marked as dirty */
+	mddev->recovery_cp = MaxSector;
+
+	return setup_conf(mddev);
+}
+
+static void *raid10_takeover(mddev_t *mddev)
+{
+	struct raid0_private_data *raid0_priv;
+
+	/* raid10 can take over:
+	 *  raid0 - providing it has only two drives
+	 */
+	if (mddev->level == 0) {
+		/* for raid0 takeover only one zone is supported */
+		raid0_priv = (struct raid0_private_data*)(mddev->private);
+		if (raid0_priv->nr_strip_zones > 1) {
+				printk("error: cannot takeover raid 0 with more than one zone.\n");
+				return ERR_PTR(-EINVAL);
+		}
+	    return raid10_takeover_raid0(mddev);
+	}	
+	return ERR_PTR(-EINVAL);
+}
+
 static struct mdk_personality raid10_personality =
 {
 	.name		= "raid10",
@@ -2393,6 +2469,7 @@ static struct mdk_personality raid10_personality =
 	.sync_request	= sync_request,
 	.quiesce	= raid10_quiesce,
 	.size		= raid10_size,
+	.takeover	= raid10_takeover,
 };
 
 static int __init raid_init(void)
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] md: Enable takeover for external metadata
  2010-02-01  0:08 ` Neil Brown
  2010-02-03 12:16   ` Trela, Maciej
@ 2010-02-03 12:29   ` Trela, Maciej
  2010-02-10  5:57     ` Neil Brown
  1 sibling, 1 reply; 7+ messages in thread
From: Trela, Maciej @ 2010-02-03 12:29 UTC (permalink / raw)
  To: Neil Brown; +Cc: linux-raid, Williams, Dan J, Ciechanowski, Ed


This is one more patch for md takeover that was accidentally skipped while sending...
Please apply it together with the rest of the md takeover patches.

Now md will notify mdmon about the level change.
Also, the sysfs redundancy_group, which is specific to non-raid0 levels, is now handled when switching personalities.

Regards,
Maciek Trela.


Signed-off-by: Maciej Trela <maciej.trela@intel.com>
---
 drivers/md/md.c |   39 ++++++++++++++++++++++++++++++++++++++-
 1 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index dd3dfe4..ea1aeb4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2358,6 +2358,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			return err;
 		sprintf(nm, "rd%d", rdev->raid_disk);
 		sysfs_remove_link(&rdev->mddev->kobj, nm);
+
+		/* indicate disk removal */
+		rdev->raid_disk = -1;
+
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
 	} else if (rdev->mddev->pers) {
@@ -2887,6 +2891,9 @@ level_show(mddev_t *mddev, char *page)
 		return 0;
 }
 
+
+static struct attribute_group md_redundancy_group;
+
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
@@ -2980,6 +2987,26 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 			rdev->raid_disk = -1;
 			clear_bit(In_sync, &rdev->flags);
 		}
+
+	/* if changed from Raid0 to other personality 
+	 * show the redundancy group
+	 */
+	if ((mddev->level == 0) && (pers->sync_request))
+	{
+		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
+			printk(KERN_WARNING
+			       "md: cannot register extra attributes for %s\n",
+			       mdname(mddev));
+		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
+	} 
+	
+	/* if changed from other personality to Raid0
+	 * remove the redundancy group
+	 */
+	if (mddev->new_level == 0) {
+		sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
+	}
+
 	mddev->pers = pers;
 	mddev->private = priv;
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -2987,11 +3014,21 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	mddev->layout = mddev->new_layout;
 	mddev->chunk_sectors = mddev->new_chunk_sectors;
 	mddev->delta_disks = 0;
+
+	/* FIXME: For external metadata max_disks are not supported now
+	 * so in this case set this to some resonable value.
+	 */
+	if (mddev->max_disks == 0) {	
+		mddev->max_disks = 32;
+	}
+	
 	pers->run(mddev);
 	mddev_resume(mddev);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	md_wakeup_thread(mddev->thread);
+	md_wakeup_thread(mddev->thread);	
+	sysfs_notify(&mddev->kobj, NULL, "level");	
+
 	return rv;
 }
 
-- 
1.6.3.3


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/4] md: Enable takeover for external metadata
  2010-02-03 12:29   ` [PATCH 4/4] md: Enable takeover for external metadata Trela, Maciej
@ 2010-02-10  5:57     ` Neil Brown
  2010-02-10 17:36       ` Dan Williams
  0 siblings, 1 reply; 7+ messages in thread
From: Neil Brown @ 2010-02-10  5:57 UTC (permalink / raw)
  To: Trela, Maciej; +Cc: linux-raid, Williams, Dan J, Ciechanowski, Ed

On Wed, 3 Feb 2010 12:29:53 +0000
"Trela, Maciej" <Maciej.Trela@intel.com> wrote:

> 
> This is one more patch for md takeover that was accidentally skipped while sending...
> Please, apply it together with the rest of md takeover patches.
> 
> Now md will notify mdmon about the level change.
> Also sysfs redundancy_group specific to non-raid0 is now handled when switching personalities.
> 


Thanks for these.

You should get into the habit of checking your patches
with ./scripts/checkpatch.pl.  There was lots of stray white space and
various other issues.  I have fixed them.  You can find the current patches
in my "md-scratch" branch:
  http://neil.brown.name/git?p=md;a=shortlog;h=refs/heads/md-scratch

I also fixed some other little issues like the fact that you don't want to
set ->delta_disks when converting between RAID0 and RAID5.

I think there are still some issues with error handling if kzalloc fails
while allocating the new 'private' structures.   I'll have another run
through the patches in a few days and see what else I can fix.

I seem to have misplaced the mdadm patches.  If you would like to send those
again I will have a look and possibly apply them.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/4] md: Enable takeover for external metadata
  2010-02-10  5:57     ` Neil Brown
@ 2010-02-10 17:36       ` Dan Williams
  2010-02-11 12:34         ` Trela, Maciej
  0 siblings, 1 reply; 7+ messages in thread
From: Dan Williams @ 2010-02-10 17:36 UTC (permalink / raw)
  To: Neil Brown; +Cc: Trela, Maciej, linux-raid, Ciechanowski, Ed

On Tue, Feb 9, 2010 at 10:57 PM, Neil Brown <neilb@suse.de> wrote:
> On Wed, 3 Feb 2010 12:29:53 +0000
> "Trela, Maciej" <Maciej.Trela@intel.com> wrote:
>
>>
>> This is one more patch for md takeover that was accidentally skipped while sending...
>> Please, apply it together with the rest of md takeover patches.
>>
>> Now md will notify mdmon about the level change.
>> Also sysfs redundancy_group specific to non-raid0 is now handled when switching personalities.
>>
>
>
> Thanks for these.
>
> You should get into the habit of checking your patches
> with ./scripts/checkpatch.pl.  There was lots of stray white space and
> various other issues.  I have fixed them.  You can find the current patches
> in my "md-scratch" branch:
>  http://neil.brown.name/git?p=md;a=shortlog;h=refs/heads/md-scratch
>
> I also fixed some other little issues like the fact that you don't want to
> set ->delta_disks when converting between RAID0 and RAID5.
>
> I think there are still some issues with error handling if kzalloc fails
> while allocating the new 'private' structures.   I'll have another run
> through the patches in a few days and see what else I can fix.
>
> I seem to have misplaced the mdadm patches.  If you would like to send those
> again I will have a look and possibly apply them.

I believe one thing that still needs investigation, before turning on
the mdadm imsm support, is to see if we need superblock specific
migration strategies in the kernel.  Unless we are incredibly lucky I
do not think the native migration strategy will line up with what the
option-rom expects from a checkpoint interpretation standpoint.
Maciej, any comments on this aspect of the compatibility?

Thanks,
Dan
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 4/4] md: Enable takeover for external metadata
  2010-02-10 17:36       ` Dan Williams
@ 2010-02-11 12:34         ` Trela, Maciej
  0 siblings, 0 replies; 7+ messages in thread
From: Trela, Maciej @ 2010-02-11 12:34 UTC (permalink / raw)
  To: Neil Brown, Williams, Dan J; +Cc: linux-raid, Ciechanowski, Ed, Kwolek, Adam

> I believe one thing that still needs investigation, before turning on
> the mdadm imsm support, is to see if we need superblock specific
> migration strategies in the kernel.  Unless we are incredibly lucky I
> do not think the native migration strategy will line up with what the
> option-rom expects from a checkpoint interpretation standpoint.
> Maciej, any comments on this aspect of the compatibility?
> 

Yes, you are right, the compatibility is a big issue here.
For now I can see three main aspects of the IMSM compatibility:
1. Checkpointing with IMSM Migration Record together with General Migration Copy Area
2. Migration optimization space
3. Checkpointing optimization algorithm

Ad.1.
This is a crucial issue for the compatibility and I'm working on it currently.

The Migration Record is used during a general migration process (reshape).
It is stored on the very last block of the raid device after the IMSM superblock.
If the OROM detects a migration in progress it examines the Migration Record
to get the current migration unit. 
If the current unit is in the Migration Copy Area, the OROM will
copy it to the destination array and update the Migration Record.

For now I have the preliminary version of checkpointing code (based on Adam's IMSM reshape patch)
that stores curr_migr_unit in IMSM Migration Record during the reshape and restarts the reshape using the Migration Record. 

My idea was that Migration Record should be updated directly from the kernel (in reshape_request()) during the reshape. 
However, since the Migration Record contains some IMSM-specific fields like FamilyID,
mdmon initializes the Migration Record on the disks when starting the reshape.

Still I'll have to implement copying data with Migration Copy Area instead of using backup-file for IMSM...


Ad.2.
If the source and destination arrays have the same number of data disks, IMSM uses the Migration Optimization Space
to avoid overwriting source stripes while copying (which would otherwise involve an additional copy from src to the Copying Area).
This is 4MB of space, added at the beginning or at the end of the destination array, depending on free space availability.

Summarizing, the main idea is that src and dst arrays should start at different physical blocks.
The first block of the source array is stored in the IMSM superblock and the first block of the dest array is
both in the superblock and in the Migration Record.


Ad.3.
This is an algorithm optimizing the number of checkpoint writes during the reshape. 
My first impression is that it could be similar to the algorithm in reshape_request() but I'll need to investigate it more...


I would appreciate any feedback on these...
Thanks,
Maciek.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-02-11 12:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-29 14:54 [PATCH 3/3] md: Add support for Raid0->Raid10 takeover Trela, Maciej
2010-02-01  0:08 ` Neil Brown
2010-02-03 12:16   ` Trela, Maciej
2010-02-03 12:29   ` [PATCH 4/4] md: Enable takeover for external metadata Trela, Maciej
2010-02-10  5:57     ` Neil Brown
2010-02-10 17:36       ` Dan Williams
2010-02-11 12:34         ` Trela, Maciej

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.