linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* md/multipath: Multipath, Multiport support and prototype patch for round robin routing
@ 2001-07-23 20:32 Brian Beattie
  2001-08-15 20:34 ` Christoph Hellwig
  0 siblings, 1 reply; 3+ messages in thread
From: Brian Beattie @ 2001-07-23 20:32 UTC (permalink / raw)
  To: Linux Kernel

I've been looking at the multipath support Ingo Molnar added to md as
included in RedHat 7.1.  I'm looking at various improvements that might
be possible.  To try to get some discussion going, I'm posting some ideas
of things I think could be improved, and some patches for a prototype
to add round robin routing.

Some of the things that I think could be done that would improve md, in
no particular order: different routing options (preferred route, round
robin, static weighted, dynamic weighted), improved error handling,
automatic route recovery, automatic device discovery, automatic device
identification.  Some of these may not be feasible and others may have
some other features.

All comments welcome

-- 
Brian Beattie
IBM Linux Technology Center
bbeattie@sequent.com
503.578.5899  Des2-3C-5

--------------------- patch -------------------------
diff -rc linux-2.4.2/drivers/md/multipath.c linux/drivers/md/multipath.c
*** linux-2.4.2/drivers/md/multipath.c	Sun Apr  8 15:22:24 2001
--- linux/drivers/md/multipath.c	Wed Jul 11 18:25:08 2001
***************
*** 25,30 ****
--- 25,31 ----
  #include <linux/module.h>
  #include <linux/malloc.h>
  #include <linux/raid/multipath.h>
+ #include <linux/sysctl.h>
  #include <asm/atomic.h>
  
  #define MAJOR_NR MD_MAJOR
***************
*** 46,51 ****
--- 47,92 ----
  #define PRINTK(x...)  do { } while (0)
  #endif
  
+ static char multipath_version[] =
+ 		{ "MD/LVM Multipath Storage Device Driver: ver 0.0.1" };
+ 
+ static int multipath_proc_readstr (ctl_table *, int, struct file *, void *,
+ 		size_t *);
+ static int multipath_proc_read_dev (ctl_table *, int, struct file *, void *,
+ 		size_t *);
+ 
+ static struct ctl_table_header *multipath_table_header;
+ 
+ static struct multipath_dev_table multipath_dev_template = {
+         "",
+ 	{
+ 		{MULTIPATH_ROUTING, "routing", NULL, sizeof(int), 0644,
+ 			NULL, &proc_dointvec},
+ 		{MULTIPATH_CONF, "config", NULL, 0, 0444, NULL,
+ 			&multipath_proc_read_dev},
+ 		{0},
+ 	},
+ 	{{MULTIPATH_DEV, NULL, NULL, 0, 0555, NULL},{0}},
+ 	{{DEV_MULTIPATH, "multipath", NULL, 0, 0555, NULL},{0}},
+ 	{{CTL_DEV, "dev", NULL, 0, 0555, NULL},{0}}
+ 	
+ };
+ 
+ static ctl_table multipath_ver_table[] = {
+ 	{MULTIPATH_VER, "version", &multipath_version,
+ 		sizeof(multipath_version), 0444, NULL, &multipath_proc_readstr},
+ 	{0}
+ };
+ 
+ static ctl_table multipath_dir_table[] = {
+ 	{DEV_MULTIPATH, "multipath", NULL, 0, 0555, multipath_ver_table},
+ 	{0}
+ };
+ 
+ static ctl_table multipath_root_table[] = {
+ 	{CTL_DEV, "dev", NULL, 0, 0555, multipath_dir_table},
+ 	{0}
+ };
  
  static mdk_personality_t multipath_personality;
  static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
***************
*** 53,58 ****
--- 94,244 ----
  
  static int multipath_diskop(mddev_t *mddev, mdp_disk_t **d, int state);
  
+ static int multipath_proc_register_dev(mddev_t *md )
+ {
+ 	struct multipath_dev_table *t;
+ 	multipath_conf_t *conf = mddev_to_conf(md);
+ 
+ 	t = &(conf->ctl_tbl);
+ 
+ 	memcpy(t, &multipath_dev_template, sizeof(*t));
+ 
+ 	/* fill in fields */
+ 	sprintf( t->mdname, "%d", md->__minor );
+ 
+ 	t->dir[0].procname = t->mdname;
+ 
+ 	t->md[0].data = &conf->routing;
+ 	t->md[1].data = md;
+ 	
+ 	t->dev[0].child = t->mp;
+ 	t->mp[0].child = t->dir;
+ 	t->dir[0].child = t->md;
+ 	
+ 	conf->tbl = register_sysctl_table( t->dev, 1 );
+ 
+ 	return 0;
+ }
+ 
+ static int multipath_proc_unregister_dev( mddev_t *md )
+ {
+ 	multipath_conf_t *conf = mddev_to_conf(md);
+ 
+ 	unregister_sysctl_table( conf->tbl );
+ 
+ 	return 0;
+ } 
+ 
+ 
+ static int multipath_proc_readstr (ctl_table *tbl, int write, struct file *f,
+ 		void *buffer, size_t *lenp)
+ {
+ 	int	n;
+ 
+ 	if ( write )
+ 		return -EACCES;		/* readonly string */
+ 
+ 		/* check for no or zero length data, or data allready read */
+ 	if (!tbl->data || !tbl->maxlen || !*lenp || f->f_pos )
+ 	{
+ 		*lenp = 0;
+ 		return 0;
+ 	}
+ 	
+ 	n = strlen(tbl->data);
+ 
+ 	if (n > tbl->maxlen)
+ 		n = tbl->maxlen;
+ 		
+ 	if ( n > *lenp )
+ 		n = *lenp;
+ 
+ 	if ( n )
+ 		if(copy_to_user( buffer, multipath_version, n))
+ 			return -EFAULT;
+ 	if ( n  < *lenp )
+ 	{
+ 		if(put_user('\n', ((char *)buffer) + n) )
+ 			return -EFAULT;
+ 		n++;
+ 	}
+ 	*lenp = n;
+ 	f->f_pos += n;
+ 
+ 	return 0;
+ }
+ 
+ static int multipath_proc_read_dev (ctl_table *t, int w, struct file *f,
+ 		void *b, size_t *s)
+ {
+ 	mddev_t			*md;
+ 	multipath_conf_t	*conf;
+ 	struct multipath_info	*info;
+ 	int			path, len, i;
+ #define LEN_HDR 48
+ #define LEN_DSK 85
+ 
+ 	if (!t->data || !*s || f->f_pos )
+ 	{
+ 		*s = 0;
+ 		return 0;
+ 	}
+ 
+ 	if ( w )
+ 		return -EACCES;		/* readonly */
+ 
+ 	md = t->data;
+ 	conf = mddev_to_conf( md );
+ 
+ 	if ( f->f_pos == 0 )
+ 	{
+ 		if ( *s < LEN_HDR )	/* must be big enough to handle the */
+ 			return -EFAULT;	/* size of the next sprintf */
+ 
+ 		sprintf( b, "nr_disks %3d: raid_disks %3d: working_disks %3d\n",
+ 			conf->nr_disks&255, conf->raid_disks&255,
+ 			conf->working_disks&255 );
+ 
+ 		len = strlen( b );
+ 		if ( *s < len )	/* check for overflow */
+ 			return -EFAULT;
+ 
+ 		if ( *s < len + LEN_DSK )
+ 		{
+ 			*s = len;
+ 			f->f_pos = len;
+ 			return 0;
+ 		}
+ 	}
+ 
+ 
+ 	for ( path = 0; path < conf->nr_disks; path++ )
+ 	{
+ 		info = &conf->multipaths[path];
+ 
+ 		sprintf( b + len,
+ 			"%3d: disk %3d: dev %3d.%3d\n"
+ 			"\tworking %c: write only %c: spare %c: used %c "
+ 			"ops %10d\n",
+ 			info->number&255, info->raid_disk&255, MAJOR(info->dev),
+ 			MINOR(info->dev),
+ 			info->operational? 'y' : 'n',
+ 			info->write_only? 'y' : 'n',
+ 			info->spare? 'y' : 'n',
+ 			info->used_slot? 'y' : 'n',
+ 			info->nr_ops );
+ 		len = strlen( b );
+ 		if ( *s < len + LEN_DSK )
+ 			break;
+ 	}
+ 
+ 	*s = len;
+ 	f->f_pos = len;
+ 	return 0;
+ #undef LEN_HDR
+ #undef LEN_DSK
+ }
+ 
  static struct buffer_head *multipath_alloc_bh(multipath_conf_t *conf, int cnt)
  {
  	/* return a linked list of "cnt" struct buffer_heads.
***************
*** 364,387 ****
  }
  
  /*
-  * This routine returns the disk from which the requested read should
-  * be done. It bookkeeps the last read position for every disk
-  * in array and when new read requests come, the disk which last
-  * position is nearest to the request, is chosen.
-  *
-  * TODO: now if there are 2 multipaths in the same 2 devices, performance
-  * degrades dramatically because position is multipath, not device based.
-  * This should be changed to be device based. Also atomic sequential
-  * reads should be somehow balanced.
   */
  
! static int multipath_read_balance (multipath_conf_t *conf)
  {
! 	int disk;
  
- 	for (disk = 0; disk < conf->raid_disks; disk++)	
  		if (conf->multipaths[disk].operational)
! 			return disk;
  	BUG();
  	return 0;
  }
--- 550,588 ----
  }
  
  /*
   */
  
! static int multipath_route_select( multipath_conf_t *conf)
  {
! 	struct multipath_info	*info;
! 	int	disk, next;
! 
! 	switch( conf->routing )
! 	{
! 	case 0:
! 		for (disk = 0; disk < conf->nr_disks; disk++)	
! 			if (conf->multipaths[disk].operational)
! 				return disk;
! 		break;
! 	case 1:
! 		for (disk = 0; disk < conf->nr_disks; disk++)
! 			if (conf->multipaths[disk].operational)
! 				break;
  
  		if (conf->multipaths[disk].operational)
! 		{
! 			if (disk == conf->nr_disks)
! 				next = 0;
! 			else
! 				next = disk + 1;
! 			conf->multipaths[disk].operational = 0;
! 			conf->multipaths[next].operational = 1;
! 			return next;
! 		}
! 		break;
! 	default:
! 		break;
! 	}
  	BUG();
  	return 0;
  }
***************
*** 437,443 ****
  	/*
  	 * read balancing logic:
  	 */
! 	multipath = conf->multipaths + multipath_read_balance(conf);
  
  	bh_req = &r1_bh->bh_req;
  	memcpy(bh_req, bh, sizeof(*bh));
--- 638,644 ----
  	/*
  	 * read balancing logic:
  	 */
! 	multipath = conf->multipaths + multipath_route_select(conf);
  
  	bh_req = &r1_bh->bh_req;
  	memcpy(bh_req, bh, sizeof(*bh));
***************
*** 448,453 ****
--- 649,657 ----
  	bh_req->b_end_io = multipath_end_request;
  	bh_req->b_private = r1_bh;
  	generic_make_request (rw, bh_req);
+ 
+ 	multipath->nr_ops++;
+ 
  	return 0;
  }
  
***************
*** 697,702 ****
--- 901,907 ----
  	 * Switch the spare disk to write-only mode:
  	 */
  	case DISKOP_SPARE_WRITE:
+ printk("MD: DISKOP_SPARE_WRITE\n");
  		sdisk = conf->multipaths + spare_disk;
  		sdisk->operational = 1;
  		sdisk->write_only = 1;
***************
*** 705,710 ****
--- 910,916 ----
  	 * Deactivate a spare disk:
  	 */
  	case DISKOP_SPARE_INACTIVE:
+ printk("MD: DISKOP_SPARE_INACTIVE\n");
  		sdisk = conf->multipaths + spare_disk;
  		sdisk->operational = 0;
  		sdisk->write_only = 0;
***************
*** 717,722 ****
--- 923,929 ----
  	 * property)
  	 */
  	case DISKOP_SPARE_ACTIVE:
+ printk("MD: DISKOP_SPARE_ACTIVE\n");
  		sdisk = conf->multipaths + spare_disk;
  		fdisk = conf->multipaths + failed_disk;
  
***************
*** 1040,1045 ****
--- 1247,1258 ----
  	}
  	memset(conf, 0, sizeof(*conf));
  
+ 	if (multipath_proc_register_dev( mddev ))
+ 	{
+ 		printk(ERRORS, mdidx(mddev));
+ 		goto out_free_conf;
+ 	}
+ 
  	ITERATE_RDEV(mddev,rdev,tmp) {
  		if (rdev->faulty) {
  			/* this is a "should never happen" case and if it */
***************
*** 1228,1233 ****
--- 1441,1447 ----
  {
  	multipath_conf_t *conf = mddev_to_conf(mddev);
  
+ 	multipath_proc_unregister_dev( mddev );
  	md_unregister_thread(conf->thread);
  	multipath_shrink_r1bh(conf);
  	multipath_shrink_bh(conf, conf->freebh_cnt);
***************
*** 1250,1260 ****
--- 1464,1478 ----
  
  static int md__init multipath_init (void)
  {
+ 	multipath_table_header = register_sysctl_table(multipath_root_table, 1);
+ 
  	return register_md_personality (MULTIPATH, &multipath_personality);
  }
  
  static void multipath_exit (void)
  {
+ 	unregister_sysctl_table(multipath_table_header);
+ 
  	unregister_md_personality (MULTIPATH);
  }
  
diff -rc linux-2.4.2/include/linux/raid/multipath.h linux/include/linux/raid/multipath.h
*** linux-2.4.2/include/linux/raid/multipath.h	Sun Apr  8 15:56:18 2001
--- linux/include/linux/raid/multipath.h	Wed Jul 11 18:23:07 2001
***************
*** 2,7 ****
--- 2,16 ----
  #define _MULTIPATH_H
  
  #include <linux/raid/md.h>
+ #include <linux/sysctl.h>
+ 
+ struct multipath_dev_table {
+ 	char		mdname[8];
+ 	ctl_table	md[3];
+ 	ctl_table	dir[2];
+ 	ctl_table	mp[2];
+ 	ctl_table	dev[2];
+ };
  
  struct multipath_info {
  	int		number;
***************
*** 18,23 ****
--- 27,33 ----
  	int		spare;
  
  	int		used_slot;
+ 	unsigned int		nr_ops;
  };
  
  struct multipath_private_data {
***************
*** 52,57 ****
--- 62,72 ----
  	md_wait_queue_head_t	wait_done;
  	md_wait_queue_head_t	wait_ready;
  	md_spinlock_t		segment_lock;
+ 	int	last;		/* last used, or prefered route */
+ 	int	routing;	/* routing algorithim 0 = use prefered */
+ 				/*                    1 = round robin etc */
+ 	struct ctl_table_header	*tbl;
+ 	struct multipath_dev_table	ctl_tbl;
  };
  
  typedef struct multipath_private_data multipath_conf_t;
diff -rc linux-2.4.2/include/linux/sysctl.h linux/include/linux/sysctl.h
*** linux-2.4.2/include/linux/sysctl.h	Sun Apr  8 15:47:23 2001
--- linux/include/linux/sysctl.h	Tue Jul 10 16:51:05 2001
***************
*** 594,600 ****
  	DEV_HWMON=2,
  	DEV_PARPORT=3,
  	DEV_RAID=4,
! 	DEV_MAC_HID=5
  };
  
  /* /proc/sys/dev/cdrom */
--- 594,601 ----
  	DEV_HWMON=2,
  	DEV_PARPORT=3,
  	DEV_RAID=4,
! 	DEV_MAC_HID=5,
! 	DEV_MULTIPATH=6
  };
  
  /* /proc/sys/dev/cdrom */
***************
*** 616,621 ****
--- 617,634 ----
  enum {
  	DEV_RAID_SPEED_LIMIT_MIN=1,
  	DEV_RAID_SPEED_LIMIT_MAX=2
+ };
+ 
+ /* /proc/sys/dev/multipath */
+ enum {
+ 	MULTIPATH_VER=1,
+ 	MULTIPATH_DEV=2
+ };
+ 
+ /* /proc/sys/dev/multipath/md n */
+ enum {
+ 	MULTIPATH_ROUTING=1,
+ 	MULTIPATH_CONF=2
  };
  
  /* /proc/sys/dev/parport/default */


----- End forwarded message -----

-- 
Brian Beattie
IBM Linux Technology Center - MPIO/SAN
bbeattie@sequent.com
503.578.5899  Des2-3C-5


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: md/multipath: Multipath, Multiport support and prototype patch for round robin routing
  2001-07-23 20:32 md/multipath: Multipath, Multiport support and prototype patch for round robin routing Brian Beattie
@ 2001-08-15 20:34 ` Christoph Hellwig
  2001-08-17  0:42   ` Brian Beattie
  0 siblings, 1 reply; 3+ messages in thread
From: Christoph Hellwig @ 2001-08-15 20:34 UTC (permalink / raw)
  To: Brian Beattie; +Cc: linux-kernel, mingo

In article <20010723133242.B970@dyn9-47-16-69.des.beaverton.ibm.com> you wrote:
> I've been looking at the multipath support Ingo Molnar added to md as
> included in RedHat 7.1.  I'm looking at various improvements that might
> be possible.  To try to get some discussion going, I posting some ideas
> of things I thgink could be improved, and some patches for a prototype
> to add round robin routing.
>
> Some of the things that I think could be done that would improve md, in
> no particular order: different routing options ( prefered route, round
> robin, static weighted, dynamic weighted ), improved error handeling,
> automatic route recovery, automatic device discovery, automatic device
> identification.  Some of these may not be feasible and others may have
> some other features.
>
> All comments welcome

First it would be so much easier to compare this to the current code
if one would not have to download megabytes of redhat kernel-rpms
(ok we have it on kn.org now),  extract the four or five patches and
adapt them to a slightly different tree, but could find it in the stock
or at least -ac tree.  Ingo, is there any specific reason why you didn't
send it to Alan yet?  I stopped my development of the same feature because
I saw yours was more complete and now it doesn't get merged..

The second comment actually goes to you, Brian:  could you please try to
create unified diffs (diff -u)?  It's sooo much easier to read..

> + 
> + static struct multipath_dev_table multipath_dev_template = {
> +         "",
> + 	{
> + 		{MULTIPATH_ROUTING, "routing", NULL, sizeof(int), 0644,
> + 			NULL, &proc_dointvec},

Shouldn't this be a property of the md device, instead of a sysctl?
I planned to write that information in the md superblock for my design.

	Christoph

-- 
Of course it doesn't work. We've performed a software upgrade.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: md/multipath: Multipath, Multiport support and prototype patch for round robin routing
  2001-08-15 20:34 ` Christoph Hellwig
@ 2001-08-17  0:42   ` Brian Beattie
  0 siblings, 0 replies; 3+ messages in thread
From: Brian Beattie @ 2001-08-17  0:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Brian Beattie, linux-kernel, mingo

On Wed, Aug 15, 2001 at 10:34:53PM +0200, Christoph Hellwig wrote:
> In article <20010723133242.B970@dyn9-47-16-69.des.beaverton.ibm.com> you wrote:

> 
> The second comment actually goes to you, Brian:  could you please try to
> create unified diffs (diff -u)?  It's sooo much easier to read..
> 

I'm just back from vacation and still getting back into the groove,
I'll try to do that and post it in a day or two.


> > + 
> > + static struct multipath_dev_table multipath_dev_template = {
> > +         "",
> > + 	{
> > + 		{MULTIPATH_ROUTING, "routing", NULL, sizeof(int), 0644,
> > + 			NULL, &proc_dointvec},
> 
> Shouldn't this be a property of the md device, instead of a sysctl?
> I planned to write that information in the md superblock for my design.

I'm not sure what you mean here.  This is not a really complete thing
here.  Adding the information to the superblock sounds like a good idea,
but I'm also looking at dynamically modifying the operating parameters.

> 
> 	Christoph
> 
> -- 
> Of course it doesn't work. We've performed a software upgrade.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Brian Beattie
IBM Linux Technology Center - MPIO/SAN
bbeattie@sequent.com
503.578.5899  Des2-3C-5

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2001-08-21  4:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-07-23 20:32 md/multipath: Multipath, Multiport support and prototype patch for round robin routing Brian Beattie
2001-08-15 20:34 ` Christoph Hellwig
2001-08-17  0:42   ` Brian Beattie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).