linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* device-mapper patchset
@ 2004-02-20 15:31 Joe Thornber
  2004-02-20 15:34 ` [Patch 1/6] dm: endio method Joe Thornber
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:31 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Mailing List, thornber

Hi,

Here's another device mapper update.  Some of these are quite big
patches, so I'll run through the list:

endio method
  We've been using this code for many months (years?).  Needed for the
  more complicated targets.

Remove the version-1 ioctl interface
  This didn't get in last time I submitted it.  Leave it out if you
  still disagree.

Audit for list_for_each_*entry*
  Trivial, please merge

Queue limits
  Please merge.

List targets ioctl
  Adds a command that lets tools query the kernel to see what
  targets/versions are available.

Multipath target
  People really want this, so I'm probably pushing it sooner than I'd
  like.  It would be good if it got a wider audience in the -mm tree or
  as an experimental target in vanilla.

Thanks,

- Joe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 1/6] dm: endio method
  2004-02-20 15:31 device-mapper patchset Joe Thornber
@ 2004-02-20 15:34 ` Joe Thornber
  2004-02-21  9:58   ` Mike Christie
  2004-02-20 15:34 ` [Patch 2/6] dm: remove v1 ioctl interface Joe Thornber
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:34 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Add an endio method to targets.  This method is allowed to request
another shot at failed ios (think multipath).  Context can be passed
between the map method and the endio method.
--- diff/drivers/md/dm-crypt.c	2004-02-18 15:15:18.000000000 +0000
+++ source/drivers/md/dm-crypt.c	2004-02-18 15:16:36.000000000 +0000
@@ -601,7 +601,8 @@ crypt_clone(struct crypt_config *cc, str
 	return clone;
 }
 
-static int crypt_map(struct dm_target *ti, struct bio *bio)
+static int crypt_map(struct dm_target *ti, struct bio *bio,
+		     union map_info *map_context)
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 	struct crypt_io *io = mempool_alloc(cc->io_pool, GFP_NOIO);
--- diff/drivers/md/dm-linear.c	2003-09-30 15:46:14.000000000 +0100
+++ source/drivers/md/dm-linear.c	2004-02-18 15:16:23.000000000 +0000
@@ -65,7 +65,8 @@ static void linear_dtr(struct dm_target 
 	kfree(lc);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio)
+static int linear_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 
--- diff/drivers/md/dm-stripe.c	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/dm-stripe.c	2004-02-18 15:16:23.000000000 +0000
@@ -166,7 +166,8 @@ static void stripe_dtr(struct dm_target 
 	kfree(sc);
 }
 
-static int stripe_map(struct dm_target *ti, struct bio *bio)
+static int stripe_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
 {
 	struct stripe_c *sc = (struct stripe_c *) ti->private;
 
--- diff/drivers/md/dm-target.c	2003-06-30 10:07:21.000000000 +0100
+++ source/drivers/md/dm-target.c	2004-02-18 15:16:23.000000000 +0000
@@ -157,7 +157,8 @@ static void io_err_dtr(struct dm_target 
 	/* empty */
 }
 
-static int io_err_map(struct dm_target *ti, struct bio *bio)
+static int io_err_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
 {
 	return -EIO;
 }
--- diff/drivers/md/dm.c	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/dm.c	2004-02-18 15:16:23.000000000 +0000
@@ -21,6 +21,9 @@ static const char *_name = DM_NAME;
 static unsigned int major = 0;
 static unsigned int _major = 0;
 
+/*
+ * One of these is allocated per bio.
+ */
 struct dm_io {
 	struct mapped_device *md;
 	int error;
@@ -29,6 +32,21 @@ struct dm_io {
 };
 
 /*
+ * One of these is allocated per target within a bio.  Hopefully
+ * this will be simplified out one day.
+ */
+struct target_io {
+	struct dm_io *io;
+	struct dm_target *ti;
+	union map_info info;
+
+	sector_t bi_sector;
+	struct block_device *bi_bdev;
+	unsigned int bi_size;
+	unsigned short bi_idx;
+};
+
+/*
  * Bits for the md->flags field.
  */
 #define DMF_BLOCK_IO 0
@@ -59,6 +77,7 @@ struct mapped_device {
 	 * io objects are allocated from here.
 	 */
 	mempool_t *io_pool;
+	mempool_t *tio_pool;
 
 	/*
 	 * Event handling.
@@ -69,6 +88,7 @@ struct mapped_device {
 
 #define MIN_IOS 256
 static kmem_cache_t *_io_cache;
+static kmem_cache_t *_tio_cache;
 
 static __init int local_init(void)
 {
@@ -80,9 +100,18 @@ static __init int local_init(void)
 	if (!_io_cache)
 		return -ENOMEM;
 
+	/* allocate a slab for the target ios */
+	_tio_cache = kmem_cache_create("dm_tio", sizeof(struct target_io),
+				       0, 0, NULL, NULL);
+	if (!_tio_cache) {
+		kmem_cache_destroy(_io_cache);
+		return -ENOMEM;
+	}
+
 	_major = major;
 	r = register_blkdev(_major, _name);
 	if (r < 0) {
+		kmem_cache_destroy(_tio_cache);
 		kmem_cache_destroy(_io_cache);
 		return r;
 	}
@@ -95,6 +124,7 @@ static __init int local_init(void)
 
 static void local_exit(void)
 {
+	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 
 	if (unregister_blkdev(_major, _name) < 0)
@@ -184,6 +214,16 @@ static inline void free_io(struct mapped
 	mempool_free(io, md->io_pool);
 }
 
+static inline struct target_io *alloc_tio(struct mapped_device *md)
+{
+	return mempool_alloc(md->tio_pool, GFP_NOIO);
+}
+
+static inline void free_tio(struct mapped_device *md, struct target_io *tio)
+{
+	mempool_free(tio, md->tio_pool);
+}
+
 /*
  * Add the bio to the list of deferred io.
  */
@@ -232,17 +272,36 @@ static inline void dec_pending(struct dm
 
 static int clone_endio(struct bio *bio, unsigned int done, int error)
 {
-	struct dm_io *io = bio->bi_private;
+	int r = 0;
+	struct target_io *tio = bio->bi_private;
+	struct dm_io *io = tio->io;
+	dm_endio_fn endio = tio->ti->type->end_io;
 
 	if (bio->bi_size)
 		return 1;
 
+	if (endio) {
+		/* Restore bio fields. */
+		bio->bi_sector = tio->bi_sector;
+		bio->bi_bdev = tio->bi_bdev;
+		bio->bi_size = tio->bi_size;
+		bio->bi_idx = tio->bi_idx;
+
+		r = endio(tio->ti, bio, error, &tio->info);
+		if (r < 0)
+			error = r;
+
+		else if (r > 0)
+			/* the target wants another shot at the io */
+			return 1;
+	}
+
+	free_tio(io->md, tio);
 	dec_pending(io, error);
 	bio_put(bio);
-	return 0;
+	return r;
 }
 
-
 static sector_t max_io_len(struct mapped_device *md,
 			   sector_t sector, struct dm_target *ti)
 {
@@ -263,7 +322,8 @@ static sector_t max_io_len(struct mapped
 	return len;
 }
 
-static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_io *io)
+static void __map_bio(struct dm_target *ti, struct bio *clone,
+		      struct target_io *tio)
 {
 	int r;
 
@@ -273,22 +333,32 @@ static void __map_bio(struct dm_target *
 	BUG_ON(!clone->bi_size);
 
 	clone->bi_end_io = clone_endio;
-	clone->bi_private = io;
+	clone->bi_private = tio;
 
 	/*
 	 * Map the clone.  If r == 0 we don't need to do
 	 * anything, the target has assumed ownership of
 	 * this io.
 	 */
-	atomic_inc(&io->io_count);
-	r = ti->type->map(ti, clone);
-	if (r > 0)
+	atomic_inc(&tio->io->io_count);
+	r = ti->type->map(ti, clone, &tio->info);
+	if (r > 0) {
+		/* Save the bio info so we can restore it during endio. */
+		tio->bi_sector = clone->bi_sector;
+		tio->bi_bdev = clone->bi_bdev;
+		tio->bi_size = clone->bi_size;
+		tio->bi_idx = clone->bi_idx;
+
 		/* the bio has been remapped so dispatch it */
 		generic_make_request(clone);
+	}
 
-	else if (r < 0)
+	else if (r < 0) {
 		/* error the io and bail out */
+		struct dm_io *io = tio->io;
+		free_tio(tio->io->md, tio);
 		dec_pending(io, -EIO);
+	}
 }
 
 struct clone_info {
@@ -348,6 +418,15 @@ static void __clone_and_map(struct clone
 	struct bio *clone, *bio = ci->bio;
 	struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
 	sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
+	struct target_io *tio;
+
+	/*
+	 * Allocate a target io object.
+	 */
+	tio = alloc_tio(ci->md);
+	tio->io = ci->io;
+	tio->ti = ti;
+	memset(&tio->info, 0, sizeof(tio->info));
 
 	if (ci->sector_count <= max) {
 		/*
@@ -356,7 +435,7 @@ static void __clone_and_map(struct clone
 		 */
 		clone = clone_bio(bio, ci->sector, ci->idx,
 				  bio->bi_vcnt - ci->idx, ci->sector_count);
-		__map_bio(ti, clone, ci->io);
+		__map_bio(ti, clone, tio);
 		ci->sector_count = 0;
 
 	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
@@ -379,7 +458,7 @@ static void __clone_and_map(struct clone
 		}
 
 		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len);
-		__map_bio(ti, clone, ci->io);
+		__map_bio(ti, clone, tio);
 
 		ci->sector += len;
 		ci->sector_count -= len;
@@ -394,7 +473,7 @@ static void __clone_and_map(struct clone
 
 		clone = split_bvec(bio, ci->sector, ci->idx,
 				   bv->bv_offset, max);
-		__map_bio(ti, clone, ci->io);
+		__map_bio(ti, clone, tio);
 
 		ci->sector += max;
 		ci->sector_count -= max;
@@ -403,7 +482,11 @@ static void __clone_and_map(struct clone
 		len = to_sector(bv->bv_len) - max;
 		clone = split_bvec(bio, ci->sector, ci->idx,
 				   bv->bv_offset + to_bytes(max), len);
-		__map_bio(ti, clone, ci->io);
+		tio = alloc_tio(ci->md);
+		tio->io = ci->io;
+		tio->ti = ti;
+		memset(&tio->info, 0, sizeof(tio->info));
+		__map_bio(ti, clone, tio);
 
 		ci->sector += len;
 		ci->sector_count -= len;
@@ -441,6 +524,16 @@ static void __split_bio(struct mapped_de
  *---------------------------------------------------------------*/
 
 
+static inline void __dm_request(struct mapped_device *md, struct bio *bio)
+{
+	if (!md->map) {
+		bio_io_error(bio, bio->bi_size);
+		return;
+	}
+
+	__split_bio(md, bio);
+}
+
 /*
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
@@ -479,12 +572,7 @@ static int dm_request(request_queue_t *q
 		down_read(&md->lock);
 	}
 
-	if (!md->map) {
-		bio_io_error(bio, bio->bi_size);
-		return 0;
-	}
-
-	__split_bio(md, bio);
+	__dm_request(md, bio);
 	up_read(&md->lock);
 	return 0;
 }
@@ -574,9 +662,14 @@ static struct mapped_device *alloc_dev(u
  	if (!md->io_pool)
  		goto bad2;
 
+	md->tio_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+				      mempool_free_slab, _tio_cache);
+	if (!md->tio_pool)
+		goto bad3;
+
 	md->disk = alloc_disk(1);
 	if (!md->disk)
-		goto bad3;
+		goto bad4;
 
 	md->disk->major = _major;
 	md->disk->first_minor = minor;
@@ -592,7 +685,8 @@ static struct mapped_device *alloc_dev(u
 
 	return md;
 
-
+ bad4:
+	mempool_destroy(md->tio_pool);
  bad3:
 	mempool_destroy(md->io_pool);
  bad2:
@@ -606,6 +700,7 @@ static struct mapped_device *alloc_dev(u
 static void free_dev(struct mapped_device *md)
 {
 	free_minor(md->disk->first_minor);
+	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
 	del_gendisk(md->disk);
 	put_disk(md->disk);
@@ -644,13 +739,13 @@ static int __bind(struct mapped_device *
 {
 	request_queue_t *q = md->queue;
 	sector_t size;
-	md->map = t;
 
 	size = dm_table_get_size(t);
 	__set_size(md->disk, size);
 	if (size == 0)
 		return 0;
 
+	md->map = t;
 	dm_table_event_callback(md->map, event_callback, md);
 
 	dm_table_get(t);
@@ -710,16 +805,16 @@ void dm_put(struct mapped_device *md)
 }
 
 /*
- * Requeue the deferred bios by calling generic_make_request.
+ * Process the deferred bios
  */
-static void flush_deferred_io(struct bio *c)
+static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
 {
 	struct bio *n;
 
 	while (c) {
 		n = c->bi_next;
 		c->bi_next = NULL;
-		generic_make_request(c);
+		__dm_request(md, c);
 		c = n;
 	}
 }
@@ -814,10 +909,11 @@ int dm_resume(struct mapped_device *md)
 	dm_table_resume_targets(md->map);
 	clear_bit(DMF_SUSPENDED, &md->flags);
 	clear_bit(DMF_BLOCK_IO, &md->flags);
+
 	def = bio_list_get(&md->deferred);
+	__flush_deferred_io(md, def);
 	up_write(&md->lock);
 
-	flush_deferred_io(def);
 	blk_run_queues();
 
 	return 0;
--- diff/include/linux/device-mapper.h	2003-06-30 10:07:24.000000000 +0100
+++ source/include/linux/device-mapper.h	2004-02-18 15:16:23.000000000 +0000
@@ -13,6 +13,11 @@ struct dm_dev;
 
 typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 
+union map_info {
+	void *ptr;
+	unsigned long long ll;
+};
+
 /*
  * In the constructor the target parameter will already have the
  * table, type, begin and len fields filled in.
@@ -32,7 +37,19 @@ typedef void (*dm_dtr_fn) (struct dm_tar
  * = 0: The target will handle the io by resubmitting it later
  * > 0: simple remap complete
  */
-typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
+			  union map_info *map_context);
+
+/*
+ * Returns:
+ * < 0 : error (currently ignored)
+ * 0   : ended successfully
+ * 1   : for some reason the io has still not completed (eg,
+ *       multipath target might want to requeue a failed io).
+ */
+typedef int (*dm_endio_fn) (struct dm_target *ti,
+			    struct bio *bio, int error,
+			    union map_info *map_context);
 
 typedef void (*dm_suspend_fn) (struct dm_target *ti);
 typedef void (*dm_resume_fn) (struct dm_target *ti);
@@ -60,6 +77,7 @@ struct target_type {
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
+	dm_endio_fn end_io;
 	dm_suspend_fn suspend;
 	dm_resume_fn resume;
 	dm_status_fn status;

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 2/6] dm: remove v1 ioctl interface
  2004-02-20 15:31 device-mapper patchset Joe Thornber
  2004-02-20 15:34 ` [Patch 1/6] dm: endio method Joe Thornber
@ 2004-02-20 15:34 ` Joe Thornber
  2004-02-20 20:18   ` Andreas Jellinghaus
  2004-02-21  6:15   ` Andrew Morton
  2004-02-20 15:35 ` [Patch 3/6] dm: list_for_each_entry audit Joe Thornber
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:34 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Remove the version-1 ioctl interface.

--- diff/drivers/md/Kconfig	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/Kconfig	2004-02-18 15:25:08.000000000 +0000
@@ -162,14 +162,6 @@ config BLK_DEV_DM
 
 	  If unsure, say N.
 
-config DM_IOCTL_V4
-	bool "ioctl interface version 4"
-	depends on BLK_DEV_DM
-	default y
-	---help---
-	  Recent tools use a new version of the ioctl interface, only
-          select this option if you intend using such tools.
-
 config DM_CRYPT
 	tristate "Crypt target support"
 	depends on BLK_DEV_DM && EXPERIMENTAL
--- diff/drivers/md/dm-ioctl.c	2003-08-20 14:16:09.000000000 +0100
+++ source/drivers/md/dm-ioctl.c	2004-02-18 15:23:23.000000000 +0000
@@ -1,13 +1,1264 @@
 /*
- * Copyright (C) 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
  */
 
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/devfs_fs_kernel.h>
 #include <linux/dm-ioctl.h>
 
-#ifdef CONFIG_DM_IOCTL_V4
-#include "dm-ioctl-v4.c"
-#else
-#include "dm-ioctl-v1.c"
-#endif
+#include <asm/uaccess.h>
+
+#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
+
+/*-----------------------------------------------------------------
+ * The ioctl interface needs to be able to look up devices by
+ * name or uuid.
+ *---------------------------------------------------------------*/
+struct hash_cell {
+	struct list_head name_list;
+	struct list_head uuid_list;
+
+	char *name;
+	char *uuid;
+	struct mapped_device *md;
+	struct dm_table *new_map;
+};
+
+#define NUM_BUCKETS 64
+#define MASK_BUCKETS (NUM_BUCKETS - 1)
+static struct list_head _name_buckets[NUM_BUCKETS];
+static struct list_head _uuid_buckets[NUM_BUCKETS];
+
+void dm_hash_remove_all(void);
+
+/*
+ * Guards access to both hash tables.
+ */
+static DECLARE_RWSEM(_hash_lock);
+
+static void init_buckets(struct list_head *buckets)
+{
+	unsigned int i;
+
+	for (i = 0; i < NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(buckets + i);
+}
+
+int dm_hash_init(void)
+{
+	init_buckets(_name_buckets);
+	init_buckets(_uuid_buckets);
+	devfs_mk_dir(DM_DIR);
+	return 0;
+}
+
+void dm_hash_exit(void)
+{
+	dm_hash_remove_all();
+	devfs_remove(DM_DIR);
+}
+
+/*-----------------------------------------------------------------
+ * Hash function:
+ * We're not really concerned with the str hash function being
+ * fast since it's only used by the ioctl interface.
+ *---------------------------------------------------------------*/
+static unsigned int hash_str(const char *str)
+{
+	const unsigned int hash_mult = 2654435387U;
+	unsigned int h = 0;
+
+	while (*str)
+		h = (h + (unsigned int) *str++) * hash_mult;
+
+	return h & MASK_BUCKETS;
+}
+
+/*-----------------------------------------------------------------
+ * Code for looking up a device by name
+ *---------------------------------------------------------------*/
+static struct hash_cell *__get_name_cell(const char *str)
+{
+	struct list_head *tmp;
+	struct hash_cell *hc;
+	unsigned int h = hash_str(str);
+
+	list_for_each (tmp, _name_buckets + h) {
+		hc = list_entry(tmp, struct hash_cell, name_list);
+		if (!strcmp(hc->name, str))
+			return hc;
+	}
+
+	return NULL;
+}
+
+static struct hash_cell *__get_uuid_cell(const char *str)
+{
+	struct list_head *tmp;
+	struct hash_cell *hc;
+	unsigned int h = hash_str(str);
+
+	list_for_each (tmp, _uuid_buckets + h) {
+		hc = list_entry(tmp, struct hash_cell, uuid_list);
+		if (!strcmp(hc->uuid, str))
+			return hc;
+	}
+
+	return NULL;
+}
+
+/*-----------------------------------------------------------------
+ * Inserting, removing and renaming a device.
+ *---------------------------------------------------------------*/
+static inline char *kstrdup(const char *str)
+{
+	char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
+	if (r)
+		strcpy(r, str);
+	return r;
+}
+
+static struct hash_cell *alloc_cell(const char *name, const char *uuid,
+				    struct mapped_device *md)
+{
+	struct hash_cell *hc;
+
+	hc = kmalloc(sizeof(*hc), GFP_KERNEL);
+	if (!hc)
+		return NULL;
+
+	hc->name = kstrdup(name);
+	if (!hc->name) {
+		kfree(hc);
+		return NULL;
+	}
+
+	if (!uuid)
+		hc->uuid = NULL;
+
+	else {
+		hc->uuid = kstrdup(uuid);
+		if (!hc->uuid) {
+			kfree(hc->name);
+			kfree(hc);
+			return NULL;
+		}
+	}
+
+	INIT_LIST_HEAD(&hc->name_list);
+	INIT_LIST_HEAD(&hc->uuid_list);
+	hc->md = md;
+	hc->new_map = NULL;
+	return hc;
+}
+
+static void free_cell(struct hash_cell *hc)
+{
+	if (hc) {
+		kfree(hc->name);
+		kfree(hc->uuid);
+		kfree(hc);
+	}
+}
+
+/*
+ * devfs stuff.
+ */
+static int register_with_devfs(struct hash_cell *hc)
+{
+	struct gendisk *disk = dm_disk(hc->md);
+
+	devfs_mk_bdev(MKDEV(disk->major, disk->first_minor),
+		      S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+		      DM_DIR "/%s", hc->name);
+	return 0;
+}
+
+static int unregister_with_devfs(struct hash_cell *hc)
+{
+	devfs_remove(DM_DIR"/%s", hc->name);
+	return 0;
+}
+
+/*
+ * The kdev_t and uuid of a device can never change once it is
+ * initially inserted.
+ */
+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
+{
+	struct hash_cell *cell;
+
+	/*
+	 * Allocate the new cells.
+	 */
+	cell = alloc_cell(name, uuid, md);
+	if (!cell)
+		return -ENOMEM;
+
+	/*
+	 * Insert the cell into both hash tables.
+	 */
+	down_write(&_hash_lock);
+	if (__get_name_cell(name))
+		goto bad;
+
+	list_add(&cell->name_list, _name_buckets + hash_str(name));
+
+	if (uuid) {
+		if (__get_uuid_cell(uuid)) {
+			list_del(&cell->name_list);
+			goto bad;
+		}
+		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+	}
+	register_with_devfs(cell);
+	dm_get(md);
+	up_write(&_hash_lock);
+
+	return 0;
+
+ bad:
+	up_write(&_hash_lock);
+	free_cell(cell);
+	return -EBUSY;
+}
+
+void __hash_remove(struct hash_cell *hc)
+{
+	/* remove from the dev hash */
+	list_del(&hc->uuid_list);
+	list_del(&hc->name_list);
+	unregister_with_devfs(hc);
+	dm_put(hc->md);
+	if (hc->new_map)
+		dm_table_put(hc->new_map);
+	free_cell(hc);
+}
+
+void dm_hash_remove_all(void)
+{
+	int i;
+	struct hash_cell *hc;
+	struct list_head *tmp, *n;
+
+	down_write(&_hash_lock);
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_safe (tmp, n, _name_buckets + i) {
+			hc = list_entry(tmp, struct hash_cell, name_list);
+			__hash_remove(hc);
+		}
+	}
+	up_write(&_hash_lock);
+}
+
+int dm_hash_rename(const char *old, const char *new)
+{
+	char *new_name, *old_name;
+	struct hash_cell *hc;
+
+	/*
+	 * duplicate new.
+	 */
+	new_name = kstrdup(new);
+	if (!new_name)
+		return -ENOMEM;
+
+	down_write(&_hash_lock);
+
+	/*
+	 * Is new free ?
+	 */
+	hc = __get_name_cell(new);
+	if (hc) {
+		DMWARN("asked to rename to an already existing name %s -> %s",
+		       old, new);
+		up_write(&_hash_lock);
+		kfree(new_name);
+		return -EBUSY;
+	}
+
+	/*
+	 * Is there such a device as 'old' ?
+	 */
+	hc = __get_name_cell(old);
+	if (!hc) {
+		DMWARN("asked to rename a non existent device %s -> %s",
+		       old, new);
+		up_write(&_hash_lock);
+		kfree(new_name);
+		return -ENXIO;
+	}
+
+	/*
+	 * rename and move the name cell.
+	 */
+	unregister_with_devfs(hc);
+
+	list_del(&hc->name_list);
+	old_name = hc->name;
+	hc->name = new_name;
+	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+
+	/* rename the device node in devfs */
+	register_with_devfs(hc);
+
+	up_write(&_hash_lock);
+	kfree(old_name);
+	return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of the ioctl commands
+ *---------------------------------------------------------------*/
+/*
+ * All the ioctl commands get dispatched to functions with this
+ * prototype.
+ */
+typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
+
+static int remove_all(struct dm_ioctl *param, size_t param_size)
+{
+	dm_hash_remove_all();
+	param->data_size = 0;
+	return 0;
+}
+
+/*
+ * Round up the ptr to an 8-byte boundary.
+ */
+#define ALIGN_MASK 7
+static inline void *align_ptr(void *ptr)
+{
+	return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
+}
+
+/*
+ * Retrieves the data payload buffer from an already allocated
+ * struct dm_ioctl.
+ */
+static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
+			       size_t *len)
+{
+	param->data_start = align_ptr(param + 1) - (void *) param;
+
+	if (param->data_start < param_size)
+		*len = param_size - param->data_start;
+	else
+		*len = 0;
+
+	return ((void *) param) + param->data_start;
+}
+
+static int list_devices(struct dm_ioctl *param, size_t param_size)
+{
+	unsigned int i;
+	struct hash_cell *hc;
+	size_t len, needed = 0;
+	struct gendisk *disk;
+	struct dm_name_list *nl, *old_nl = NULL;
+
+	down_write(&_hash_lock);
+
+	/*
+	 * Loop through all the devices working out how much
+	 * space we need.
+	 */
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_entry (hc, _name_buckets + i, name_list) {
+			needed += sizeof(struct dm_name_list);
+			needed += strlen(hc->name);
+			needed += ALIGN_MASK;
+		}
+	}
+
+	/*
+	 * Grab our output buffer.
+	 */
+	nl = get_result_buffer(param, param_size, &len);
+	if (len < needed) {
+		param->flags |= DM_BUFFER_FULL_FLAG;
+		goto out;
+	}
+	param->data_size = param->data_start + needed;
+
+	nl->dev = 0;	/* Flags no data */
+
+	/*
+	 * Now loop through filling out the names.
+	 */
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_entry (hc, _name_buckets + i, name_list) {
+			if (old_nl)
+				old_nl->next = (uint32_t) ((void *) nl -
+							   (void *) old_nl);
+			disk = dm_disk(hc->md);
+			nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+			nl->next = 0;
+			strcpy(nl->name, hc->name);
+
+			old_nl = nl;
+			nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
+		}
+	}
+
+ out:
+	up_write(&_hash_lock);
+	return 0;
+}
+
+static int check_name(const char *name)
+{
+	if (strchr(name, '/')) {
+		DMWARN("invalid device name");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Fills in a dm_ioctl structure, ready for sending back to
+ * userland.
+ */
+static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
+{
+	struct gendisk *disk = dm_disk(md);
+	struct dm_table *table;
+	struct block_device *bdev;
+
+	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
+			  DM_ACTIVE_PRESENT_FLAG);
+
+	if (dm_suspended(md))
+		param->flags |= DM_SUSPEND_FLAG;
+
+	bdev = bdget_disk(disk, 0);
+	if (!bdev)
+		return -ENXIO;
+
+	param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+
+	/*
+	 * Yes, this will be out of date by the time it gets back
+	 * to userland, but it is still very useful for
+	 * debugging.
+	 */
+	param->open_count = bdev->bd_openers;
+	bdput(bdev);
+
+	if (disk->policy)
+		param->flags |= DM_READONLY_FLAG;
+
+	param->event_nr = dm_get_event_nr(md);
+
+	table = dm_get_table(md);
+	if (table) {
+		param->flags |= DM_ACTIVE_PRESENT_FLAG;
+		param->target_count = dm_table_get_num_targets(table);
+		dm_table_put(table);
+	} else
+		param->target_count = 0;
+
+	return 0;
+}
+
+static int dev_create(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+
+	r = check_name(param->name);
+	if (r)
+		return r;
+
+	if (param->flags & DM_PERSISTENT_DEV_FLAG)
+		r = dm_create_with_minor(MINOR(huge_decode_dev(param->dev)), &md);
+	else
+		r = dm_create(&md);
+
+	if (r)
+		return r;
+
+	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
+	if (r) {
+		dm_put(md);
+		return r;
+	}
+
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(md, param);
+	dm_put(md);
+
+	return r;
+}
+
+/*
+ * Always use UUID for lookups if it's present, otherwise use name.
+ */
+static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
+{
+	return *param->uuid ?
+	    __get_uuid_cell(param->uuid) : __get_name_cell(param->name);
+}
+
+static inline struct mapped_device *find_device(struct dm_ioctl *param)
+{
+	struct hash_cell *hc;
+	struct mapped_device *md = NULL;
+
+	down_read(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+	if (hc) {
+		md = hc->md;
+
+		/*
+		 * Sneakily write in both the name and the uuid
+		 * while we have the cell.
+		 */
+		strncpy(param->name, hc->name, sizeof(param->name));
+		if (hc->uuid)
+			strncpy(param->uuid, hc->uuid, sizeof(param->uuid)-1);
+		else
+			param->uuid[0] = '\0';
+
+		if (hc->new_map)
+			param->flags |= DM_INACTIVE_PRESENT_FLAG;
+		else
+			param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+		dm_get(md);
+	}
+	up_read(&_hash_lock);
+
+	return md;
+}
+
+static int dev_remove(struct dm_ioctl *param, size_t param_size)
+{
+	struct hash_cell *hc;
+
+	down_write(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	__hash_remove(hc);
+	up_write(&_hash_lock);
+	param->data_size = 0;
+	return 0;
+}
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+static int invalid_str(char *str, void *end)
+{
+	while ((void *) str < end)
+		if (!*str++)
+			return 0;
+
+	return -EINVAL;
+}
+
+static int dev_rename(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	char *new_name = (char *) param + param->data_start;
+
+	if (new_name < (char *) (param + 1) ||
+	    invalid_str(new_name, (void *) param + param_size)) {
+		DMWARN("Invalid new logical volume name supplied.");
+		return -EINVAL;
+	}
+
+	r = check_name(new_name);
+	if (r)
+		return r;
+
+	param->data_size = 0;
+	return dm_hash_rename(param->name, new_name);
+}
+
+static int do_suspend(struct dm_ioctl *param)
+{
+	int r = 0;
+	struct mapped_device *md;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	if (!dm_suspended(md))
+		r = dm_suspend(md);
+
+	if (!r)
+		r = __dev_status(md, param);
+
+	dm_put(md);
+	return r;
+}
+
+static int do_resume(struct dm_ioctl *param)
+{
+	int r = 0;
+	struct hash_cell *hc;
+	struct mapped_device *md;
+	struct dm_table *new_map;
+
+	down_write(&_hash_lock);
+
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	md = hc->md;
+	dm_get(md);
+
+	new_map = hc->new_map;
+	hc->new_map = NULL;
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	up_write(&_hash_lock);
+
+	/* Do we need to load a new map ? */
+	if (new_map) {
+		/* Suspend if it isn't already suspended */
+		if (!dm_suspended(md))
+			dm_suspend(md);
+
+		r = dm_swap_table(md, new_map);
+		if (r) {
+			dm_put(md);
+			dm_table_put(new_map);
+			return r;
+		}
+
+		if (dm_table_get_mode(new_map) & FMODE_WRITE)
+			set_disk_ro(dm_disk(md), 0);
+		else
+			set_disk_ro(dm_disk(md), 1);
+
+		dm_table_put(new_map);
+	}
+
+	if (dm_suspended(md))
+		r = dm_resume(md);
+
+	if (!r)
+		r = __dev_status(md, param);
+
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Set or unset the suspension state of a device.
+ * If the device already is in the requested state we just return its status.
+ */
+static int dev_suspend(struct dm_ioctl *param, size_t param_size)
+{
+	if (param->flags & DM_SUSPEND_FLAG)
+		return do_suspend(param);
+
+	return do_resume(param);
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+static int dev_status(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	r = __dev_status(md, param);
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Build up the status struct for each target
+ */
+static void retrieve_status(struct dm_table *table,
+			    struct dm_ioctl *param, size_t param_size)
+{
+	unsigned int i, num_targets;
+	struct dm_target_spec *spec;
+	char *outbuf, *outptr;
+	status_type_t type;
+	size_t remaining, len, used = 0;
+
+	outptr = outbuf = get_result_buffer(param, param_size, &len);
+
+	if (param->flags & DM_STATUS_TABLE_FLAG)
+		type = STATUSTYPE_TABLE;
+	else
+		type = STATUSTYPE_INFO;
+
+	/* Get all the target info */
+	num_targets = dm_table_get_num_targets(table);
+	for (i = 0; i < num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(table, i);
+
+		remaining = len - (outptr - outbuf);
+		if (remaining < sizeof(struct dm_target_spec)) {
+			param->flags |= DM_BUFFER_FULL_FLAG;
+			break;
+		}
+
+		spec = (struct dm_target_spec *) outptr;
+
+		spec->status = 0;
+		spec->sector_start = ti->begin;
+		spec->length = ti->len;
+		strncpy(spec->target_type, ti->type->name,
+			sizeof(spec->target_type));
+
+		outptr += sizeof(struct dm_target_spec);
+		remaining = len - (outptr - outbuf);
+
+		/* Get the status/table string from the target driver */
+		if (ti->type->status) {
+			if (ti->type->status(ti, type, outptr, remaining)) {
+				param->flags |= DM_BUFFER_FULL_FLAG;
+				break;
+			}
+		} else
+			outptr[0] = '\0';
+
+		outptr += strlen(outptr) + 1;
+		used = param->data_start + (outptr - outbuf);
+
+		align_ptr(outptr);
+		spec->next = outptr - outbuf;
+	}
+
+	if (used)
+		param->data_size = used;
+
+	param->target_count = num_targets;
+}
+
+/*
+ * Wait for a device event, then report device and target status in param.
+ */
+static int dev_wait(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *table;
+	DECLARE_WAITQUEUE(wq, current);
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;	/* no matching device */
+
+	/*
+	 * Sleep only if dm_add_wait_queue() returns 0 for param->event_nr.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
+		schedule();
+		dm_remove_wait_queue(md, &wq);
+	}
+ 	set_current_state(TASK_RUNNING);
+
+	/*
+	 * The userland program is going to want to know what
+	 * changed to trigger the event, so we may as well tell
+	 * him and save an ioctl.
+	 */
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+
+	table = dm_get_table(md);
+	if (table) {	/* target status is only appended when a table exists */
+		retrieve_status(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);	/* NOTE(review): presumably balances a ref taken by find_device() */
+	return r;
+}
+/* Translate DM_READONLY_FLAG in param->flags into an FMODE_* access mode. */
+static inline int get_mode(struct dm_ioctl *param)
+{
+	int mode = FMODE_READ | FMODE_WRITE;	/* default: read/write */
+
+	if (param->flags & DM_READONLY_FLAG)
+		mode = FMODE_READ;
+
+	return mode;
+}
+/* Advance from 'last' by 'next' bytes to the following target spec and its parameter string. */
+static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
+		       struct dm_target_spec **spec, char **target_params)
+{
+	*spec = (struct dm_target_spec *) ((unsigned char *) last + next);
+	*target_params = (char *) (*spec + 1);	/* params start right after the spec */
+
+	if (*spec < (last + 1))	/* new spec must not overlap the previous one */
+		return -EINVAL;
+
+	return invalid_str(*target_params, end);	/* NOTE(review): presumably checks the string ends before 'end' */
+}
+/* Add every target spec found in the ioctl payload to 'table', then complete the table. */
+static int populate_table(struct dm_table *table,
+			  struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	unsigned int i = 0;
+	struct dm_target_spec *spec = (struct dm_target_spec *) param;	/* walk starts at the header, */
+	uint32_t next = param->data_start;	/* so header + data_start is the first spec */
+	void *end = (void *) param + param_size;	/* one past the copied-in buffer */
+	char *target_params;
+
+	if (!param->target_count) {
+		DMWARN("populate_table: no targets specified");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < param->target_count; i++) {
+
+		r = next_target(spec, next, end, &spec, &target_params);
+		if (r) {
+			DMWARN("unable to find target");
+			return r;
+		}
+
+		r = dm_table_add_target(table, spec->target_type,
+					(sector_t) spec->sector_start,
+					(sector_t) spec->length,
+					target_params);
+		if (r) {
+			DMWARN("error adding target to table");
+			return r;
+		}
+
+		next = spec->next;	/* byte offset from this spec to the next one */
+	}
+
+	return dm_table_complete(table);
+}
+/* Build a table from the ioctl payload and park it in the device's inactive (new_map) slot. */
+static int table_load(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct hash_cell *hc;
+	struct dm_table *t;
+
+	r = dm_table_create(&t, get_mode(param), param->target_count);
+	if (r)
+		return r;
+
+	r = populate_table(t, param, param_size);
+	if (r) {
+		dm_table_put(t);
+		return r;
+	}
+
+	down_write(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		dm_table_put(t);	/* never installed: drop our reference or the table leaks */
+		return -ENXIO;
+	}
+	if (hc->new_map)
+		dm_table_put(hc->new_map);	/* discard any previously loaded inactive table */
+	hc->new_map = t;
+	param->flags |= DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(hc->md, param);
+	up_write(&_hash_lock);
+	return r;
+}
+/* Drop the device's inactive (new_map) table, if any, and report the device status. */
+static int table_clear(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct hash_cell *hc;
+
+	down_write(&_hash_lock);
+
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	if (hc->new_map) {
+		dm_table_put(hc->new_map);
+		hc->new_map = NULL;	/* slot is now empty */
+	}
+
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(hc->md, param);
+	up_write(&_hash_lock);
+	return r;
+}
+
+/*
+ * Write the table's device list into param's result buffer as a dm_target_deps.
+ */
+static void retrieve_deps(struct dm_table *table,
+			  struct dm_ioctl *param, size_t param_size)
+{
+	unsigned int count = 0;
+	struct list_head *tmp;
+	size_t len, needed;
+	struct dm_target_deps *deps;
+
+	deps = get_result_buffer(param, param_size, &len);
+
+	/*
+	 * Count the devices.
+	 */
+	list_for_each(tmp, dm_table_get_devices(table))
+		count++;
+
+	/*
+	 * Check we have enough space.
+	 */
+	needed = sizeof(*deps) + (sizeof(*deps->dev) * count);	/* header + one entry per device */
+	if (len < needed) {
+		param->flags |= DM_BUFFER_FULL_FLAG;	/* nothing is written in this case */
+		return;
+	}
+
+	/*
+	 * Fill in the devices.
+	 */
+	deps->count = count;
+	count = 0;	/* reused as the output index */
+	list_for_each(tmp, dm_table_get_devices(table)) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
+	}
+
+	param->data_size = param->data_start + needed;
+}
+/* Report device status, then append the device dependencies of its current table (if any). */
+static int table_deps(struct dm_ioctl *param, size_t param_size)
+{
+	int r = 0;
+	struct mapped_device *md;
+	struct dm_table *table;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;	/* no matching device */
+
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+
+	table = dm_get_table(md);
+	if (table) {	/* no table: only the device status is returned */
+		retrieve_deps(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Fill param with the device status, then append the status (or table)
+ * string of every target via retrieve_status().
+ */
+static int table_status(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *table;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;	/* no matching device */
+
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+
+	table = dm_get_table(md);
+	if (table) {	/* no table: only the device status is returned */
+		retrieve_status(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);
+	return r;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of open/close/ioctl on the special char
+ * device.
+ *---------------------------------------------------------------*/
+static ioctl_fn lookup_ioctl(unsigned int cmd)
+{
+	static struct {
+		int cmd;
+		ioctl_fn fn;
+	} _ioctls[] = {
+		{DM_VERSION_CMD, NULL},	/* version is dealt with elsewhere */
+		{DM_REMOVE_ALL_CMD, remove_all},
+		{DM_LIST_DEVICES_CMD, list_devices},
+
+		{DM_DEV_CREATE_CMD, dev_create},
+		{DM_DEV_REMOVE_CMD, dev_remove},
+		{DM_DEV_RENAME_CMD, dev_rename},
+		{DM_DEV_SUSPEND_CMD, dev_suspend},
+		{DM_DEV_STATUS_CMD, dev_status},
+		{DM_DEV_WAIT_CMD, dev_wait},
+
+		{DM_TABLE_LOAD_CMD, table_load},
+		{DM_TABLE_CLEAR_CMD, table_clear},
+		{DM_TABLE_DEPS_CMD, table_deps},
+		{DM_TABLE_STATUS_CMD, table_status}
+	};
+	/* Indexed by command number, so entries must stay in enum order; unknown cmds give NULL. */
+	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+}
+
+/*
+ * Check the user's interface version; the kernel's version is copied out
+ * even on mismatch.  Returns 0, -EINVAL (mismatch) or -EFAULT (bad pointer).
+ */
+static int check_version(unsigned int cmd, struct dm_ioctl *user)
+{
+	uint32_t version[3];
+	int r = 0;
+
+	if (copy_from_user(version, user->version, sizeof(version)))
+		return -EFAULT;
+
+	if ((DM_VERSION_MAJOR != version[0]) ||	/* major must match exactly */
+	    (DM_VERSION_MINOR < version[1])) {	/* user's minor may not be newer */
+		DMWARN("ioctl interface mismatch: "
+		       "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+		       DM_VERSION_MAJOR, DM_VERSION_MINOR,
+		       DM_VERSION_PATCHLEVEL,
+		       version[0], version[1], version[2], cmd);
+		r = -EINVAL;
+	}
+
+	/*
+	 * Fill in the kernel version.
+	 */
+	version[0] = DM_VERSION_MAJOR;
+	version[1] = DM_VERSION_MINOR;
+	version[2] = DM_VERSION_PATCHLEVEL;
+	if (copy_to_user(user->version, version, sizeof(version)))
+		return -EFAULT;
+
+	return r;
+}
+/* Release a parameter buffer obtained from copy_params() (vmalloc'd there). */
+static void free_params(struct dm_ioctl *param)
+{
+	vfree(param);
+}
+/* Copy the user's whole parameter block into a vmalloc'd buffer returned in *param; 0 or -errno. */
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
+{
+	struct dm_ioctl tmp, *dmi;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)))
+		return -EFAULT;
+
+	if (tmp.data_size < sizeof(tmp))	/* must at least hold the fixed header */
+		return -EINVAL;
+
+	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+	if (!dmi)
+		return -ENOMEM;
+	/* Second fetch: reject the request if data_size changed since it was validated above. */
+	if (copy_from_user(dmi, user, tmp.data_size) ||
+	    dmi->data_size != tmp.data_size) {
+		vfree(dmi);
+		return -EFAULT;
+	}
+	*param = dmi;
+	return 0;
+}
+/* Sanitise the copied-in header for command 'cmd'; returns 0 or -EINVAL. */
+static int validate_params(uint cmd, struct dm_ioctl *param)
+{
+	/* Always clear this flag */
+	param->flags &= ~DM_BUFFER_FULL_FLAG;
+
+	/* Ignores parameters */
+	if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
+		return 0;
+
+	/* Unless creating, either name or uuid but not both */
+	if (cmd != DM_DEV_CREATE_CMD) {
+		if ((!*param->uuid && !*param->name) ||	/* neither given */
+		    (*param->uuid && *param->name)) {	/* both given */
+			DMWARN("one of name or uuid must be supplied, cmd(%u)",
+			       cmd);
+			return -EINVAL;
+		}
+	}
+
+	/* Ensure strings are terminated */
+	param->name[DM_NAME_LEN - 1] = '\0';
+	param->uuid[DM_UUID_LEN - 1] = '\0';
+
+	return 0;
+}
+/* ioctl entry point of the control device: check version, copy params in, dispatch, copy results out. */
+static int ctl_ioctl(struct inode *inode, struct file *file,
+		     uint command, ulong u)
+{
+	int r = 0;
+	unsigned int cmd;
+	struct dm_ioctl *param;
+	struct dm_ioctl *user = (struct dm_ioctl *) u;
+	ioctl_fn fn = NULL;
+	size_t param_size;
+
+	/* only root can play with this */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (_IOC_TYPE(command) != DM_IOCTL)
+		return -ENOTTY;
+
+	cmd = _IOC_NR(command);
+
+	/*
+	 * Check the interface version passed in.  This also
+	 * writes out the kernel's interface version.
+	 */
+	r = check_version(cmd, user);
+	if (r)
+		return r;
+
+	/*
+	 * Nothing more to do for the version command.
+	 */
+	if (cmd == DM_VERSION_CMD)
+		return 0;
+
+	fn = lookup_ioctl(cmd);
+	if (!fn) {
+		DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
+		return -ENOTTY;
+	}
+
+	/*
+	 * Trying to avoid low memory issues when a device is
+	 * suspended.
+	 */
+	current->flags |= PF_MEMALLOC;
+
+	/*
+	 * Copy the parameters into kernel space.
+	 */
+	r = copy_params(user, &param);
+	if (r) {
+		current->flags &= ~PF_MEMALLOC;
+		return r;
+	}
+
+	/*
+	 * FIXME: eventually we will remove the PF_MEMALLOC flag
+	 * here.  However the tools still do nasty things like
+	 * 'load' while a device is suspended.
+	 */
+
+	r = validate_params(cmd, param);
+	if (r)
+		goto out;
+
+	param_size = param->data_size;	/* size of the buffer copied in */
+	param->data_size = sizeof(*param);	/* default reply: header only */
+	r = fn(param, param_size);
+
+	/*
+	 * Copy the results back to userland.
+	 */
+	if (!r && copy_to_user(user, param, param->data_size))
+		r = -EFAULT;
+
+ out:
+	free_params(param);
+	current->flags &= ~PF_MEMALLOC;	/* NOTE(review): cleared even if it was set before entry */
+	return r;
+}
+/* File operations of the control device: only ioctl is provided. */
+static struct file_operations _ctl_fops = {
+	.ioctl	 = ctl_ioctl,
+	.owner	 = THIS_MODULE,
+};
+/* Misc-device description of the control node (dynamic minor, devfs name "mapper/control"). */
+static struct miscdevice _dm_misc = {
+	.minor 		= MISC_DYNAMIC_MINOR,
+	.name  		= DM_NAME,
+	.devfs_name 	= "mapper/control",
+	.fops  		= &_ctl_fops
+};
+
+/*
+ * Set up the device hash and register the misc control device; 0 or -errno.
+ */
+int __init dm_interface_init(void)
+{
+	int r;
+
+	r = dm_hash_init();
+	if (r)
+		return r;
+
+	r = misc_register(&_dm_misc);
+	if (r) {
+		DMERR("misc_register failed for control device");
+		dm_hash_exit();	/* undo dm_hash_init() */
+		return r;
+	}
+
+	DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
+	       DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
+	       DM_DRIVER_EMAIL);
+	return 0;
+}
+/* Reverse of dm_interface_init(): deregister the control device, then tear down the hash. */
+void dm_interface_exit(void)
+{
+	if (misc_deregister(&_dm_misc) < 0)
+		DMERR("misc_deregister failed for control device");
+
+	dm_hash_exit();	/* runs even if deregistration failed */
+}
--- diff/include/linux/dm-ioctl.h	2003-08-20 14:16:14.000000000 +0100
+++ source/include/linux/dm-ioctl.h	2003-08-20 14:16:15.000000000 +0100
@@ -1,18 +1,237 @@
 /*
- * Copyright (C) 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
  *
  * This file is released under the LGPL.
  */
 
-#ifndef _LINUX_DM_IOCTL_H
-#define _LINUX_DM_IOCTL_H
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
 
-#include <linux/config.h>
+#include <linux/types.h>
 
-#ifdef CONFIG_DM_IOCTL_V4
-#include "dm-ioctl-v4.h"
-#else
-#include "dm-ioctl-v1.h"
-#endif
+#define DM_DIR "mapper"		/* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
 
-#endif
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables.  Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' nor 'inactive' table
+ * slots will be filled.  The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device.
+ *
+ * DM_DEV_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed.  Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device.  If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_.  Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device.  This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device.  The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start.  If a uuid is specified any
+ * lookup (eg. for a DM_DEV_STATUS) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+	/*
+	 * The version number is made up of three parts:
+	 * major - no backward or forward compatibility,
+	 * minor - only backwards compatible,
+	 * patch - both backwards and forwards compatible.
+	 *
+	 * All clients of the ioctl interface should fill in the
+	 * version number of the interface that they were
+	 * compiled with.
+	 *
+	 * All recognised ioctl commands (ie. those that don't
+	 * return -ENOTTY) fill out this field, even if the
+	 * command failed.
+	 */
+	uint32_t version[3];	/* in/out */
+	uint32_t data_size;	/* total size of data passed in
+				 * including this struct */
+
+	uint32_t data_start;	/* offset to start of data
+				 * relative to start of this struct */
+
+	uint32_t target_count;	/* in/out */
+	int32_t open_count;	/* out */
+	uint32_t flags;		/* in/out */
+	uint32_t event_nr;      	/* in/out */
+	uint32_t padding;
+
+	uint64_t dev;		/* in/out */
+
+	char name[DM_NAME_LEN];	/* device name */
+	char uuid[DM_UUID_LEN];	/* unique identifier for
+				 * the block device */
+};
+
+/*
+ * Used to specify tables.  These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+	uint64_t sector_start;
+	uint64_t length;
+	int32_t status;		/* used when reading from kernel only */
+
+	/*
+	 * Offset in bytes (from the start of this struct) to
+	 * next target_spec.
+	 */
+	uint32_t next;
+
+	char target_type[DM_MAX_TYPE_NAME];
+
+	/*
+	 * Parameter string starts immediately after this object.
+	 * Be careful to add padding after string to ensure correct
+	 * alignment of subsequent dm_target_spec.
+	 */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+	uint32_t count;	/* Array size */
+	uint32_t padding;	/* unused */
+	uint64_t dev[0];	/* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+	uint64_t dev;
+	uint32_t next;		/* offset to the next record from
+				   the _start_ of this */
+	char name[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+	/* Top level cmds */
+	DM_VERSION_CMD = 0,
+	DM_REMOVE_ALL_CMD,
+	DM_LIST_DEVICES_CMD,
+
+	/* device level cmds */
+	DM_DEV_CREATE_CMD,
+	DM_DEV_REMOVE_CMD,
+	DM_DEV_RENAME_CMD,
+	DM_DEV_SUSPEND_CMD,
+	DM_DEV_STATUS_CMD,
+	DM_DEV_WAIT_CMD,
+
+	/* Table level cmds */
+	DM_TABLE_LOAD_CMD,
+	DM_TABLE_CLEAR_CMD,
+	DM_TABLE_DEPS_CMD,
+	DM_TABLE_STATUS_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR	4
+#define DM_VERSION_MINOR	0
+#define DM_VERSION_PATCHLEVEL	0
+#define DM_VERSION_EXTRA	"-ioctl (2003-06-04)"
+
+/* Status bits */
+#define DM_READONLY_FLAG	(1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG		(1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG	(1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG	(1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG   (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG	(1 << 8) /* Out */
+
+#endif				/* _LINUX_DM_IOCTL_V4_H */
--- diff/drivers/md/dm-ioctl-v1.c	2004-01-19 10:22:56.000000000 +0000
+++ source/drivers/md/dm-ioctl-v1.c	1970-01-01 01:00:00.000000000 +0100
@@ -1,1159 +0,0 @@
-/*
- * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#include "dm.h"
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/dm-ioctl.h>
-#include <linux/init.h>
-#include <linux/wait.h>
-#include <linux/slab.h>
-#include <linux/devfs_fs_kernel.h>
-
-#include <asm/uaccess.h>
-
-#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
-
-/*-----------------------------------------------------------------
- * The ioctl interface needs to be able to look up devices by
- * name or uuid.
- *---------------------------------------------------------------*/
-struct hash_cell {
-	struct list_head name_list;
-	struct list_head uuid_list;
-
-	char *name;
-	char *uuid;
-	struct mapped_device *md;
-};
-
-#define NUM_BUCKETS 64
-#define MASK_BUCKETS (NUM_BUCKETS - 1)
-static struct list_head _name_buckets[NUM_BUCKETS];
-static struct list_head _uuid_buckets[NUM_BUCKETS];
-
-void dm_hash_remove_all(void);
-
-/*
- * Guards access to all three tables.
- */
-static DECLARE_RWSEM(_hash_lock);
-
-static void init_buckets(struct list_head *buckets)
-{
-	unsigned int i;
-
-	for (i = 0; i < NUM_BUCKETS; i++)
-		INIT_LIST_HEAD(buckets + i);
-}
-
-int dm_hash_init(void)
-{
-	init_buckets(_name_buckets);
-	init_buckets(_uuid_buckets);
-	devfs_mk_dir(DM_DIR);
-	return 0;
-}
-
-void dm_hash_exit(void)
-{
-	dm_hash_remove_all();
-	devfs_remove(DM_DIR);
-}
-
-/*-----------------------------------------------------------------
- * Hash function:
- * We're not really concerned with the str hash function being
- * fast since it's only used by the ioctl interface.
- *---------------------------------------------------------------*/
-static unsigned int hash_str(const char *str)
-{
-	const unsigned int hash_mult = 2654435387U;
-	unsigned int h = 0;
-
-	while (*str)
-		h = (h + (unsigned int) *str++) * hash_mult;
-
-	return h & MASK_BUCKETS;
-}
-
-/*-----------------------------------------------------------------
- * Code for looking up a device by name
- *---------------------------------------------------------------*/
-static struct hash_cell *__get_name_cell(const char *str)
-{
-	struct list_head *tmp;
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
-
-	list_for_each (tmp, _name_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, name_list);
-		if (!strcmp(hc->name, str))
-			return hc;
-	}
-
-	return NULL;
-}
-
-static struct hash_cell *__get_uuid_cell(const char *str)
-{
-	struct list_head *tmp;
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
-
-	list_for_each (tmp, _uuid_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, uuid_list);
-		if (!strcmp(hc->uuid, str))
-			return hc;
-	}
-
-	return NULL;
-}
-
-/*-----------------------------------------------------------------
- * Inserting, removing and renaming a device.
- *---------------------------------------------------------------*/
-static inline char *kstrdup(const char *str)
-{
-	char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
-	if (r)
-		strcpy(r, str);
-	return r;
-}
-
-static struct hash_cell *alloc_cell(const char *name, const char *uuid,
-				    struct mapped_device *md)
-{
-	struct hash_cell *hc;
-
-	hc = kmalloc(sizeof(*hc), GFP_KERNEL);
-	if (!hc)
-		return NULL;
-
-	hc->name = kstrdup(name);
-	if (!hc->name) {
-		kfree(hc);
-		return NULL;
-	}
-
-	if (!uuid)
-		hc->uuid = NULL;
-
-	else {
-		hc->uuid = kstrdup(uuid);
-		if (!hc->uuid) {
-			kfree(hc->name);
-			kfree(hc);
-			return NULL;
-		}
-	}
-
-	INIT_LIST_HEAD(&hc->name_list);
-	INIT_LIST_HEAD(&hc->uuid_list);
-	hc->md = md;
-	return hc;
-}
-
-static void free_cell(struct hash_cell *hc)
-{
-	if (hc) {
-		kfree(hc->name);
-		kfree(hc->uuid);
-		kfree(hc);
-	}
-}
-
-/*
- * devfs stuff.
- */
-static int register_with_devfs(struct hash_cell *hc)
-{
-	struct gendisk *disk = dm_disk(hc->md);
-
-	devfs_mk_bdev(MKDEV(disk->major, disk->first_minor),
-		       S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
-		       DM_DIR "/%s", hc->name);
-	return 0;
-}
-
-static int unregister_with_devfs(struct hash_cell *hc)
-{
-	devfs_remove(DM_DIR"/%s", hc->name);
-	return 0;
-}
-
-/*
- * The kdev_t and uuid of a device can never change once it is
- * initially inserted.
- */
-int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
-{
-	struct hash_cell *cell;
-
-	/*
-	 * Allocate the new cells.
-	 */
-	cell = alloc_cell(name, uuid, md);
-	if (!cell)
-		return -ENOMEM;
-
-	/*
-	 * Insert the cell into all three hash tables.
-	 */
-	down_write(&_hash_lock);
-	if (__get_name_cell(name))
-		goto bad;
-
-	list_add(&cell->name_list, _name_buckets + hash_str(name));
-
-	if (uuid) {
-		if (__get_uuid_cell(uuid)) {
-			list_del(&cell->name_list);
-			goto bad;
-		}
-		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
-	}
-	register_with_devfs(cell);
-	dm_get(md);
-	up_write(&_hash_lock);
-
-	return 0;
-
- bad:
-	up_write(&_hash_lock);
-	free_cell(cell);
-	return -EBUSY;
-}
-
-void __hash_remove(struct hash_cell *hc)
-{
-	/* remove from the dev hash */
-	list_del(&hc->uuid_list);
-	list_del(&hc->name_list);
-	unregister_with_devfs(hc);
-	dm_put(hc->md);
-	free_cell(hc);
-}
-
-void dm_hash_remove_all(void)
-{
-	int i;
-	struct hash_cell *hc;
-	struct list_head *tmp, *n;
-
-	down_write(&_hash_lock);
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_safe (tmp, n, _name_buckets + i) {
-			hc = list_entry(tmp, struct hash_cell, name_list);
-			__hash_remove(hc);
-		}
-	}
-	up_write(&_hash_lock);
-}
-
-int dm_hash_rename(const char *old, const char *new)
-{
-	char *new_name, *old_name;
-	struct hash_cell *hc;
-
-	/*
-	 * duplicate new.
-	 */
-	new_name = kstrdup(new);
-	if (!new_name)
-		return -ENOMEM;
-
-	down_write(&_hash_lock);
-
-	/*
-	 * Is new free ?
-	 */
-	hc = __get_name_cell(new);
-	if (hc) {
-		DMWARN("asked to rename to an already existing name %s -> %s",
-		       old, new);
-		up_write(&_hash_lock);
-		kfree(new_name);
-		return -EBUSY;
-	}
-
-	/*
-	 * Is there such a device as 'old' ?
-	 */
-	hc = __get_name_cell(old);
-	if (!hc) {
-		DMWARN("asked to rename a non existent device %s -> %s",
-		       old, new);
-		up_write(&_hash_lock);
-		kfree(new_name);
-		return -ENXIO;
-	}
-
-	/*
-	 * rename and move the name cell.
-	 */
-	unregister_with_devfs(hc);
-
-	list_del(&hc->name_list);
-	old_name = hc->name;
-	hc->name = new_name;
-	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
-
-	/* rename the device node in devfs */
-	register_with_devfs(hc);
-
-	up_write(&_hash_lock);
-	kfree(old_name);
-	return 0;
-}
-
-
-/*-----------------------------------------------------------------
- * Implementation of the ioctl commands
- *---------------------------------------------------------------*/
-
-/*
- * All the ioctl commands get dispatched to functions with this
- * prototype.
- */
-typedef int (*ioctl_fn)(struct dm_ioctl *param, struct dm_ioctl *user);
-
-/*
- * Check a string doesn't overrun the chunk of
- * memory we copied from userland.
- */
-static int valid_str(char *str, void *begin, void *end)
-{
-	while (((void *) str >= begin) && ((void *) str < end))
-		if (!*str++)
-			return 0;
-
-	return -EINVAL;
-}
-
-static int next_target(struct dm_target_spec *last, uint32_t next,
-		       void *begin, void *end,
-		       struct dm_target_spec **spec, char **params)
-{
-	*spec = (struct dm_target_spec *)
-	    ((unsigned char *) last + next);
-	*params = (char *) (*spec + 1);
-
-	if (*spec < (last + 1) || ((void *) *spec > end))
-		return -EINVAL;
-
-	return valid_str(*params, begin, end);
-}
-
-static int populate_table(struct dm_table *table, struct dm_ioctl *args)
-{
-	int r, first = 1;
-	unsigned int i = 0;
-	struct dm_target_spec *spec;
-	char *params;
-	void *begin, *end;
-
-	if (!args->target_count) {
-		DMWARN("populate_table: no targets specified");
-		return -EINVAL;
-	}
-
-	begin = (void *) args;
-	end = begin + args->data_size;
-
-	for (i = 0; i < args->target_count; i++) {
-
-		if (first)
-			r = next_target((struct dm_target_spec *) args,
-					args->data_start,
-					begin, end, &spec, &params);
-		else
-			r = next_target(spec, spec->next, begin, end,
-					&spec, &params);
-
-		if (r) {
-			DMWARN("unable to find target");
-			return -EINVAL;
-		}
-
-		r = dm_table_add_target(table, spec->target_type,
-					(sector_t) spec->sector_start,
-					(sector_t) spec->length,
-					params);
-		if (r) {
-			DMWARN("internal error adding target to table");
-			return -EINVAL;
-		}
-
-		first = 0;
-	}
-
-	return dm_table_complete(table);
-}
-
-/*
- * Round up the ptr to the next 'align' boundary.  Obviously
- * 'align' must be a power of 2.
- */
-static inline void *align_ptr(void *ptr, unsigned int align)
-{
-	align--;
-	return (void *) (((unsigned long) (ptr + align)) & ~align);
-}
-
-/*
- * Copies a dm_ioctl and an optional additional payload to
- * userland.
- */
-static int results_to_user(struct dm_ioctl *user, struct dm_ioctl *param,
-			   void *data, uint32_t len)
-{
-	int r;
-	void *ptr = NULL;
-
-	if (data) {
-		ptr = align_ptr(user + 1, sizeof(unsigned long));
-		param->data_start = ptr - (void *) user;
-	}
-
-	/*
-	 * The version number has already been filled in, so we
-	 * just copy later fields.
-	 */
-	r = copy_to_user(&user->data_size, &param->data_size,
-			 sizeof(*param) - sizeof(param->version));
-	if (r)
-		return -EFAULT;
-
-	if (data) {
-		if (param->data_start + len > param->data_size)
-			return -ENOSPC;
-
-		if (copy_to_user(ptr, data, len))
-			r = -EFAULT;
-	}
-
-	return r;
-}
-
-/*
- * Fills in a dm_ioctl structure, ready for sending back to
- * userland.
- */
-static int __info(struct mapped_device *md, struct dm_ioctl *param)
-{
-	struct dm_table *table;
-	struct block_device *bdev;
-	struct gendisk *disk = dm_disk(md);
-
-	param->flags = DM_EXISTS_FLAG;
-	if (dm_suspended(md))
-		param->flags |= DM_SUSPEND_FLAG;
-
-	bdev = bdget_disk(disk, 0);
-	if (!bdev)
-		return -ENXIO;
-
-	param->dev = old_encode_dev(bdev->bd_dev);
-	param->open_count = bdev->bd_openers;
-	bdput(bdev);
-
-	if (disk->policy)
-		param->flags |= DM_READONLY_FLAG;
-
-	table = dm_get_table(md);
-	param->target_count = dm_table_get_num_targets(table);
-	dm_table_put(table);
-
-	return 0;
-}
-
-/*
- * Always use UUID for lookups if it's present, otherwise use name.
- */
-static inline struct mapped_device *find_device(struct dm_ioctl *param)
-{
-	struct hash_cell *hc;
-	struct mapped_device *md = NULL;
-
-	down_read(&_hash_lock);
-	hc = *param->uuid ? __get_uuid_cell(param->uuid) :
-		__get_name_cell(param->name);
-	if (hc) {
-		md = hc->md;
-
-		/*
-		 * Sneakily write in both the name and the uuid
-		 * while we have the cell.
-		 */
-		strlcpy(param->name, hc->name, sizeof(param->name));
-		if (hc->uuid)
-			strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
-		else
-			param->uuid[0] = '\0';
-
-		dm_get(md);
-	}
-	up_read(&_hash_lock);
-
-	return md;
-}
-
-#define ALIGNMENT sizeof(int)
-static void *_align(void *ptr, unsigned int a)
-{
-	register unsigned long align = --a;
-
-	return (void *) (((unsigned long) ptr + align) & ~align);
-}
-
-/*
- * Copies device info back to user space, used by
- * the create and info ioctls.
- */
-static int info(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	struct mapped_device *md;
-
-	param->flags = 0;
-
-	md = find_device(param);
-	if (!md)
-		/*
-		 * Device not found - returns cleared exists flag.
-		 */
-		goto out;
-
-	__info(md, param);
-	dm_put(md);
-
-      out:
-	return results_to_user(user, param, NULL, 0);
-}
-
-static inline int get_mode(struct dm_ioctl *param)
-{
-	int mode = FMODE_READ | FMODE_WRITE;
-
-	if (param->flags & DM_READONLY_FLAG)
-		mode = FMODE_READ;
-
-	return mode;
-}
-
-static int check_name(const char *name)
-{
-	if (name[0] == '/') {
-		DMWARN("invalid device name");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int create(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	int r;
-	struct dm_table *t;
-	struct mapped_device *md;
-
-	r = check_name(param->name);
-	if (r)
-		return r;
-
-	r = dm_table_create(&t, get_mode(param), param->target_count);
-	if (r)
-		return r;
-
-	r = populate_table(t, param);
-	if (r) {
-		dm_table_put(t);
-		return r;
-	}
-
-	if (param->flags & DM_PERSISTENT_DEV_FLAG)
-		r = dm_create_with_minor(MINOR(old_decode_dev(param->dev)), &md);
-	else
-		r = dm_create(&md);
-
-	if (r) {
-		dm_table_put(t);
-		return r;
-	}
-
-	/* suspend the device */
-	r = dm_suspend(md);
-	if (r) {
-		DMWARN("suspend failed");
-		dm_table_put(t);
-		dm_put(md);
-		return r;
-	}
-	/* swap in the table */
-	r = dm_swap_table(md, t);
-	if (r) {
-		DMWARN("table swap failed");
-		dm_table_put(t);
-		dm_put(md);
-		return r;
-	}
-
-	/* resume the device */
-	r = dm_resume(md);
-	if (r) {
-		DMWARN("resume failed");
-		dm_table_put(t);
-		dm_put(md);
-		return r;
-	}
-
-	dm_table_put(t);	/* md will have grabbed its own reference */
-
-	set_disk_ro(dm_disk(md), (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
-	dm_put(md);
-
-	return r ? r : info(param, user);
-}
-
-/*
- * Build up the status struct for each target
- */
-static int __status(struct mapped_device *md, struct dm_ioctl *param,
-		    char *outbuf, size_t *len)
-{
-	unsigned int i, num_targets;
-	struct dm_target_spec *spec;
-	char *outptr;
-	status_type_t type;
-	struct dm_table *table = dm_get_table(md);
-
-	if (param->flags & DM_STATUS_TABLE_FLAG)
-		type = STATUSTYPE_TABLE;
-	else
-		type = STATUSTYPE_INFO;
-
-	outptr = outbuf;
-
-	/* Get all the target info */
-	num_targets = dm_table_get_num_targets(table);
-	for (i = 0; i < num_targets; i++) {
-		struct dm_target *ti = dm_table_get_target(table, i);
-
-		if (outptr - outbuf +
-		    sizeof(struct dm_target_spec) > param->data_size) {
-			dm_table_put(table);
-			return -ENOMEM;
-		}
-
-		spec = (struct dm_target_spec *) outptr;
-
-		spec->status = 0;
-		spec->sector_start = ti->begin;
-		spec->length = ti->len;
-		strlcpy(spec->target_type, ti->type->name,
-			sizeof(spec->target_type));
-
-		outptr += sizeof(struct dm_target_spec);
-
-		/* Get the status/table string from the target driver */
-		if (ti->type->status)
-			ti->type->status(ti, type, outptr,
-					 outbuf + param->data_size - outptr);
-		else
-			outptr[0] = '\0';
-
-		outptr += strlen(outptr) + 1;
-		_align(outptr, ALIGNMENT);
-		spec->next = outptr - outbuf;
-	}
-
-	param->target_count = num_targets;
-	*len = outptr - outbuf;
-	dm_table_put(table);
-
-	return 0;
-}
-
-/*
- * Return the status of a device as a text string for each
- * target.
- */
-static int get_status(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	struct mapped_device *md;
-	size_t len = 0;
-	int ret;
-	char *outbuf = NULL;
-
-	md = find_device(param);
-	if (!md)
-		/*
-		 * Device not found - returns cleared exists flag.
-		 */
-		goto out;
-
-	/* We haven't a clue how long the resultant data will be so
-	   just allocate as much as userland has allowed us and make sure
-	   we don't overun it */
-	outbuf = kmalloc(param->data_size, GFP_KERNEL);
-	if (!outbuf)
-		goto out;
-	/*
-	 * Get the status of all targets
-	 */
-	__status(md, param, outbuf, &len);
-
-	/*
-	 * Setup the basic dm_ioctl structure.
-	 */
-	__info(md, param);
-
-      out:
-	if (md)
-		dm_put(md);
-
-	ret = results_to_user(user, param, outbuf, len);
-
-	if (outbuf)
-		kfree(outbuf);
-
-	return ret;
-}
-
-/*
- * Wait for a device to report an event
- */
-static int wait_device_event(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	struct mapped_device *md;
-	DECLARE_WAITQUEUE(wq, current);
-
-	md = find_device(param);
-	if (!md)
-		/*
-		 * Device not found - returns cleared exists flag.
-		 */
-		goto out;
-
-	/*
-	 * Setup the basic dm_ioctl structure.
-	 */
-	__info(md, param);
-
-	/*
-	 * Wait for a notification event
-	 */
-	set_current_state(TASK_INTERRUPTIBLE);
- 	if (!dm_add_wait_queue(md, &wq, dm_get_event_nr(md))) {
- 		schedule();
- 		dm_remove_wait_queue(md, &wq);
- 	}
-  	set_current_state(TASK_RUNNING);
- 	dm_put(md);
-
-      out:
-	return results_to_user(user, param, NULL, 0);
-}
-
-/*
- * Retrieves a list of devices used by a particular dm device.
- */
-static int dep(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	int r;
-	unsigned int count;
-	struct mapped_device *md;
-	struct list_head *tmp;
-	size_t len = 0;
-	struct dm_target_deps *deps = NULL;
-	struct dm_table *table;
-
-	md = find_device(param);
-	if (!md)
-		goto out;
-	table = dm_get_table(md);
-
-	/*
-	 * Setup the basic dm_ioctl structure.
-	 */
-	__info(md, param);
-
-	/*
-	 * Count the devices.
-	 */
-	count = 0;
-	list_for_each(tmp, dm_table_get_devices(table))
-	    count++;
-
-	/*
-	 * Allocate a kernel space version of the dm_target_status
-	 * struct.
-	 */
-	if (array_too_big(sizeof(*deps), sizeof(*deps->dev), count)) {
-		dm_table_put(table);
-		dm_put(md);
-		return -ENOMEM;
-	}
-
-	len = sizeof(*deps) + (sizeof(*deps->dev) * count);
-	deps = kmalloc(len, GFP_KERNEL);
-	if (!deps) {
-		dm_table_put(table);
-		dm_put(md);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Fill in the devices.
-	 */
-	deps->count = count;
-	count = 0;
-	list_for_each(tmp, dm_table_get_devices(table)) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
-		deps->dev[count++] = old_encode_dev(dd->bdev->bd_dev);
-	}
-	dm_table_put(table);
-	dm_put(md);
-
-      out:
-	r = results_to_user(user, param, deps, len);
-
-	kfree(deps);
-	return r;
-}
-
-static int remove(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	struct hash_cell *hc;
-
-	down_write(&_hash_lock);
-	hc = *param->uuid ? __get_uuid_cell(param->uuid) :
-		__get_name_cell(param->name);
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
-		return -EINVAL;
-	}
-
-	/*
-	 * You may ask the interface to drop its reference to an
-	 * in use device.  This is no different to unlinking a
-	 * file that someone still has open.  The device will not
-	 * actually be destroyed until the last opener closes it.
-	 * The name and uuid of the device (both are interface
-	 * properties) will be available for reuse immediately.
-	 *
-	 * You don't want to drop a _suspended_ device from the
-	 * interface, since that will leave you with no way of
-	 * resuming it.
-	 */
-	if (dm_suspended(hc->md)) {
-		DMWARN("refusing to remove a suspended device.");
-		up_write(&_hash_lock);
-		return -EPERM;
-	}
-
-	__hash_remove(hc);
-	up_write(&_hash_lock);
-	return 0;
-}
-
-static int remove_all(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	dm_hash_remove_all();
-	return 0;
-}
-
-static int suspend(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	int r;
-	struct mapped_device *md;
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	if (param->flags & DM_SUSPEND_FLAG)
-		r = dm_suspend(md);
-	else
-		r = dm_resume(md);
-
-	dm_put(md);
-	return r;
-}
-
-static int reload(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	int r;
-	struct mapped_device *md;
-	struct dm_table *t;
-
-	r = dm_table_create(&t, get_mode(param), param->target_count);
-	if (r)
-		return r;
-
-	r = populate_table(t, param);
-	if (r) {
-		dm_table_put(t);
-		return r;
-	}
-
-	md = find_device(param);
-	if (!md) {
-		dm_table_put(t);
-		return -ENXIO;
-	}
-
-	r = dm_swap_table(md, t);
-	if (r) {
-		dm_put(md);
-		dm_table_put(t);
-		return r;
-	}
-	dm_table_put(t);	/* md will have taken its own reference */
-
-	set_disk_ro(dm_disk(md), (param->flags & DM_READONLY_FLAG) ? 1 : 0);
-	dm_put(md);
-
-	r = info(param, user);
-	return r;
-}
-
-static int rename(struct dm_ioctl *param, struct dm_ioctl *user)
-{
-	int r;
-	char *new_name = (char *) param + param->data_start;
-
-	if (valid_str(new_name, (void *) param,
-		      (void *) param + param->data_size)) {
-		DMWARN("Invalid new logical volume name supplied.");
-		return -EINVAL;
-	}
-
-	r = check_name(new_name);
-	if (r)
-		return r;
-
-	return dm_hash_rename(param->name, new_name);
-}
-
-
-/*-----------------------------------------------------------------
- * Implementation of open/close/ioctl on the special char
- * device.
- *---------------------------------------------------------------*/
-static ioctl_fn lookup_ioctl(unsigned int cmd)
-{
-	static struct {
-		int cmd;
-		ioctl_fn fn;
-	} _ioctls[] = {
-		{DM_VERSION_CMD, NULL},	/* version is dealt with elsewhere */
-		{DM_REMOVE_ALL_CMD, remove_all},
-		{DM_DEV_CREATE_CMD, create},
-		{DM_DEV_REMOVE_CMD, remove},
-		{DM_DEV_RELOAD_CMD, reload},
-		{DM_DEV_RENAME_CMD, rename},
-		{DM_DEV_SUSPEND_CMD, suspend},
-		{DM_DEV_DEPS_CMD, dep},
-		{DM_DEV_STATUS_CMD, info},
-		{DM_TARGET_STATUS_CMD, get_status},
-		{DM_TARGET_WAIT_CMD, wait_device_event},
-	};
-
-	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
-}
-
-/*
- * As well as checking the version compatibility this always
- * copies the kernel interface version out.
- */
-static int check_version(unsigned int cmd, struct dm_ioctl *user)
-{
-	uint32_t version[3];
-	int r = 0;
-
-	if (copy_from_user(version, user->version, sizeof(version)))
-		return -EFAULT;
-
-	if ((DM_VERSION_MAJOR != version[0]) ||
-	    (DM_VERSION_MINOR < version[1])) {
-		DMWARN("ioctl interface mismatch: "
-		       "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
-		       DM_VERSION_MAJOR, DM_VERSION_MINOR,
-		       DM_VERSION_PATCHLEVEL,
-		       version[0], version[1], version[2], cmd);
-		r = -EINVAL;
-	}
-
-	/*
-	 * Fill in the kernel version.
-	 */
-	version[0] = DM_VERSION_MAJOR;
-	version[1] = DM_VERSION_MINOR;
-	version[2] = DM_VERSION_PATCHLEVEL;
-	if (copy_to_user(user->version, version, sizeof(version)))
-		return -EFAULT;
-
-	return r;
-}
-
-static void free_params(struct dm_ioctl *param)
-{
-	vfree(param);
-}
-
-static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
-{
-	struct dm_ioctl tmp, *dmi;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)))
-		return -EFAULT;
-
-	if (tmp.data_size < sizeof(tmp))
-		return -EINVAL;
-
-	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
-	if (!dmi)
-		return -ENOMEM;
-
-	if (copy_from_user(dmi, user, tmp.data_size)) {
-		vfree(dmi);
-		return -EFAULT;
-	}
-
-	*param = dmi;
-	return 0;
-}
-
-static int validate_params(uint cmd, struct dm_ioctl *param)
-{
-	/* Ignores parameters */
-	if (cmd == DM_REMOVE_ALL_CMD)
-		return 0;
-
-	/* Unless creating, either name of uuid but not both */
-	if (cmd != DM_DEV_CREATE_CMD) {
-		if ((!*param->uuid && !*param->name) ||
-		    (*param->uuid && *param->name)) {
-			DMWARN("one of name or uuid must be supplied");
-			return -EINVAL;
-		}
-	}
-
-	/* Ensure strings are terminated */
-	param->name[DM_NAME_LEN - 1] = '\0';
-	param->uuid[DM_UUID_LEN - 1] = '\0';
-
-	return 0;
-}
-
-static int ctl_ioctl(struct inode *inode, struct file *file,
-		     uint command, ulong u)
-{
-	int r = 0;
-	unsigned int cmd;
-	struct dm_ioctl *param;
-	struct dm_ioctl *user = (struct dm_ioctl *) u;
-	ioctl_fn fn = NULL;
-
-	/* only root can play with this */
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (_IOC_TYPE(command) != DM_IOCTL)
-		return -ENOTTY;
-
-	cmd = _IOC_NR(command);
-
-	/*
-	 * Check the interface version passed in.  This also
-	 * writes out the kernels interface version.
-	 */
-	r = check_version(cmd, user);
-	if (r)
-		return r;
-
-	/*
-	 * Nothing more to do for the version command.
-	 */
-	if (cmd == DM_VERSION_CMD)
-		return 0;
-
-	fn = lookup_ioctl(cmd);
-	if (!fn) {
-		DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
-		return -ENOTTY;
-	}
-
-	/*
-	 * Copy the parameters into kernel space.
-	 */
-	r = copy_params(user, &param);
-	if (r)
-		return r;
-
-	r = validate_params(cmd, param);
-	if (r) {
-		free_params(param);
-		return r;
-	}
-
-	r = fn(param, user);
-	free_params(param);
-	return r;
-}
-
-static struct file_operations _ctl_fops = {
-	.ioctl	 = ctl_ioctl,
-	.owner	 = THIS_MODULE,
-};
-
-static struct miscdevice _dm_misc = {
-	.minor		= MISC_DYNAMIC_MINOR,
-	.name		= DM_NAME,
-	.devfs_name	= "mapper/control",
-	.fops		= &_ctl_fops
-};
-
-/*
- * Create misc character device and link to DM_DIR/control.
- */
-int __init dm_interface_init(void)
-{
-	int r;
-
-	r = dm_hash_init();
-	if (r)
-		return r;
-
-	r = misc_register(&_dm_misc);
-	if (r) {
-		DMERR("misc_register failed for control device");
-		dm_hash_exit();
-		return r;
-	}
-
-	DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
-	       DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
-	       DM_DRIVER_EMAIL);
-	return 0;
-
-	if (misc_deregister(&_dm_misc) < 0)
-		DMERR("misc_deregister failed for control device");
-	dm_hash_exit();
-	return r;
-}
-
-void dm_interface_exit(void)
-{
-	if (misc_deregister(&_dm_misc) < 0)
-		DMERR("misc_deregister failed for control device");
-	dm_hash_exit();
-}
--- diff/drivers/md/dm-ioctl-v4.c	2004-02-18 15:23:23.000000000 +0000
+++ source/drivers/md/dm-ioctl-v4.c	1970-01-01 01:00:00.000000000 +0100
@@ -1,1264 +0,0 @@
-/*
- * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#include "dm.h"
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/init.h>
-#include <linux/wait.h>
-#include <linux/slab.h>
-#include <linux/devfs_fs_kernel.h>
-#include <linux/dm-ioctl.h>
-
-#include <asm/uaccess.h>
-
-#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
-
-/*-----------------------------------------------------------------
- * The ioctl interface needs to be able to look up devices by
- * name or uuid.
- *---------------------------------------------------------------*/
-struct hash_cell {
-	struct list_head name_list;
-	struct list_head uuid_list;
-
-	char *name;
-	char *uuid;
-	struct mapped_device *md;
-	struct dm_table *new_map;
-};
-
-#define NUM_BUCKETS 64
-#define MASK_BUCKETS (NUM_BUCKETS - 1)
-static struct list_head _name_buckets[NUM_BUCKETS];
-static struct list_head _uuid_buckets[NUM_BUCKETS];
-
-void dm_hash_remove_all(void);
-
-/*
- * Guards access to both hash tables.
- */
-static DECLARE_RWSEM(_hash_lock);
-
-static void init_buckets(struct list_head *buckets)
-{
-	unsigned int i;
-
-	for (i = 0; i < NUM_BUCKETS; i++)
-		INIT_LIST_HEAD(buckets + i);
-}
-
-int dm_hash_init(void)
-{
-	init_buckets(_name_buckets);
-	init_buckets(_uuid_buckets);
-	devfs_mk_dir(DM_DIR);
-	return 0;
-}
-
-void dm_hash_exit(void)
-{
-	dm_hash_remove_all();
-	devfs_remove(DM_DIR);
-}
-
-/*-----------------------------------------------------------------
- * Hash function:
- * We're not really concerned with the str hash function being
- * fast since it's only used by the ioctl interface.
- *---------------------------------------------------------------*/
-static unsigned int hash_str(const char *str)
-{
-	const unsigned int hash_mult = 2654435387U;
-	unsigned int h = 0;
-
-	while (*str)
-		h = (h + (unsigned int) *str++) * hash_mult;
-
-	return h & MASK_BUCKETS;
-}
-
-/*-----------------------------------------------------------------
- * Code for looking up a device by name
- *---------------------------------------------------------------*/
-static struct hash_cell *__get_name_cell(const char *str)
-{
-	struct list_head *tmp;
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
-
-	list_for_each (tmp, _name_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, name_list);
-		if (!strcmp(hc->name, str))
-			return hc;
-	}
-
-	return NULL;
-}
-
-static struct hash_cell *__get_uuid_cell(const char *str)
-{
-	struct list_head *tmp;
-	struct hash_cell *hc;
-	unsigned int h = hash_str(str);
-
-	list_for_each (tmp, _uuid_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, uuid_list);
-		if (!strcmp(hc->uuid, str))
-			return hc;
-	}
-
-	return NULL;
-}
-
-/*-----------------------------------------------------------------
- * Inserting, removing and renaming a device.
- *---------------------------------------------------------------*/
-static inline char *kstrdup(const char *str)
-{
-	char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
-	if (r)
-		strcpy(r, str);
-	return r;
-}
-
-static struct hash_cell *alloc_cell(const char *name, const char *uuid,
-				    struct mapped_device *md)
-{
-	struct hash_cell *hc;
-
-	hc = kmalloc(sizeof(*hc), GFP_KERNEL);
-	if (!hc)
-		return NULL;
-
-	hc->name = kstrdup(name);
-	if (!hc->name) {
-		kfree(hc);
-		return NULL;
-	}
-
-	if (!uuid)
-		hc->uuid = NULL;
-
-	else {
-		hc->uuid = kstrdup(uuid);
-		if (!hc->uuid) {
-			kfree(hc->name);
-			kfree(hc);
-			return NULL;
-		}
-	}
-
-	INIT_LIST_HEAD(&hc->name_list);
-	INIT_LIST_HEAD(&hc->uuid_list);
-	hc->md = md;
-	hc->new_map = NULL;
-	return hc;
-}
-
-static void free_cell(struct hash_cell *hc)
-{
-	if (hc) {
-		kfree(hc->name);
-		kfree(hc->uuid);
-		kfree(hc);
-	}
-}
-
-/*
- * devfs stuff.
- */
-static int register_with_devfs(struct hash_cell *hc)
-{
-	struct gendisk *disk = dm_disk(hc->md);
-
-	devfs_mk_bdev(MKDEV(disk->major, disk->first_minor),
-		      S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
-		      DM_DIR "/%s", hc->name);
-	return 0;
-}
-
-static int unregister_with_devfs(struct hash_cell *hc)
-{
-	devfs_remove(DM_DIR"/%s", hc->name);
-	return 0;
-}
-
-/*
- * The kdev_t and uuid of a device can never change once it is
- * initially inserted.
- */
-int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
-{
-	struct hash_cell *cell;
-
-	/*
-	 * Allocate the new cells.
-	 */
-	cell = alloc_cell(name, uuid, md);
-	if (!cell)
-		return -ENOMEM;
-
-	/*
-	 * Insert the cell into both hash tables.
-	 */
-	down_write(&_hash_lock);
-	if (__get_name_cell(name))
-		goto bad;
-
-	list_add(&cell->name_list, _name_buckets + hash_str(name));
-
-	if (uuid) {
-		if (__get_uuid_cell(uuid)) {
-			list_del(&cell->name_list);
-			goto bad;
-		}
-		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
-	}
-	register_with_devfs(cell);
-	dm_get(md);
-	up_write(&_hash_lock);
-
-	return 0;
-
- bad:
-	up_write(&_hash_lock);
-	free_cell(cell);
-	return -EBUSY;
-}
-
-void __hash_remove(struct hash_cell *hc)
-{
-	/* remove from the dev hash */
-	list_del(&hc->uuid_list);
-	list_del(&hc->name_list);
-	unregister_with_devfs(hc);
-	dm_put(hc->md);
-	if (hc->new_map)
-		dm_table_put(hc->new_map);
-	free_cell(hc);
-}
-
-void dm_hash_remove_all(void)
-{
-	int i;
-	struct hash_cell *hc;
-	struct list_head *tmp, *n;
-
-	down_write(&_hash_lock);
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_safe (tmp, n, _name_buckets + i) {
-			hc = list_entry(tmp, struct hash_cell, name_list);
-			__hash_remove(hc);
-		}
-	}
-	up_write(&_hash_lock);
-}
-
-int dm_hash_rename(const char *old, const char *new)
-{
-	char *new_name, *old_name;
-	struct hash_cell *hc;
-
-	/*
-	 * duplicate new.
-	 */
-	new_name = kstrdup(new);
-	if (!new_name)
-		return -ENOMEM;
-
-	down_write(&_hash_lock);
-
-	/*
-	 * Is new free ?
-	 */
-	hc = __get_name_cell(new);
-	if (hc) {
-		DMWARN("asked to rename to an already existing name %s -> %s",
-		       old, new);
-		up_write(&_hash_lock);
-		kfree(new_name);
-		return -EBUSY;
-	}
-
-	/*
-	 * Is there such a device as 'old' ?
-	 */
-	hc = __get_name_cell(old);
-	if (!hc) {
-		DMWARN("asked to rename a non existent device %s -> %s",
-		       old, new);
-		up_write(&_hash_lock);
-		kfree(new_name);
-		return -ENXIO;
-	}
-
-	/*
-	 * rename and move the name cell.
-	 */
-	unregister_with_devfs(hc);
-
-	list_del(&hc->name_list);
-	old_name = hc->name;
-	hc->name = new_name;
-	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
-
-	/* rename the device node in devfs */
-	register_with_devfs(hc);
-
-	up_write(&_hash_lock);
-	kfree(old_name);
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of the ioctl commands
- *---------------------------------------------------------------*/
-/*
- * All the ioctl commands get dispatched to functions with this
- * prototype.
- */
-typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
-
-static int remove_all(struct dm_ioctl *param, size_t param_size)
-{
-	dm_hash_remove_all();
-	param->data_size = 0;
-	return 0;
-}
-
-/*
- * Round up the ptr to an 8-byte boundary.
- */
-#define ALIGN_MASK 7
-static inline void *align_ptr(void *ptr)
-{
-	return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
-}
-
-/*
- * Retrieves the data payload buffer from an already allocated
- * struct dm_ioctl.
- */
-static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
-			       size_t *len)
-{
-	param->data_start = align_ptr(param + 1) - (void *) param;
-
-	if (param->data_start < param_size)
-		*len = param_size - param->data_start;
-	else
-		*len = 0;
-
-	return ((void *) param) + param->data_start;
-}
-
-static int list_devices(struct dm_ioctl *param, size_t param_size)
-{
-	unsigned int i;
-	struct hash_cell *hc;
-	size_t len, needed = 0;
-	struct gendisk *disk;
-	struct dm_name_list *nl, *old_nl = NULL;
-
-	down_write(&_hash_lock);
-
-	/*
-	 * Loop through all the devices working out how much
-	 * space we need.
-	 */
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			needed += sizeof(struct dm_name_list);
-			needed += strlen(hc->name);
-			needed += ALIGN_MASK;
-		}
-	}
-
-	/*
-	 * Grab our output buffer.
-	 */
-	nl = get_result_buffer(param, param_size, &len);
-	if (len < needed) {
-		param->flags |= DM_BUFFER_FULL_FLAG;
-		goto out;
-	}
-	param->data_size = param->data_start + needed;
-
-	nl->dev = 0;	/* Flags no data */
-
-	/*
-	 * Now loop through filling out the names.
-	 */
-	for (i = 0; i < NUM_BUCKETS; i++) {
-		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			if (old_nl)
-				old_nl->next = (uint32_t) ((void *) nl -
-							   (void *) old_nl);
-			disk = dm_disk(hc->md);
-			nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
-			nl->next = 0;
-			strcpy(nl->name, hc->name);
-
-			old_nl = nl;
-			nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
-		}
-	}
-
- out:
-	up_write(&_hash_lock);
-	return 0;
-}
-
-static int check_name(const char *name)
-{
-	if (strchr(name, '/')) {
-		DMWARN("invalid device name");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * Fills in a dm_ioctl structure, ready for sending back to
- * userland.
- */
-static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
-{
-	struct gendisk *disk = dm_disk(md);
-	struct dm_table *table;
-	struct block_device *bdev;
-
-	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
-			  DM_ACTIVE_PRESENT_FLAG);
-
-	if (dm_suspended(md))
-		param->flags |= DM_SUSPEND_FLAG;
-
-	bdev = bdget_disk(disk, 0);
-	if (!bdev)
-		return -ENXIO;
-
-	param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
-
-	/*
-	 * Yes, this will be out of date by the time it gets back
-	 * to userland, but it is still very useful ofr
-	 * debugging.
-	 */
-	param->open_count = bdev->bd_openers;
-	bdput(bdev);
-
-	if (disk->policy)
-		param->flags |= DM_READONLY_FLAG;
-
-	param->event_nr = dm_get_event_nr(md);
-
-	table = dm_get_table(md);
-	if (table) {
-		param->flags |= DM_ACTIVE_PRESENT_FLAG;
-		param->target_count = dm_table_get_num_targets(table);
-		dm_table_put(table);
-	} else
-		param->target_count = 0;
-
-	return 0;
-}
-
-static int dev_create(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct mapped_device *md;
-
-	r = check_name(param->name);
-	if (r)
-		return r;
-
-	if (param->flags & DM_PERSISTENT_DEV_FLAG)
-		r = dm_create_with_minor(MINOR(huge_decode_dev(param->dev)), &md);
-	else
-		r = dm_create(&md);
-
-	if (r)
-		return r;
-
-	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
-	if (r) {
-		dm_put(md);
-		return r;
-	}
-
-	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-
-	r = __dev_status(md, param);
-	dm_put(md);
-
-	return r;
-}
-
-/*
- * Always use UUID for lookups if it's present, otherwise use name.
- */
-static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
-{
-	return *param->uuid ?
-	    __get_uuid_cell(param->uuid) : __get_name_cell(param->name);
-}
-
-static inline struct mapped_device *find_device(struct dm_ioctl *param)
-{
-	struct hash_cell *hc;
-	struct mapped_device *md = NULL;
-
-	down_read(&_hash_lock);
-	hc = __find_device_hash_cell(param);
-	if (hc) {
-		md = hc->md;
-
-		/*
-		 * Sneakily write in both the name and the uuid
-		 * while we have the cell.
-		 */
-		strncpy(param->name, hc->name, sizeof(param->name));
-		if (hc->uuid)
-			strncpy(param->uuid, hc->uuid, sizeof(param->uuid)-1);
-		else
-			param->uuid[0] = '\0';
-
-		if (hc->new_map)
-			param->flags |= DM_INACTIVE_PRESENT_FLAG;
-		else
-			param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-
-		dm_get(md);
-	}
-	up_read(&_hash_lock);
-
-	return md;
-}
-
-static int dev_remove(struct dm_ioctl *param, size_t param_size)
-{
-	struct hash_cell *hc;
-
-	down_write(&_hash_lock);
-	hc = __find_device_hash_cell(param);
-
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
-		return -ENXIO;
-	}
-
-	__hash_remove(hc);
-	up_write(&_hash_lock);
-	param->data_size = 0;
-	return 0;
-}
-
-/*
- * Check a string doesn't overrun the chunk of
- * memory we copied from userland.
- */
-static int invalid_str(char *str, void *end)
-{
-	while ((void *) str < end)
-		if (!*str++)
-			return 0;
-
-	return -EINVAL;
-}
-
-static int dev_rename(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	char *new_name = (char *) param + param->data_start;
-
-	if (new_name < (char *) (param + 1) ||
-	    invalid_str(new_name, (void *) param + param_size)) {
-		DMWARN("Invalid new logical volume name supplied.");
-		return -EINVAL;
-	}
-
-	r = check_name(new_name);
-	if (r)
-		return r;
-
-	param->data_size = 0;
-	return dm_hash_rename(param->name, new_name);
-}
-
-static int do_suspend(struct dm_ioctl *param)
-{
-	int r = 0;
-	struct mapped_device *md;
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	if (!dm_suspended(md))
-		r = dm_suspend(md);
-
-	if (!r)
-		r = __dev_status(md, param);
-
-	dm_put(md);
-	return r;
-}
-
-static int do_resume(struct dm_ioctl *param)
-{
-	int r = 0;
-	struct hash_cell *hc;
-	struct mapped_device *md;
-	struct dm_table *new_map;
-
-	down_write(&_hash_lock);
-
-	hc = __find_device_hash_cell(param);
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
-		return -ENXIO;
-	}
-
-	md = hc->md;
-	dm_get(md);
-
-	new_map = hc->new_map;
-	hc->new_map = NULL;
-	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-
-	up_write(&_hash_lock);
-
-	/* Do we need to load a new map ? */
-	if (new_map) {
-		/* Suspend if it isn't already suspended */
-		if (!dm_suspended(md))
-			dm_suspend(md);
-
-		r = dm_swap_table(md, new_map);
-		if (r) {
-			dm_put(md);
-			dm_table_put(new_map);
-			return r;
-		}
-
-		if (dm_table_get_mode(new_map) & FMODE_WRITE)
-			set_disk_ro(dm_disk(md), 0);
-		else
-			set_disk_ro(dm_disk(md), 1);
-
-		dm_table_put(new_map);
-	}
-
-	if (dm_suspended(md))
-		r = dm_resume(md);
-
-	if (!r)
-		r = __dev_status(md, param);
-
-	dm_put(md);
-	return r;
-}
-
-/*
- * Set or unset the suspension state of a device.
- * If the device already is in the requested state we just return its status.
- */
-static int dev_suspend(struct dm_ioctl *param, size_t param_size)
-{
-	if (param->flags & DM_SUSPEND_FLAG)
-		return do_suspend(param);
-
-	return do_resume(param);
-}
-
-/*
- * Copies device info back to user space, used by
- * the create and info ioctls.
- */
-static int dev_status(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct mapped_device *md;
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	r = __dev_status(md, param);
-	dm_put(md);
-	return r;
-}
-
-/*
- * Build up the status struct for each target
- */
-static void retrieve_status(struct dm_table *table,
-			    struct dm_ioctl *param, size_t param_size)
-{
-	unsigned int i, num_targets;
-	struct dm_target_spec *spec;
-	char *outbuf, *outptr;
-	status_type_t type;
-	size_t remaining, len, used = 0;
-
-	outptr = outbuf = get_result_buffer(param, param_size, &len);
-
-	if (param->flags & DM_STATUS_TABLE_FLAG)
-		type = STATUSTYPE_TABLE;
-	else
-		type = STATUSTYPE_INFO;
-
-	/* Get all the target info */
-	num_targets = dm_table_get_num_targets(table);
-	for (i = 0; i < num_targets; i++) {
-		struct dm_target *ti = dm_table_get_target(table, i);
-
-		remaining = len - (outptr - outbuf);
-		if (remaining < sizeof(struct dm_target_spec)) {
-			param->flags |= DM_BUFFER_FULL_FLAG;
-			break;
-		}
-
-		spec = (struct dm_target_spec *) outptr;
-
-		spec->status = 0;
-		spec->sector_start = ti->begin;
-		spec->length = ti->len;
-		strncpy(spec->target_type, ti->type->name,
-			sizeof(spec->target_type));
-
-		outptr += sizeof(struct dm_target_spec);
-		remaining = len - (outptr - outbuf);
-
-		/* Get the status/table string from the target driver */
-		if (ti->type->status) {
-			if (ti->type->status(ti, type, outptr, remaining)) {
-				param->flags |= DM_BUFFER_FULL_FLAG;
-				break;
-			}
-		} else
-			outptr[0] = '\0';
-
-		outptr += strlen(outptr) + 1;
-		used = param->data_start + (outptr - outbuf);
-
-		align_ptr(outptr);
-		spec->next = outptr - outbuf;
-	}
-
-	if (used)
-		param->data_size = used;
-
-	param->target_count = num_targets;
-}
-
-/*
- * Wait for a device to report an event
- */
-static int dev_wait(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct mapped_device *md;
-	struct dm_table *table;
-	DECLARE_WAITQUEUE(wq, current);
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	/*
-	 * Wait for a notification event
-	 */
-	set_current_state(TASK_INTERRUPTIBLE);
-	if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
-		schedule();
-		dm_remove_wait_queue(md, &wq);
-	}
- 	set_current_state(TASK_RUNNING);
-
-	/*
-	 * The userland program is going to want to know what
-	 * changed to trigger the event, so we may as well tell
-	 * him and save an ioctl.
-	 */
-	r = __dev_status(md, param);
-	if (r)
-		goto out;
-
-	table = dm_get_table(md);
-	if (table) {
-		retrieve_status(table, param, param_size);
-		dm_table_put(table);
-	}
-
- out:
-	dm_put(md);
-	return r;
-}
-
-static inline int get_mode(struct dm_ioctl *param)
-{
-	int mode = FMODE_READ | FMODE_WRITE;
-
-	if (param->flags & DM_READONLY_FLAG)
-		mode = FMODE_READ;
-
-	return mode;
-}
-
-static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
-		       struct dm_target_spec **spec, char **target_params)
-{
-	*spec = (struct dm_target_spec *) ((unsigned char *) last + next);
-	*target_params = (char *) (*spec + 1);
-
-	if (*spec < (last + 1))
-		return -EINVAL;
-
-	return invalid_str(*target_params, end);
-}
-
-static int populate_table(struct dm_table *table,
-			  struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	unsigned int i = 0;
-	struct dm_target_spec *spec = (struct dm_target_spec *) param;
-	uint32_t next = param->data_start;
-	void *end = (void *) param + param_size;
-	char *target_params;
-
-	if (!param->target_count) {
-		DMWARN("populate_table: no targets specified");
-		return -EINVAL;
-	}
-
-	for (i = 0; i < param->target_count; i++) {
-
-		r = next_target(spec, next, end, &spec, &target_params);
-		if (r) {
-			DMWARN("unable to find target");
-			return r;
-		}
-
-		r = dm_table_add_target(table, spec->target_type,
-					(sector_t) spec->sector_start,
-					(sector_t) spec->length,
-					target_params);
-		if (r) {
-			DMWARN("error adding target to table");
-			return r;
-		}
-
-		next = spec->next;
-	}
-
-	return dm_table_complete(table);
-}
-
-static int table_load(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct hash_cell *hc;
-	struct dm_table *t;
-
-	r = dm_table_create(&t, get_mode(param), param->target_count);
-	if (r)
-		return r;
-
-	r = populate_table(t, param, param_size);
-	if (r) {
-		dm_table_put(t);
-		return r;
-	}
-
-	down_write(&_hash_lock);
-	hc = __find_device_hash_cell(param);
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
-		return -ENXIO;
-	}
-
-	if (hc->new_map)
-		dm_table_put(hc->new_map);
-	hc->new_map = t;
-	param->flags |= DM_INACTIVE_PRESENT_FLAG;
-
-	r = __dev_status(hc->md, param);
-	up_write(&_hash_lock);
-	return r;
-}
-
-static int table_clear(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct hash_cell *hc;
-
-	down_write(&_hash_lock);
-
-	hc = __find_device_hash_cell(param);
-	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
-		up_write(&_hash_lock);
-		return -ENXIO;
-	}
-
-	if (hc->new_map) {
-		dm_table_put(hc->new_map);
-		hc->new_map = NULL;
-	}
-
-	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-
-	r = __dev_status(hc->md, param);
-	up_write(&_hash_lock);
-	return r;
-}
-
-/*
- * Retrieves a list of devices used by a particular dm device.
- */
-static void retrieve_deps(struct dm_table *table,
-			  struct dm_ioctl *param, size_t param_size)
-{
-	unsigned int count = 0;
-	struct list_head *tmp;
-	size_t len, needed;
-	struct dm_target_deps *deps;
-
-	deps = get_result_buffer(param, param_size, &len);
-
-	/*
-	 * Count the devices.
-	 */
-	list_for_each(tmp, dm_table_get_devices(table))
-		count++;
-
-	/*
-	 * Check we have enough space.
-	 */
-	needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
-	if (len < needed) {
-		param->flags |= DM_BUFFER_FULL_FLAG;
-		return;
-	}
-
-	/*
-	 * Fill in the devices.
-	 */
-	deps->count = count;
-	count = 0;
-	list_for_each(tmp, dm_table_get_devices(table)) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
-		deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
-	}
-
-	param->data_size = param->data_start + needed;
-}
-
-static int table_deps(struct dm_ioctl *param, size_t param_size)
-{
-	int r = 0;
-	struct mapped_device *md;
-	struct dm_table *table;
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	r = __dev_status(md, param);
-	if (r)
-		goto out;
-
-	table = dm_get_table(md);
-	if (table) {
-		retrieve_deps(table, param, param_size);
-		dm_table_put(table);
-	}
-
- out:
-	dm_put(md);
-	return r;
-}
-
-/*
- * Return the status of a device as a text string for each
- * target.
- */
-static int table_status(struct dm_ioctl *param, size_t param_size)
-{
-	int r;
-	struct mapped_device *md;
-	struct dm_table *table;
-
-	md = find_device(param);
-	if (!md)
-		return -ENXIO;
-
-	r = __dev_status(md, param);
-	if (r)
-		goto out;
-
-	table = dm_get_table(md);
-	if (table) {
-		retrieve_status(table, param, param_size);
-		dm_table_put(table);
-	}
-
- out:
-	dm_put(md);
-	return r;
-}
-
-/*-----------------------------------------------------------------
- * Implementation of open/close/ioctl on the special char
- * device.
- *---------------------------------------------------------------*/
-static ioctl_fn lookup_ioctl(unsigned int cmd)
-{
-	static struct {
-		int cmd;
-		ioctl_fn fn;
-	} _ioctls[] = {
-		{DM_VERSION_CMD, NULL},	/* version is dealt with elsewhere */
-		{DM_REMOVE_ALL_CMD, remove_all},
-		{DM_LIST_DEVICES_CMD, list_devices},
-
-		{DM_DEV_CREATE_CMD, dev_create},
-		{DM_DEV_REMOVE_CMD, dev_remove},
-		{DM_DEV_RENAME_CMD, dev_rename},
-		{DM_DEV_SUSPEND_CMD, dev_suspend},
-		{DM_DEV_STATUS_CMD, dev_status},
-		{DM_DEV_WAIT_CMD, dev_wait},
-
-		{DM_TABLE_LOAD_CMD, table_load},
-		{DM_TABLE_CLEAR_CMD, table_clear},
-		{DM_TABLE_DEPS_CMD, table_deps},
-		{DM_TABLE_STATUS_CMD, table_status}
-	};
-
-	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
-}
-
-/*
- * As well as checking the version compatibility this always
- * copies the kernel interface version out.
- */
-static int check_version(unsigned int cmd, struct dm_ioctl *user)
-{
-	uint32_t version[3];
-	int r = 0;
-
-	if (copy_from_user(version, user->version, sizeof(version)))
-		return -EFAULT;
-
-	if ((DM_VERSION_MAJOR != version[0]) ||
-	    (DM_VERSION_MINOR < version[1])) {
-		DMWARN("ioctl interface mismatch: "
-		       "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
-		       DM_VERSION_MAJOR, DM_VERSION_MINOR,
-		       DM_VERSION_PATCHLEVEL,
-		       version[0], version[1], version[2], cmd);
-		r = -EINVAL;
-	}
-
-	/*
-	 * Fill in the kernel version.
-	 */
-	version[0] = DM_VERSION_MAJOR;
-	version[1] = DM_VERSION_MINOR;
-	version[2] = DM_VERSION_PATCHLEVEL;
-	if (copy_to_user(user->version, version, sizeof(version)))
-		return -EFAULT;
-
-	return r;
-}
-
-static void free_params(struct dm_ioctl *param)
-{
-	vfree(param);
-}
-
-static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
-{
-	struct dm_ioctl tmp, *dmi;
-
-	if (copy_from_user(&tmp, user, sizeof(tmp)))
-		return -EFAULT;
-
-	if (tmp.data_size < sizeof(tmp))
-		return -EINVAL;
-
-	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
-	if (!dmi)
-		return -ENOMEM;
-
-	if (copy_from_user(dmi, user, tmp.data_size)) {
-		vfree(dmi);
-		return -EFAULT;
-	}
-
-	*param = dmi;
-	return 0;
-}
-
-static int validate_params(uint cmd, struct dm_ioctl *param)
-{
-	/* Always clear this flag */
-	param->flags &= ~DM_BUFFER_FULL_FLAG;
-
-	/* Ignores parameters */
-	if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
-		return 0;
-
-	/* Unless creating, either name or uuid but not both */
-	if (cmd != DM_DEV_CREATE_CMD) {
-		if ((!*param->uuid && !*param->name) ||
-		    (*param->uuid && *param->name)) {
-			DMWARN("one of name or uuid must be supplied, cmd(%u)",
-			       cmd);
-			return -EINVAL;
-		}
-	}
-
-	/* Ensure strings are terminated */
-	param->name[DM_NAME_LEN - 1] = '\0';
-	param->uuid[DM_UUID_LEN - 1] = '\0';
-
-	return 0;
-}
-
-static int ctl_ioctl(struct inode *inode, struct file *file,
-		     uint command, ulong u)
-{
-	int r = 0;
-	unsigned int cmd;
-	struct dm_ioctl *param;
-	struct dm_ioctl *user = (struct dm_ioctl *) u;
-	ioctl_fn fn = NULL;
-	size_t param_size;
-
-	/* only root can play with this */
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (_IOC_TYPE(command) != DM_IOCTL)
-		return -ENOTTY;
-
-	cmd = _IOC_NR(command);
-
-	/*
-	 * Check the interface version passed in.  This also
-	 * writes out the kernel's interface version.
-	 */
-	r = check_version(cmd, user);
-	if (r)
-		return r;
-
-	/*
-	 * Nothing more to do for the version command.
-	 */
-	if (cmd == DM_VERSION_CMD)
-		return 0;
-
-	fn = lookup_ioctl(cmd);
-	if (!fn) {
-		DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
-		return -ENOTTY;
-	}
-
-	/*
-	 * Trying to avoid low memory issues when a device is
-	 * suspended.
-	 */
-	current->flags |= PF_MEMALLOC;
-
-	/*
-	 * Copy the parameters into kernel space.
-	 */
-	r = copy_params(user, &param);
-	if (r) {
-		current->flags &= ~PF_MEMALLOC;
-		return r;
-	}
-
-	/*
-	 * FIXME: eventually we will remove the PF_MEMALLOC flag
-	 * here.  However the tools still do nasty things like
-	 * 'load' while a device is suspended.
-	 */
-
-	r = validate_params(cmd, param);
-	if (r)
-		goto out;
-
-	param_size = param->data_size;
-	param->data_size = sizeof(*param);
-	r = fn(param, param_size);
-
-	/*
-	 * Copy the results back to userland.
-	 */
-	if (!r && copy_to_user(user, param, param->data_size))
-		r = -EFAULT;
-
- out:
-	free_params(param);
-	current->flags &= ~PF_MEMALLOC;
-	return r;
-}
-
-static struct file_operations _ctl_fops = {
-	.ioctl	 = ctl_ioctl,
-	.owner	 = THIS_MODULE,
-};
-
-static struct miscdevice _dm_misc = {
-	.minor 		= MISC_DYNAMIC_MINOR,
-	.name  		= DM_NAME,
-	.devfs_name 	= "mapper/control",
-	.fops  		= &_ctl_fops
-};
-
-/*
- * Create misc character device and link to DM_DIR/control.
- */
-int __init dm_interface_init(void)
-{
-	int r;
-
-	r = dm_hash_init();
-	if (r)
-		return r;
-
-	r = misc_register(&_dm_misc);
-	if (r) {
-		DMERR("misc_register failed for control device");
-		dm_hash_exit();
-		return r;
-	}
-
-	DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
-	       DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
-	       DM_DRIVER_EMAIL);
-	return 0;
-}
-
-void dm_interface_exit(void)
-{
-	if (misc_deregister(&_dm_misc) < 0)
-		DMERR("misc_deregister failed for control device");
-
-	dm_hash_exit();
-}
--- diff/include/linux/dm-ioctl-v1.h	2003-09-30 15:46:20.000000000 +0100
+++ source/include/linux/dm-ioctl-v1.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,149 +0,0 @@
-/*
- * Copyright (C) 2001 Sistina Software (UK) Limited.
- *
- * This file is released under the LGPL.
- */
-
-#ifndef _LINUX_DM_IOCTL_V1_H
-#define _LINUX_DM_IOCTL_V1_H
-
-#include <linux/types.h>
-
-#define DM_DIR "mapper"	/* Slashes not supported */
-#define DM_MAX_TYPE_NAME 16
-#define DM_NAME_LEN 128
-#define DM_UUID_LEN 129
-
-/*
- * Implements a traditional ioctl interface to the device mapper.
- */
-
-/*
- * All ioctl arguments consist of a single chunk of memory, with
- * this structure at the start.  If a uuid is specified any
- * lookup (eg. for a DM_INFO) will be done on that, *not* the
- * name.
- */
-struct dm_ioctl {
-	/*
-	 * The version number is made up of three parts:
-	 * major - no backward or forward compatibility,
-	 * minor - only backwards compatible,
-	 * patch - both backwards and forwards compatible.
-	 *
-	 * All clients of the ioctl interface should fill in the
-	 * version number of the interface that they were
-	 * compiled with.
-	 *
-	 * All recognised ioctl commands (ie. those that don't
-	 * return -ENOTTY) fill out this field, even if the
-	 * command failed.
-	 */
-	uint32_t version[3];	/* in/out */
-	uint32_t data_size;	/* total size of data passed in
-				 * including this struct */
-
-	uint32_t data_start;	/* offset to start of data
-				 * relative to start of this struct */
-
-	uint32_t target_count;	/* in/out */
-	uint32_t open_count;	/* out */
-	uint32_t flags;		/* in/out */
-
-	__kernel_old_dev_t dev;	/* in/out */
-
-	char name[DM_NAME_LEN];	/* device name */
-	char uuid[DM_UUID_LEN];	/* unique identifier for
-				 * the block device */
-};
-
-/*
- * Used to specify tables.  These structures appear after the
- * dm_ioctl.
- */
-struct dm_target_spec {
-	int32_t status;		/* used when reading from kernel only */
-	uint64_t sector_start;
-	uint32_t length;
-
-	/*
-	 * Offset in bytes (from the start of this struct) to
-	 * next target_spec.
-	 */
-	uint32_t next;
-
-	char target_type[DM_MAX_TYPE_NAME];
-
-	/*
-	 * Parameter string starts immediately after this object.
-	 * Be careful to add padding after string to ensure correct
-	 * alignment of subsequent dm_target_spec.
-	 */
-};
-
-/*
- * Used to retrieve the target dependencies.
- */
-struct dm_target_deps {
-	uint32_t count;
-
-	__kernel_old_dev_t dev[0];	/* out */
-};
-
-/*
- * If you change this make sure you make the corresponding change
- * to dm-ioctl.c:lookup_ioctl()
- */
-enum {
-	/* Top level cmds */
-	DM_VERSION_CMD = 0,
-	DM_REMOVE_ALL_CMD,
-
-	/* device level cmds */
-	DM_DEV_CREATE_CMD,
-	DM_DEV_REMOVE_CMD,
-	DM_DEV_RELOAD_CMD,
-	DM_DEV_RENAME_CMD,
-	DM_DEV_SUSPEND_CMD,
-	DM_DEV_DEPS_CMD,
-	DM_DEV_STATUS_CMD,
-
-	/* target level cmds */
-	DM_TARGET_STATUS_CMD,
-	DM_TARGET_WAIT_CMD
-};
-
-#define DM_IOCTL 0xfd
-
-#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
-#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
-
-#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
-#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
-#define DM_DEV_RELOAD    _IOWR(DM_IOCTL, DM_DEV_RELOAD_CMD, struct dm_ioctl)
-#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
-#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
-#define DM_DEV_DEPS      _IOWR(DM_IOCTL, DM_DEV_DEPS_CMD, struct dm_ioctl)
-#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
-
-#define DM_TARGET_STATUS _IOWR(DM_IOCTL, DM_TARGET_STATUS_CMD, struct dm_ioctl)
-#define DM_TARGET_WAIT   _IOWR(DM_IOCTL, DM_TARGET_WAIT_CMD, struct dm_ioctl)
-
-#define DM_VERSION_MAJOR	1
-#define DM_VERSION_MINOR	0
-#define DM_VERSION_PATCHLEVEL	6
-#define DM_VERSION_EXTRA	"-ioctl (2002-10-15)"
-
-/* Status bits */
-#define DM_READONLY_FLAG	0x00000001
-#define DM_SUSPEND_FLAG		0x00000002
-#define DM_EXISTS_FLAG		0x00000004
-#define DM_PERSISTENT_DEV_FLAG	0x00000008
-
-/*
- * Flag passed into ioctl STATUS command to get table information
- * rather than current status.
- */
-#define DM_STATUS_TABLE_FLAG	0x00000010
-
-#endif				/* _LINUX_DM_IOCTL_H */
--- diff/include/linux/dm-ioctl-v4.h	2003-08-20 14:16:15.000000000 +0100
+++ source/include/linux/dm-ioctl-v4.h	1970-01-01 01:00:00.000000000 +0100
@@ -1,237 +0,0 @@
-/*
- * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
- *
- * This file is released under the LGPL.
- */
-
-#ifndef _LINUX_DM_IOCTL_V4_H
-#define _LINUX_DM_IOCTL_V4_H
-
-#include <linux/types.h>
-
-#define DM_DIR "mapper"		/* Slashes not supported */
-#define DM_MAX_TYPE_NAME 16
-#define DM_NAME_LEN 128
-#define DM_UUID_LEN 129
-
-/*
- * A traditional ioctl interface for the device mapper.
- *
- * Each device can have two tables associated with it, an
- * 'active' table which is the one currently used by io passing
- * through the device, and an 'inactive' one which is a table
- * that is being prepared as a replacement for the 'active' one.
- *
- * DM_VERSION:
- * Just get the version information for the ioctl interface.
- *
- * DM_REMOVE_ALL:
- * Remove all dm devices, destroy all tables.  Only really used
- * for debug.
- *
- * DM_LIST_DEVICES:
- * Get a list of all the dm device names.
- *
- * DM_DEV_CREATE:
- * Create a new device, neither the 'active' or 'inactive' table
- * slots will be filled.  The device will be in suspended state
- * after creation, however any io to the device will get errored
- * since it will be out-of-bounds.
- *
- * DM_DEV_REMOVE:
- * Remove a device, destroy any tables.
- *
- * DM_DEV_RENAME:
- * Rename a device.
- *
- * DM_SUSPEND:
- * This performs both suspend and resume, depending which flag is
- * passed in.
- * Suspend: This command will not return until all pending io to
- * the device has completed.  Further io will be deferred until
- * the device is resumed.
- * Resume: It is no longer an error to issue this command on an
- * unsuspended device.  If a table is present in the 'inactive'
- * slot, it will be moved to the active slot, then the old table
- * from the active slot will be _destroyed_.  Finally the device
- * is resumed.
- *
- * DM_DEV_STATUS:
- * Retrieves the status for the table in the 'active' slot.
- *
- * DM_DEV_WAIT:
- * Wait for a significant event to occur to the device.  This
- * could either be caused by an event triggered by one of the
- * targets of the table in the 'active' slot, or a table change.
- *
- * DM_TABLE_LOAD:
- * Load a table into the 'inactive' slot for the device.  The
- * device does _not_ need to be suspended prior to this command.
- *
- * DM_TABLE_CLEAR:
- * Destroy any table in the 'inactive' slot (ie. abort).
- *
- * DM_TABLE_DEPS:
- * Return a set of device dependencies for the 'active' table.
- *
- * DM_TABLE_STATUS:
- * Return the targets status for the 'active' table.
- */
-
-/*
- * All ioctl arguments consist of a single chunk of memory, with
- * this structure at the start.  If a uuid is specified any
- * lookup (eg. for a DM_INFO) will be done on that, *not* the
- * name.
- */
-struct dm_ioctl {
-	/*
-	 * The version number is made up of three parts:
-	 * major - no backward or forward compatibility,
-	 * minor - only backwards compatible,
-	 * patch - both backwards and forwards compatible.
-	 *
-	 * All clients of the ioctl interface should fill in the
-	 * version number of the interface that they were
-	 * compiled with.
-	 *
-	 * All recognised ioctl commands (ie. those that don't
-	 * return -ENOTTY) fill out this field, even if the
-	 * command failed.
-	 */
-	uint32_t version[3];	/* in/out */
-	uint32_t data_size;	/* total size of data passed in
-				 * including this struct */
-
-	uint32_t data_start;	/* offset to start of data
-				 * relative to start of this struct */
-
-	uint32_t target_count;	/* in/out */
-	int32_t open_count;	/* out */
-	uint32_t flags;		/* in/out */
-	uint32_t event_nr;      	/* in/out */
-	uint32_t padding;
-
-	uint64_t dev;		/* in/out */
-
-	char name[DM_NAME_LEN];	/* device name */
-	char uuid[DM_UUID_LEN];	/* unique identifier for
-				 * the block device */
-};
-
-/*
- * Used to specify tables.  These structures appear after the
- * dm_ioctl.
- */
-struct dm_target_spec {
-	uint64_t sector_start;
-	uint64_t length;
-	int32_t status;		/* used when reading from kernel only */
-
-	/*
-	 * Offset in bytes (from the start of this struct) to
-	 * next target_spec.
-	 */
-	uint32_t next;
-
-	char target_type[DM_MAX_TYPE_NAME];
-
-	/*
-	 * Parameter string starts immediately after this object.
-	 * Be careful to add padding after string to ensure correct
-	 * alignment of subsequent dm_target_spec.
-	 */
-};
-
-/*
- * Used to retrieve the target dependencies.
- */
-struct dm_target_deps {
-	uint32_t count;	/* Array size */
-	uint32_t padding;	/* unused */
-	uint64_t dev[0];	/* out */
-};
-
-/*
- * Used to get a list of all dm devices.
- */
-struct dm_name_list {
-	uint64_t dev;
-	uint32_t next;		/* offset to the next record from
-				   the _start_ of this */
-	char name[0];
-};
-
-/*
- * If you change this make sure you make the corresponding change
- * to dm-ioctl.c:lookup_ioctl()
- */
-enum {
-	/* Top level cmds */
-	DM_VERSION_CMD = 0,
-	DM_REMOVE_ALL_CMD,
-	DM_LIST_DEVICES_CMD,
-
-	/* device level cmds */
-	DM_DEV_CREATE_CMD,
-	DM_DEV_REMOVE_CMD,
-	DM_DEV_RENAME_CMD,
-	DM_DEV_SUSPEND_CMD,
-	DM_DEV_STATUS_CMD,
-	DM_DEV_WAIT_CMD,
-
-	/* Table level cmds */
-	DM_TABLE_LOAD_CMD,
-	DM_TABLE_CLEAR_CMD,
-	DM_TABLE_DEPS_CMD,
-	DM_TABLE_STATUS_CMD,
-};
-
-#define DM_IOCTL 0xfd
-
-#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
-#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
-#define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
-
-#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
-#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
-#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
-#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
-#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
-#define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
-
-#define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
-#define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
-#define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
-#define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
-
-#define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	0
-#define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2003-06-04)"
-
-/* Status bits */
-#define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-#define DM_SUSPEND_FLAG		(1 << 1) /* In/Out */
-#define DM_PERSISTENT_DEV_FLAG	(1 << 3) /* In */
-
-/*
- * Flag passed into ioctl STATUS command to get table information
- * rather than current status.
- */
-#define DM_STATUS_TABLE_FLAG	(1 << 4) /* In */
-
-/*
- * Flags that indicate whether a table is present in either of
- * the two table slots that a device has.
- */
-#define DM_ACTIVE_PRESENT_FLAG   (1 << 5) /* Out */
-#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
-
-/*
- * Indicates that the buffer passed in wasn't big enough for the
- * results.
- */
-#define DM_BUFFER_FULL_FLAG	(1 << 8) /* Out */
-
-#endif				/* _LINUX_DM_IOCTL_H */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 3/6] dm: list_for_each_entry audit
  2004-02-20 15:31 device-mapper patchset Joe Thornber
  2004-02-20 15:34 ` [Patch 1/6] dm: endio method Joe Thornber
  2004-02-20 15:34 ` [Patch 2/6] dm: remove v1 ioctl interface Joe Thornber
@ 2004-02-20 15:35 ` Joe Thornber
  2004-02-20 15:36 ` [Patch 4/6] dm: default queue limits Joe Thornber
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:35 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Audit for list_for_each_*entry*
--- diff/drivers/md/dm-ioctl.c	2004-02-18 15:23:23.000000000 +0000
+++ source/drivers/md/dm-ioctl.c	2004-02-18 15:34:04.000000000 +0000
@@ -88,30 +88,24 @@ static unsigned int hash_str(const char 
  *---------------------------------------------------------------*/
 static struct hash_cell *__get_name_cell(const char *str)
 {
-	struct list_head *tmp;
 	struct hash_cell *hc;
 	unsigned int h = hash_str(str);
 
-	list_for_each (tmp, _name_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, name_list);
+	list_for_each_entry (hc, _name_buckets + h, name_list)
 		if (!strcmp(hc->name, str))
 			return hc;
-	}
 
 	return NULL;
 }
 
 static struct hash_cell *__get_uuid_cell(const char *str)
 {
-	struct list_head *tmp;
 	struct hash_cell *hc;
 	unsigned int h = hash_str(str);
 
-	list_for_each (tmp, _uuid_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, uuid_list);
+	list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
 		if (!strcmp(hc->uuid, str))
 			return hc;
-	}
 
 	return NULL;
 }
@@ -935,6 +929,7 @@ static void retrieve_deps(struct dm_tabl
 	unsigned int count = 0;
 	struct list_head *tmp;
 	size_t len, needed;
+	struct dm_dev *dd;
 	struct dm_target_deps *deps;
 
 	deps = get_result_buffer(param, param_size, &len);
@@ -942,7 +937,7 @@ static void retrieve_deps(struct dm_tabl
 	/*
 	 * Count the devices.
 	 */
-	list_for_each(tmp, dm_table_get_devices(table))
+	list_for_each (tmp, dm_table_get_devices(table))
 		count++;
 
 	/*
@@ -959,10 +954,8 @@ static void retrieve_deps(struct dm_tabl
 	 */
 	deps->count = count;
 	count = 0;
-	list_for_each(tmp, dm_table_get_devices(table)) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+	list_for_each_entry (dd, dm_table_get_devices(table), list)
 		deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
-	}
 
 	param->data_size = param->data_start + needed;
 }
--- diff/drivers/md/dm-table.c	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/dm-table.c	2004-02-18 15:38:06.000000000 +0000
@@ -329,13 +329,11 @@ static int lookup_device(const char *pat
  */
 static struct dm_dev *find_device(struct list_head *l, dev_t dev)
 {
-	struct list_head *tmp;
+	struct dm_dev *dd;
 
-	list_for_each(tmp, l) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+	list_for_each_entry (dd, l, list)
 		if (dd->bdev->bd_dev == dev)
 			return dd;
-	}
 
 	return NULL;
 }
--- diff/drivers/md/dm-target.c	2004-02-18 15:16:23.000000000 +0000
+++ source/drivers/md/dm-target.c	2004-02-18 15:38:06.000000000 +0000
@@ -25,15 +25,11 @@ static DECLARE_RWSEM(_lock);
 
 static inline struct tt_internal *__find_target_type(const char *name)
 {
-	struct list_head *tih;
 	struct tt_internal *ti;
 
-	list_for_each(tih, &_targets) {
-		ti = list_entry(tih, struct tt_internal, list);
-
+	list_for_each_entry (ti, &_targets, list)
 		if (!strcmp(name, ti->tt.name))
 			return ti;
-	}
 
 	return NULL;
 }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 4/6] dm: default queue limits
  2004-02-20 15:31 device-mapper patchset Joe Thornber
                   ` (2 preceding siblings ...)
  2004-02-20 15:35 ` [Patch 3/6] dm: list_for_each_entry audit Joe Thornber
@ 2004-02-20 15:36 ` Joe Thornber
  2004-02-20 15:39   ` Joe Thornber
  2004-02-20 15:37 ` [Patch 5/6] dm: list targets cmd Joe Thornber
  2004-02-20 15:37 ` [Patch 6/6] dm: multipath target Joe Thornber
  5 siblings, 1 reply; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:36 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

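Audit for list_for_each_*entry*
[Note: the description and diff in this message are identical to those of
patch 3/6 and contain no queue-limit changes; see the reply to this message
in the thread.]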
--- diff/drivers/md/dm-ioctl.c	2004-02-18 15:23:23.000000000 +0000
+++ source/drivers/md/dm-ioctl.c	2004-02-18 15:34:04.000000000 +0000
@@ -88,30 +88,24 @@ static unsigned int hash_str(const char 
  *---------------------------------------------------------------*/
 static struct hash_cell *__get_name_cell(const char *str)
 {
-	struct list_head *tmp;
 	struct hash_cell *hc;
 	unsigned int h = hash_str(str);
 
-	list_for_each (tmp, _name_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, name_list);
+	list_for_each_entry (hc, _name_buckets + h, name_list)
 		if (!strcmp(hc->name, str))
 			return hc;
-	}
 
 	return NULL;
 }
 
 static struct hash_cell *__get_uuid_cell(const char *str)
 {
-	struct list_head *tmp;
 	struct hash_cell *hc;
 	unsigned int h = hash_str(str);
 
-	list_for_each (tmp, _uuid_buckets + h) {
-		hc = list_entry(tmp, struct hash_cell, uuid_list);
+	list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
 		if (!strcmp(hc->uuid, str))
 			return hc;
-	}
 
 	return NULL;
 }
@@ -935,6 +929,7 @@ static void retrieve_deps(struct dm_tabl
 	unsigned int count = 0;
 	struct list_head *tmp;
 	size_t len, needed;
+	struct dm_dev *dd;
 	struct dm_target_deps *deps;
 
 	deps = get_result_buffer(param, param_size, &len);
@@ -942,7 +937,7 @@ static void retrieve_deps(struct dm_tabl
 	/*
 	 * Count the devices.
 	 */
-	list_for_each(tmp, dm_table_get_devices(table))
+	list_for_each (tmp, dm_table_get_devices(table))
 		count++;
 
 	/*
@@ -959,10 +954,8 @@ static void retrieve_deps(struct dm_tabl
 	 */
 	deps->count = count;
 	count = 0;
-	list_for_each(tmp, dm_table_get_devices(table)) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+	list_for_each_entry (dd, dm_table_get_devices(table), list)
 		deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev);
-	}
 
 	param->data_size = param->data_start + needed;
 }
--- diff/drivers/md/dm-table.c	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/dm-table.c	2004-02-18 15:38:06.000000000 +0000
@@ -329,13 +329,11 @@ static int lookup_device(const char *pat
  */
 static struct dm_dev *find_device(struct list_head *l, dev_t dev)
 {
-	struct list_head *tmp;
+	struct dm_dev *dd;
 
-	list_for_each(tmp, l) {
-		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+	list_for_each_entry (dd, l, list)
 		if (dd->bdev->bd_dev == dev)
 			return dd;
-	}
 
 	return NULL;
 }
--- diff/drivers/md/dm-target.c	2004-02-18 15:16:23.000000000 +0000
+++ source/drivers/md/dm-target.c	2004-02-18 15:38:06.000000000 +0000
@@ -25,15 +25,11 @@ static DECLARE_RWSEM(_lock);
 
 static inline struct tt_internal *__find_target_type(const char *name)
 {
-	struct list_head *tih;
 	struct tt_internal *ti;
 
-	list_for_each(tih, &_targets) {
-		ti = list_entry(tih, struct tt_internal, list);
-
+	list_for_each_entry (ti, &_targets, list)
 		if (!strcmp(name, ti->tt.name))
 			return ti;
-	}
 
 	return NULL;
 }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 5/6] dm: list targets cmd
  2004-02-20 15:31 device-mapper patchset Joe Thornber
                   ` (3 preceding siblings ...)
  2004-02-20 15:36 ` [Patch 4/6] dm: default queue limits Joe Thornber
@ 2004-02-20 15:37 ` Joe Thornber
  2004-02-21  6:17   ` Andrew Morton
  2004-02-20 15:37 ` [Patch 6/6] dm: multipath target Joe Thornber
  5 siblings, 1 reply; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:37 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

List targets ioctl.  [Patrick Caulfield]
--- diff/drivers/md/dm-crypt.c	2004-02-18 15:16:36.000000000 +0000
+++ source/drivers/md/dm-crypt.c	2004-02-18 15:47:15.000000000 +0000
@@ -720,6 +720,7 @@ static int crypt_status(struct dm_target
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
+	.version= {1, 0, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
--- diff/drivers/md/dm-ioctl.c	2004-02-18 15:34:04.000000000 +0000
+++ source/drivers/md/dm-ioctl.c	2004-02-18 15:45:30.000000000 +0000
@@ -33,6 +33,14 @@ struct hash_cell {
 	struct dm_table *new_map;
 };
 
+struct vers_iter {
+    size_t param_size;
+    struct dm_target_versions *vers, *old_vers;
+    char *end;
+    uint32_t flags;
+};
+
+
 #define NUM_BUCKETS 64
 #define MASK_BUCKETS (NUM_BUCKETS - 1)
 static struct list_head _name_buckets[NUM_BUCKETS];
@@ -409,6 +417,80 @@ static int list_devices(struct dm_ioctl 
 	return 0;
 }
 
+static void list_version_get_needed(struct target_type *tt, void *param)
+{
+    int *needed = param;
+
+    *needed += strlen(tt->name);
+    *needed += sizeof(tt->version);
+    *needed += ALIGN_MASK;
+}
+
+static void list_version_get_info(struct target_type *tt, void *param)
+{
+    struct vers_iter *info = param;
+
+    /* Check space - it might have changed since the first iteration */
+    if ((char *)info->vers + sizeof(tt->version) + strlen(tt->name) + 1 >
+	info->end) {
+
+	info->flags = DM_BUFFER_FULL_FLAG;
+	return;
+    }
+
+    if (info->old_vers)
+	info->old_vers->next = (uint32_t) ((void *)info->vers -
+					   (void *)info->old_vers);
+    info->vers->version[0] = tt->version[0];
+    info->vers->version[1] = tt->version[1];
+    info->vers->version[2] = tt->version[2];
+    info->vers->next = 0;
+    strcpy(info->vers->name, tt->name);
+
+    info->old_vers = info->vers;
+    info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
+}
+
+static int list_versions(struct dm_ioctl *param, size_t param_size)
+{
+	size_t len, needed = 0;
+	struct dm_target_versions *vers;
+	struct vers_iter iter_info;
+
+	/*
+	 * Loop through all the target types working out how much
+	 * space we need.
+	 */
+	dm_target_iterate(list_version_get_needed, &needed);
+
+	/*
+	 * Grab our output buffer.
+	 */
+	vers = get_result_buffer(param, param_size, &len);
+	if (len < needed) {
+		param->flags |= DM_BUFFER_FULL_FLAG;
+		goto out;
+	}
+	param->data_size = param->data_start + needed;
+
+	iter_info.param_size = param_size;
+	iter_info.old_vers = NULL;
+	iter_info.vers = vers;
+	iter_info.flags = 0;
+	iter_info.end = (char *)vers+len;
+
+	/*
+	 * Now loop through filling out the names & versions.
+	 */
+	dm_target_iterate(list_version_get_info, &iter_info);
+	param->flags |= iter_info.flags;
+
+ out:
+	return 0;
+}
+
+
+
 static int check_name(const char *name)
 {
 	if (strchr(name, '/')) {
@@ -1038,7 +1120,9 @@ static ioctl_fn lookup_ioctl(unsigned in
 		{DM_TABLE_LOAD_CMD, table_load},
 		{DM_TABLE_CLEAR_CMD, table_clear},
 		{DM_TABLE_DEPS_CMD, table_deps},
-		{DM_TABLE_STATUS_CMD, table_status}
+		{DM_TABLE_STATUS_CMD, table_status},
+
+		{DM_LIST_VERSIONS_CMD, list_versions}
 	};
 
 	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
@@ -1112,7 +1196,9 @@ static int validate_params(uint cmd, str
 	param->flags &= ~DM_BUFFER_FULL_FLAG;
 
 	/* Ignores parameters */
-	if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
+	if (cmd == DM_REMOVE_ALL_CMD ||
+	    cmd == DM_LIST_DEVICES_CMD ||
+	    cmd == DM_LIST_VERSIONS_CMD)
 		return 0;
 
 	/* Unless creating, either name or uuid but not both */
--- diff/drivers/md/dm-linear.c	2004-02-18 15:16:23.000000000 +0000
+++ source/drivers/md/dm-linear.c	2004-02-18 15:44:06.000000000 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001 Sistina Software (UK) Limited.
+ * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
  */
@@ -97,6 +97,7 @@ static int linear_status(struct dm_targe
 
 static struct target_type linear_target = {
 	.name   = "linear",
+	.version= {1, 0, 1},
 	.module = THIS_MODULE,
 	.ctr    = linear_ctr,
 	.dtr    = linear_dtr,
--- diff/drivers/md/dm-stripe.c	2004-02-18 15:16:23.000000000 +0000
+++ source/drivers/md/dm-stripe.c	2004-02-18 15:44:06.000000000 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001 Sistina Software (UK) Limited.
+ * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
  *
  * This file is released under the GPL.
  */
@@ -212,6 +212,7 @@ static int stripe_status(struct dm_targe
 
 static struct target_type stripe_target = {
 	.name   = "striped",
+	.version= {1, 0, 1},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
--- diff/drivers/md/dm-target.c	2004-02-18 15:38:06.000000000 +0000
+++ source/drivers/md/dm-target.c	2004-02-18 15:44:06.000000000 +0000
@@ -96,6 +96,20 @@ static struct tt_internal *alloc_target(
 	return ti;
 }
 
+
+int dm_target_iterate(void (*iter_func)(struct target_type *tt,
+					void *param), void *param)
+{
+	struct tt_internal *ti;
+
+	down_read(&_lock);
+	list_for_each_entry (ti, &_targets, list)
+		iter_func(&ti->tt, param);
+	up_read(&_lock);
+
+	return 0;
+}
+
 int dm_register_target(struct target_type *t)
 {
 	int rv = 0;
@@ -161,6 +175,7 @@ static int io_err_map(struct dm_target *
 
 static struct target_type error_target = {
 	.name = "error",
+	.version = {1, 0, 1},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
--- diff/drivers/md/dm.h	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/dm.h	2004-02-18 15:44:06.000000000 +0000
@@ -123,6 +123,8 @@ int dm_target_init(void);
 void dm_target_exit(void);
 struct target_type *dm_get_target_type(const char *name);
 void dm_put_target_type(struct target_type *t);
+int dm_target_iterate(void (*iter_func)(struct target_type *tt,
+					void *param), void *param);
 
 
 /*-----------------------------------------------------------------
--- diff/include/linux/device-mapper.h	2004-02-18 15:16:23.000000000 +0000
+++ source/include/linux/device-mapper.h	2004-02-18 15:44:06.000000000 +0000
@@ -74,6 +74,7 @@ void dm_put_device(struct dm_target *ti,
 struct target_type {
 	const char *name;
 	struct module *module;
+        unsigned version[3];
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
@@ -104,7 +105,7 @@ struct dm_target {
 	sector_t split_io;
 
 	/*
-	 * These are automaticall filled in by
+	 * These are automatically filled in by
 	 * dm_table_get_device.
 	 */
 	struct io_restrictions limits;
--- diff/include/linux/dm-ioctl.h	2003-08-20 14:16:15.000000000 +0100
+++ source/include/linux/dm-ioctl.h	2004-02-18 15:44:17.000000000 +0000
@@ -163,6 +163,16 @@ struct dm_name_list {
 };
 
 /*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+        uint32_t next;
+        uint32_t version[3];
+
+        char name[0];
+};
+
+/*
  * If you change this make sure you make the corresponding change
  * to dm-ioctl.c:lookup_ioctl()
  */
@@ -185,6 +195,9 @@ enum {
 	DM_TABLE_CLEAR_CMD,
 	DM_TABLE_DEPS_CMD,
 	DM_TABLE_STATUS_CMD,
+
+	/* Added later */
+	DM_LIST_VERSIONS_CMD,
 };
 
 #define DM_IOCTL 0xfd
@@ -205,10 +218,12 @@ enum {
 #define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
 #define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
 
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	0
+#define DM_VERSION_MINOR	1
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2003-06-04)"
+#define DM_VERSION_EXTRA	"-ioctl (2003-12-10)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Patch 6/6] dm: multipath target
  2004-02-20 15:31 device-mapper patchset Joe Thornber
                   ` (4 preceding siblings ...)
  2004-02-20 15:37 ` [Patch 5/6] dm: list targets cmd Joe Thornber
@ 2004-02-20 15:37 ` Joe Thornber
  5 siblings, 0 replies; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:37 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Multipath target
--- diff/drivers/md/Kconfig	2004-02-18 15:25:08.000000000 +0000
+++ source/drivers/md/Kconfig	2004-02-18 15:49:35.000000000 +0000
@@ -180,5 +180,10 @@ config DM_CRYPT
 
 	  If unsure, say N.
 
-endmenu
+config DM_MULTIPATH
+       tristate "Multipath target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       ---help---
+         Allow volume managers to support multipath hardware.
 
+endmenu
--- diff/drivers/md/Makefile	2004-02-18 15:15:13.000000000 +0000
+++ source/drivers/md/Makefile	2004-02-18 15:50:12.000000000 +0000
@@ -4,6 +4,7 @@
 
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o
+dm-multipath-objs := dm-path-selector.o dm-mpath.o
 raid6-objs	:= raid6main.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
 		   raid6int8.o raid6int16.o raid6int32.o \
@@ -24,6 +25,7 @@ obj-$(CONFIG_MD_MULTIPATH)	+= multipath.
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
+obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o
 
 quiet_cmd_unroll = UNROLL  $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
--- diff/drivers/md/dm-mpath.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-mpath.c	2004-02-18 15:51:06.000000000 +0000
@@ -0,0 +1,729 @@
+/*
+ * Copyright (C) 2003 Sistina Software Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+#include "dm-bio-list.h"
+
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+
+/* FIXME: get rid of this */
+#define MPATH_FAIL_COUNT	1
+
+/*
+ * We don't want to call the path selector for every single io
+ * that comes through, so instead we only consider changing paths
+ * every MPATH_MIN_IO ios.  This number should be selected to be
+ * big enough that we can reduce the overhead of the path
+ * selector, but also small enough that we don't take the policy
+ * decision away from the path selector.
+ *
+ * So people should _not_ be tuning this number to try and get
+ * the most performance from some particular type of hardware.
+ * All the smarts should be going into the path selector.
+ */
+#define MPATH_MIN_IO		1000
+
+/* Path properties */
+struct path {
+	struct list_head list;
+
+	struct dm_dev *dev;
+	struct priority_group *pg;
+
+	spinlock_t failed_lock;
+	int has_failed;
+	unsigned fail_count;
+};
+
+struct priority_group {
+	struct list_head list;
+
+	unsigned priority;
+	struct multipath *m;
+	struct path_selector *ps;
+
+	unsigned nr_paths;
+	struct list_head paths;
+};
+
+/* Multipath context */
+struct multipath {
+	struct list_head list;
+	struct dm_target *ti;
+
+	unsigned nr_paths;
+	unsigned nr_priority_groups;
+	struct list_head priority_groups;
+
+	spinlock_t lock;
+	unsigned nr_valid_paths;
+
+	struct path *current_path;
+	unsigned current_count;
+
+	struct work_struct dispatch_failed;
+	struct bio_list failed_ios;
+
+	struct work_struct trigger_event;
+};
+
+static void dispatch_failed_ios(void *data);
+static void trigger_event(void *data);
+
+static struct path *alloc_path(void)
+{
+	struct path *path = kmalloc(sizeof(*path), GFP_KERNEL);
+
+	if (path) {
+		memset(path, 0, sizeof(*path));
+		path->failed_lock = SPIN_LOCK_UNLOCKED;
+		path->fail_count = MPATH_FAIL_COUNT;
+	}
+
+	return path;
+}
+
+static inline void free_path(struct path *p)
+{
+	kfree(p);
+}
+
+static struct priority_group *alloc_priority_group(void)
+{
+	struct priority_group *pg;
+
+	pg = kmalloc(sizeof(*pg), GFP_KERNEL);
+	if (!pg)
+		return NULL;
+
+	pg->ps = kmalloc(sizeof(*pg->ps), GFP_KERNEL);
+	if (!pg->ps) {
+		kfree(pg);
+		return NULL;
+	}
+	memset(pg->ps, 0, sizeof(*pg->ps));
+
+	INIT_LIST_HEAD(&pg->paths);
+
+	return pg;
+}
+
+static void free_paths(struct list_head *paths, struct dm_target *ti)
+{
+	struct path *path, *tmp;
+
+	list_for_each_entry_safe (path, tmp, paths, list) {
+		list_del(&path->list);
+		dm_put_device(ti, path->dev);
+		free_path(path);
+	}
+}
+
+static void free_priority_group(struct priority_group *pg,
+				struct dm_target *ti)
+{
+	struct path_selector *ps = pg->ps;
+
+	if (ps) {
+		if (ps->type) {
+			ps->type->dtr(ps);
+			dm_put_path_selector(ps->type);
+		}
+		kfree(ps);
+	}
+
+	free_paths(&pg->paths, ti);
+	kfree(pg);
+}
+
+static struct multipath *alloc_multipath(void)
+{
+	struct multipath *m;
+
+	m = kmalloc(sizeof(*m), GFP_KERNEL);
+	if (m) {
+		memset(m, 0, sizeof(*m));
+		INIT_LIST_HEAD(&m->priority_groups);
+		m->lock = SPIN_LOCK_UNLOCKED;
+		INIT_WORK(&m->dispatch_failed, dispatch_failed_ios, m);
+		INIT_WORK(&m->trigger_event, trigger_event, m);
+	}
+
+	return m;
+}
+
+static void free_multipath(struct multipath *m)
+{
+	struct priority_group *pg, *tmp;
+
+	list_for_each_entry_safe (pg, tmp, &m->priority_groups, list) {
+		list_del(&pg->list);
+		free_priority_group(pg, m->ti);
+	}
+
+	kfree(m);
+}
+
+/*-----------------------------------------------------------------
+ * The multipath daemon is responsible for resubmitting failed ios.
+ *---------------------------------------------------------------*/
+static struct workqueue_struct *_kmpathd_wq;
+
+static int __choose_path(struct multipath *m)
+{
+	struct priority_group *pg;
+	struct path *path = NULL;
+
+	if (m->nr_valid_paths) {
+		/* loop through the priority groups until we find a valid path. */
+		list_for_each_entry (pg, &m->priority_groups, list) {
+			path = pg->ps->type->select_path(pg->ps);
+			if (path)
+				break;
+		}
+	}
+
+	m->current_path = path;
+	m->current_count = MPATH_MIN_IO;
+
+	return 0;
+}
+
+static struct path *get_current_path(struct multipath *m)
+{
+	struct path *path;
+	unsigned long flags;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	/* Do we need to select a new path? */
+	if (!m->current_path || --m->current_count == 0)
+		__choose_path(m);
+
+	path = m->current_path;
+
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return path;
+}
+
+static int map_io(struct multipath *m, struct bio *bio)
+{
+	struct path *path;
+
+	path = get_current_path(m);
+	if (!path)
+		return -EIO;
+
+	bio->bi_bdev = path->dev->bdev;
+	return 0;
+}
+
+static void dispatch_failed_ios(void *data)
+{
+	struct multipath *m = (struct multipath *) data;
+
+	int r;
+	unsigned long flags;
+	struct bio *bio = NULL, *next;
+
+	spin_lock_irqsave(&m->lock, flags);
+	bio = bio_list_get(&m->failed_ios);
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	while (bio) {
+		next = bio->bi_next;
+		bio->bi_next = NULL;
+
+		r = map_io(m, bio);
+		if (r)
+			/*
+			 * This won't loop forever because the
+			 * end_io function will fail the ios if
+			 * we've no valid paths left.
+			 */
+			bio_io_error(bio, bio->bi_size);
+		else
+			generic_make_request(bio);
+
+		bio = next;
+	}
+
+	/*
+	 * FIXME: this now gets called once for each mpath
+	 * target, rather than once per daemon cycle.
+	 */
+ 	blk_run_queues();
+}
+
+static void trigger_event(void *data)
+{
+	struct multipath *m = (struct multipath *) data;
+	dm_table_event(m->ti->table);
+}
+
+/*-----------------------------------------------------------------
+ * Constructor/argument parsing:
+ * <num priority groups> [<priority> <selector>
+ * <num paths> <num selector args> [<path> [<arg>]* ]+ ]+
+ *---------------------------------------------------------------*/
+struct param {
+	unsigned min;
+	unsigned max;
+	char *error;
+};
+
+#define ESTR(s) ("dm-multipath: " s)
+
+static int read_param(struct param *param, char *str, unsigned *v, char **error)
+{
+	if (!str ||
+	    (sscanf(str, "%u", v) != 1) ||
+	    (*v < param->min) ||
+	    (*v > param->max)) {
+		*error = param->error;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct arg_set {
+	unsigned argc;
+	char **argv;
+};
+
+static char *shift(struct arg_set *as)
+{
+	char *r;
+
+	if (as->argc) {
+		as->argc--;
+		r = *as->argv;
+		as->argv++;
+		return r;
+	}
+
+	return NULL;
+}
+
+static void consume(struct arg_set *as, unsigned n)
+{
+	BUG_ON (as->argc < n);
+	as->argc -= n;
+	as->argv += n;
+}
+
+static struct path *parse_path(struct arg_set *as, struct path_selector *ps,
+			       struct dm_target *ti)
+{
+	int r;
+	struct path *p;
+
+	/* we need at least a path arg */
+	if (as->argc < 1) {
+		ti->error = ESTR("no device given");
+		return NULL;
+	}
+
+	p = alloc_path();
+	if (!p)
+		return NULL;
+
+	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
+			  dm_table_get_mode(ti->table), &p->dev);
+	if (r) {
+		ti->error = ESTR("error getting device");
+		goto bad;
+	}
+
+	r = ps->type->add_path(ps, p, as->argc, as->argv, &ti->error);
+	if (r) {
+		dm_put_device(ti, p->dev);
+		goto bad;
+	}
+
+	return p;
+
+ bad:
+	free_path(p);
+	return NULL;
+}
+
+/* Parse one priority group from 'as'; NULL (all released) on failure. */
+static struct priority_group *parse_priority_group(struct arg_set *as,
+						   struct multipath *m,
+						   struct dm_target *ti)
+{
+	static struct param _params[] = {
+		{0, 1024, ESTR("invalid priority")},
+		{1, 1024, ESTR("invalid number of paths")},
+		{0, 1024, ESTR("invalid number of selector args")}
+	};
+
+	int r;
+	unsigned i, nr_selector_args, nr_params;
+	struct priority_group *pg;
+	struct path_selector_type *pst;
+
+	if (as->argc < 3) {
+		as->argc = 0;
+		ti->error = ESTR("not enough priority group arguments");
+		return NULL;
+	}
+
+	pg = alloc_priority_group();
+	if (!pg) {
+		ti->error = ESTR("couldn't allocate priority group");
+		return NULL;
+	}
+
+	r = read_param(_params, shift(as), &pg->priority, &ti->error);
+	if (r)
+		goto bad;
+
+	pst = dm_get_path_selector(shift(as));
+	if (!pst) {
+		ti->error = ESTR("unknown path selector type");
+		goto bad;
+	}
+
+	r = pst->ctr(pg->ps);
+	if (r) {
+		/* pg->ps->type is unset, so 'bad' won't drop this ref */
+		dm_put_path_selector(pst);
+		goto bad;
+	}
+	pg->ps->type = pst;
+
+	/* read the paths */
+	r = read_param(_params + 1, shift(as), &pg->nr_paths, &ti->error);
+	if (r)
+		goto bad;
+	r = read_param(_params + 2, shift(as), &nr_selector_args, &ti->error);
+	if (r)
+		goto bad;
+
+	nr_params = 1 + nr_selector_args;
+	for (i = 0; i < pg->nr_paths; i++) {
+		struct path *path;
+		struct arg_set path_args;
+
+		if (as->argc < nr_params) {
+			ti->error = ESTR("not enough path parameters");
+			goto bad;
+		}
+
+		path_args.argc = nr_params;
+		path_args.argv = as->argv;
+
+		path = parse_path(&path_args, pg->ps, ti);
+		if (!path)
+			goto bad;
+
+		path->pg = pg;
+		list_add_tail(&path->list, &pg->paths);
+		consume(as, nr_params);
+	}
+
+	return pg;
+
+ bad:
+	free_priority_group(pg, ti);
+	return NULL;
+}
+
+static void __insert_priority_group(struct multipath *m,
+				    struct priority_group *pg)
+{
+	struct priority_group *tmp;
+
+	list_for_each_entry (tmp, &m->priority_groups, list)
+		if (tmp->priority > pg->priority)
+			break;
+
+	list_add_tail(&pg->list, &tmp->list);
+	pg->m = m;
+}
+
+/* Constructor: parse the table line into a multipath; 0 or -EINVAL. */
+static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	/* target parameters */
+	static struct param _params[] = {
+		{1, 1024, ESTR("invalid number of priority groups")},
+	};
+
+	int r;
+	struct multipath *m;
+	struct arg_set as;
+
+	as.argc = argc;
+	as.argv = argv;
+
+	m = alloc_multipath();
+	if (!m) {
+		ti->error = ESTR("can't allocate multipath");
+		return -EINVAL;
+	}
+	/* set early: free_multipath() hands m->ti to dm_put_device() */
+	m->ti = ti;
+
+	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
+	if (r)
+		goto bad;
+
+	/* parse the priority groups; a bad group fails the whole table */
+	while (as.argc) {
+		struct priority_group *pg = parse_priority_group(&as, m, ti);
+
+		if (!pg)
+			goto bad;
+		m->nr_paths += pg->nr_paths;
+		__insert_priority_group(m, pg);
+	}
+	m->nr_valid_paths = m->nr_paths;
+	ti->private = m;
+
+	return 0;
+
+ bad:
+	free_multipath(m);
+	return -EINVAL;
+}
+
+static void multipath_dtr(struct dm_target *ti)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	free_multipath(m);
+}
+
+static int multipath_map(struct dm_target *ti, struct bio *bio,
+			 union map_info *map_context)
+{
+	int r;
+	struct multipath *m = (struct multipath *) ti->private;
+
+	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
+	r = map_io(m, bio);
+	if (r)
+		return r;
+
+	return 1;
+}
+
+/*
+ * Only called on the error path.
+ */
+static struct path *find_path(struct multipath *m, struct block_device *bdev)
+{
+	struct path *p;
+	struct priority_group *pg;
+
+	list_for_each_entry (pg, &m->priority_groups, list)
+		list_for_each_entry (p, &pg->paths, list)
+			if (p->dev->bdev == bdev)
+				return p;
+
+	return NULL;
+}
+
+static void fail_path(struct path *path)
+{
+	unsigned long flags;
+	struct multipath *m;
+
+	spin_lock_irqsave(&path->failed_lock, flags);
+
+	/* FIXME: path->fail_count is brain dead */
+	if (!path->has_failed && !--path->fail_count) {
+		m = path->pg->m;
+
+		path->has_failed = 1;
+		path->pg->ps->type->fail_path(path->pg->ps, path);
+		queue_work(_kmpathd_wq, &m->trigger_event);
+
+		spin_lock(&m->lock);
+		m->nr_valid_paths--;
+
+		if (path == m->current_path)
+			m->current_path = NULL;
+
+		spin_unlock(&m->lock);
+	}
+
+	spin_unlock_irqrestore(&path->failed_lock, flags);
+}
+
+static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+			    int error, union map_info *map_context)
+{
+	struct path *path;
+	struct multipath *m = (struct multipath *) ti->private;
+
+	if (error) {
+		spin_lock(&m->lock);
+		if (!m->nr_valid_paths) {
+			spin_unlock(&m->lock);
+			return -EIO;
+		}
+		spin_unlock(&m->lock);
+
+		path = find_path(m, bio->bi_bdev);
+		fail_path(path);
+
+		/* queue for the daemon to resubmit */
+		spin_lock(&m->lock);
+		bio_list_add(&m->failed_ios, bio);
+		spin_unlock(&m->lock);
+
+		queue_work(_kmpathd_wq, &m->dispatch_failed);
+		return 1;	/* io not complete */
+	}
+
+	return 0;
+}
+
+/*
+ * Info string has the following format:
+ * num_groups [num_paths num_selector_args [path_dev A|F fail_count [selector_args]* ]+ ]+
+ *
+ * Table string has the following format (identical to the constructor string):
+ * num_groups [priority selector-name num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
+ */
+static int multipath_status(struct dm_target *ti, status_type_t type,
+			    char *result, unsigned int maxlen)
+{
+	int sz = 0;
+	unsigned long flags;
+	struct multipath *m = (struct multipath *) ti->private;
+	struct priority_group *pg;
+	struct path *p;
+	char buffer[32];
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		sz += snprintf(result + sz, maxlen - sz, "%u ", m->nr_priority_groups);
+
+		list_for_each_entry(pg, &m->priority_groups, list) {
+			sz += snprintf(result + sz, maxlen - sz, "%u %u ",
+                                       pg->nr_paths,
+				       pg->ps->type->info_args);
+
+			list_for_each_entry(p, &pg->paths, list) {
+				format_dev_t(buffer, p->dev->bdev->bd_dev);
+				spin_lock_irqsave(&p->failed_lock, flags);
+				sz += snprintf(result + sz, maxlen - sz,
+					       "%s %s %u ", buffer,
+					       p->has_failed ? "F" : "A",
+					       p->fail_count);
+				pg->ps->type->status(pg->ps, p, type,
+						     result + sz, maxlen - sz);
+				spin_unlock_irqrestore(&p->failed_lock, flags);
+
+				sz = strlen(result);
+				if (sz >= maxlen)
+					break;
+			}
+		}
+
+		break;
+
+	case STATUSTYPE_TABLE:
+		sz += snprintf(result + sz, maxlen - sz, "%u ", m->nr_priority_groups);
+
+		list_for_each_entry(pg, &m->priority_groups, list) {
+			sz += snprintf(result + sz, maxlen - sz, "%u %s %u %u ",
+				       pg->priority, pg->ps->type->name,
+				       pg->nr_paths, pg->ps->type->table_args);
+
+			list_for_each_entry(p, &pg->paths, list) {
+				format_dev_t(buffer, p->dev->bdev->bd_dev);
+				sz += snprintf(result + sz, maxlen - sz,
+					       "%s ", buffer);
+				pg->ps->type->status(pg->ps, p, type,
+						     result + sz, maxlen - sz);
+
+				sz = strlen(result);
+				if (sz >= maxlen)
+					break;
+			}
+		}
+
+		break;
+	}
+
+	return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Module setup
+ *---------------------------------------------------------------*/
+static struct target_type multipath_target = {
+	.name = "multipath",
+	.version = {1, 0, 2},
+	.module = THIS_MODULE,
+	.ctr = multipath_ctr,
+	.dtr = multipath_dtr,
+	.map = multipath_map,
+	.end_io = multipath_end_io,
+	.status = multipath_status,
+};
+
+/* Register target and selectors, create the workqueue; 0 or -errno. */
+int __init dm_multipath_init(void)
+{
+	int r = dm_register_target(&multipath_target);
+
+	if (r < 0) {
+		DMERR("%s: register failed %d", multipath_target.name, r);
+		return -EINVAL;
+	}
+
+	r = dm_register_path_selectors();
+	if (r && r != -EEXIST) {
+		dm_unregister_target(&multipath_target);
+		return r;
+	}
+
+	_kmpathd_wq = create_workqueue("dm-mpath");
+	if (!_kmpathd_wq) {
+		/* FIXME: remove this */
+		dm_unregister_path_selectors();
+		dm_unregister_target(&multipath_target);
+		return -ENOMEM;
+	}
+	DMINFO("dm_multipath v0.2.0");
+	return 0;	/* a tolerated -EEXIST must not fail module load */
+}
+
+void __exit dm_multipath_exit(void)
+{
+	int r;
+
+	destroy_workqueue(_kmpathd_wq);
+	dm_unregister_path_selectors();
+	r = dm_unregister_target(&multipath_target);
+	if (r < 0)
+		DMERR("%s: target unregister failed %d",
+		      multipath_target.name, r);
+}
+
+module_init(dm_multipath_init);
+module_exit(dm_multipath_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath target");
+MODULE_AUTHOR("Sistina software <dm@uk.sistina.com>");
+MODULE_LICENSE("GPL");
--- diff/drivers/md/dm-path-selector.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-path-selector.c	2004-02-18 15:54:59.000000000 +0000
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ * Path selector housekeeping (register/unregister/...)
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+
+#include <linux/slab.h>
+
+struct ps_internal {
+	struct path_selector_type pt;
+
+	struct list_head list;
+	long use;
+};
+
+static LIST_HEAD(_path_selectors);
+static DECLARE_MUTEX(_lock);
+
+struct path_selector_type *__find_path_selector_type(const char *name)
+{
+	struct ps_internal *li;
+
+	list_for_each_entry (li, &_path_selectors, list) {
+		if (!strcmp(name, li->pt.name))
+			return &li->pt;
+	}
+
+	return NULL;
+}
+
+struct path_selector_type *dm_get_path_selector(const char *name)
+{
+	struct path_selector_type *lb;
+
+	if (!name)
+		return NULL;
+
+	down(&_lock);
+	lb = __find_path_selector_type(name);
+	if (lb) {
+		struct ps_internal *li = (struct ps_internal *) lb;
+		li->use++;
+	}
+	up(&_lock);
+
+	return lb;
+}
+
+void dm_put_path_selector(struct path_selector_type *l)
+{
+	struct ps_internal *li = (struct ps_internal *) l;
+
+	down(&_lock);
+	if (--li->use < 0)
+		BUG();
+	up(&_lock);
+
+	return;
+}
+
+static struct ps_internal *_alloc_path_selector(struct path_selector_type *pt)
+{
+	struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+
+	if (psi) {
+		memset(psi, 0, sizeof(*psi));
+		memcpy(psi, pt, sizeof(*pt));
+	}
+
+	return psi;
+}
+
+int dm_register_path_selector(struct path_selector_type *pst)
+{
+	int r = 0;
+	struct ps_internal *psi = _alloc_path_selector(pst);
+
+	if (!psi)
+		return -ENOMEM;
+
+	down(&_lock);
+	if (__find_path_selector_type(pst->name)) {
+		kfree(psi);
+		r = -EEXIST;
+	} else
+		list_add(&psi->list, &_path_selectors);
+
+	up(&_lock);
+
+	return r;
+}
+
+int dm_unregister_path_selector(struct path_selector_type *pst)
+{
+	struct ps_internal *psi;
+
+	down(&_lock);
+	psi = (struct ps_internal *) __find_path_selector_type(pst->name);
+	if (!psi) {
+		up(&_lock);
+		return -EINVAL;
+	}
+
+	if (psi->use) {
+		up(&_lock);
+		return -ETXTBSY;
+	}
+
+	list_del(&psi->list);
+	up(&_lock);
+
+	kfree(psi);
+
+	return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Path handling code, paths are held in lists
+ *---------------------------------------------------------------*/
+struct path_info {
+	struct list_head list;
+	struct path *path;
+};
+
+static struct path_info *path_lookup(struct list_head *head, struct path *p)
+{
+	struct path_info *pi;
+
+	list_for_each_entry (pi, head, list)
+		if (pi->path == p)
+			return pi;
+
+	return NULL;
+}
+
+/*-----------------------------------------------------------------
+ * Round robin selector
+ *---------------------------------------------------------------*/
+struct selector {
+	spinlock_t lock;
+
+	struct list_head valid_paths;
+	struct list_head invalid_paths;
+};
+
+static struct selector *alloc_selector(void)
+{
+	struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (s) {
+		INIT_LIST_HEAD(&s->valid_paths);
+		INIT_LIST_HEAD(&s->invalid_paths);
+		s->lock = SPIN_LOCK_UNLOCKED;
+	}
+
+	return s;
+}
+
+/* Path selector constructor */
+static int rr_ctr(struct path_selector *ps)
+{
+	struct selector *s;
+
+	s = alloc_selector();
+	if (!s)
+		return -ENOMEM;
+
+	ps->context = s;
+	return 0;
+}
+
+static void free_paths(struct list_head *paths)
+{
+	struct path_info *pi, *next;
+
+	list_for_each_entry_safe (pi, next, paths, list) {
+		list_del(&pi->list);
+		kfree(pi);
+	}
+}
+
+/* Path selector destructor */
+static void rr_dtr(struct path_selector *ps)
+{
+	struct selector *s = (struct selector *) ps->context;
+	free_paths(&s->valid_paths);
+	free_paths(&s->invalid_paths);
+	kfree(s);
+}
+
+/* Path add context */
+static int rr_add_path(struct path_selector *ps, struct path *path,
+		       int argc, char **argv, char **error)
+{
+	struct selector *s = (struct selector *) ps->context;
+	struct path_info *pi;
+
+	/* parse the path arguments */
+	if (argc != 0) {
+		*error = "round-robin ps: incorrect number of arguments";
+		return -EINVAL;
+	}
+
+	/* allocate the path */
+	pi = kmalloc(sizeof(*pi), GFP_KERNEL);
+	if (!pi) {
+		*error = "round-robin ps: Error allocating path context";
+		return -ENOMEM;
+	}
+
+	pi->path = path;
+
+	spin_lock(&s->lock);
+	list_add(&pi->list, &s->valid_paths);
+	spin_unlock(&s->lock);
+
+	return 0;
+}
+
+static void rr_fail_path(struct path_selector *ps, struct path *p)
+{
+	unsigned long flags;
+	struct selector *s = (struct selector *) ps->context;
+	struct path_info *pi;
+
+	/*
+	 * This function will be called infrequently so we don't
+	 * mind the expense of these searches.
+	 */
+	spin_lock_irqsave(&s->lock, flags);
+	pi = path_lookup(&s->valid_paths, p);
+	if (!pi)
+		pi = path_lookup(&s->invalid_paths, p);
+
+	if (!pi)
+		DMWARN("asked to change the state of an unknown path");
+
+	else
+		list_move(&pi->list, &s->invalid_paths);
+
+	spin_unlock_irqrestore(&s->lock, flags);
+}
+
+/* Path selector */
+static struct path *rr_select_path(struct path_selector *ps)
+{
+	unsigned long flags;
+	struct selector *s = (struct selector *) ps->context;
+	struct path_info *pi = NULL;
+
+	spin_lock_irqsave(&s->lock, flags);
+	if (!list_empty(&s->valid_paths)) {
+		pi = list_entry(s->valid_paths.next, struct path_info, list);
+		list_move_tail(&pi->list, &s->valid_paths);
+	}
+	spin_unlock_irqrestore(&s->lock, flags);
+
+	return pi ? pi->path : NULL;
+}
+
+/* Path status */
+static int rr_status(struct path_selector *ps, struct path *path,
+		     status_type_t type, char *result, unsigned int maxlen)
+{
+	return 0;
+}
+
+static struct path_selector_type rr_ps = {
+	.name = "round-robin",
+	.table_args = 0,
+	.info_args = 0,
+	.ctr = rr_ctr,
+	.dtr = rr_dtr,
+	.add_path = rr_add_path,
+	.fail_path = rr_fail_path,
+	.select_path = rr_select_path,
+	.status = rr_status,
+};
+
+/*
+ * (Un)register all path selectors (FIXME: remove this after tests)
+ */
+int dm_register_path_selectors(void)
+{
+	return dm_register_path_selector(&rr_ps);
+}
+
+void dm_unregister_path_selectors(void)
+{
+	dm_unregister_path_selector(&rr_ps);
+}
--- diff/drivers/md/dm-path-selector.h	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-path-selector.h	2004-02-18 15:53:24.000000000 +0000
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ * Path-Selector interface/registration/unregistration definitions
+ *
+ */
+
+#ifndef	DM_PATH_SELECTOR_H
+#define	DM_PATH_SELECTOR_H
+
+#include <linux/device-mapper.h>
+
+struct path;
+
+/*
+ * We provide an abstraction for the code that chooses which path
+ * to send some io down.
+ */
+struct path_selector_type;
+struct path_selector {
+	struct path_selector_type *type;
+	void *context;
+};
+
+/*
+ * Constructs a path selector object, takes custom arguments
+ */
+typedef int (*ps_ctr_fn) (struct path_selector *ps);
+typedef void (*ps_dtr_fn) (struct path_selector *ps);
+
+/*
+ * Add an opaque path object, along with some selector specific
+ * path args (eg, path priority).
+ */
+typedef	int (*ps_add_path_fn) (struct path_selector *ps,
+			       struct path *path,
+			       int argc, char **argv, char **error);
+
+/*
+ * Chooses a path for this io, if no paths are available then
+ * NULL will be returned. The selector may set the map_info
+ * object if it wishes, this will be fed back into the endio fn.
+ *
+ * Must ensure that _any_ dynamically allocated selection context is
+ * reused or reallocated because an endio call (which needs to free it)
+ * might happen after a couple of select calls.
+ */
+typedef	struct path *(*ps_select_path_fn) (struct path_selector *ps);
+
+/*
+ * Notify the selector that a path has failed.
+ */
+typedef	void (*ps_fail_path_fn) (struct path_selector *ps,
+				 struct path *p);
+
+/*
+ * Table content based on parameters added in ps_add_path_fn
+ * or path selector status
+ */
+typedef	int (*ps_status_fn) (struct path_selector *ps,
+			     struct path *path,
+			     status_type_t type,
+			     char *result, unsigned int maxlen);
+
+/* Information about a path selector type */
+struct path_selector_type {
+	char *name;
+	unsigned int table_args;
+	unsigned int info_args;
+	ps_ctr_fn ctr;
+	ps_dtr_fn dtr;
+
+	ps_add_path_fn add_path;
+	ps_fail_path_fn fail_path;
+	ps_select_path_fn select_path;
+	ps_status_fn status;
+};
+
+/*
+ * FIXME: Factor out registration code.
+ */
+
+/* Register a path selector */
+int dm_register_path_selector(struct path_selector_type *type);
+
+/* Unregister a path selector */
+int dm_unregister_path_selector(struct path_selector_type *type);
+
+/* Returns a registered path selector type */
+struct path_selector_type *dm_get_path_selector(const char *name);
+
+/* Releases a path selector  */
+void dm_put_path_selector(struct path_selector_type *pst);
+
+/* FIXME: remove these */
+int dm_register_path_selectors(void);
+void dm_unregister_path_selectors(void);
+
+#endif

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 4/6] dm: default queue limits
  2004-02-20 15:36 ` [Patch 4/6] dm: default queue limits Joe Thornber
@ 2004-02-20 15:39   ` Joe Thornber
  0 siblings, 0 replies; 17+ messages in thread
From: Joe Thornber @ 2004-02-20 15:39 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Fill in missing queue limitations when table is complete instead of
enforcing the "default" limits on every dm device.
Problem noticed by Mike Christie.

[Christophe Saout]
--- diff/drivers/md/dm-table.c	2004-02-18 15:38:06.000000000 +0000
+++ source/drivers/md/dm-table.c	2004-02-18 15:40:07.000000000 +0000
@@ -629,14 +629,20 @@ static int split_args(int *argc, char **
 	return 0;
 }
 
-static void set_default_limits(struct io_restrictions *rs)
+static void check_for_valid_limits(struct io_restrictions *rs)
 {
-	rs->max_sectors = MAX_SECTORS;
-	rs->max_phys_segments = MAX_PHYS_SEGMENTS;
-	rs->max_hw_segments = MAX_HW_SEGMENTS;
-	rs->hardsect_size = 1 << SECTOR_SHIFT;
-	rs->max_segment_size = MAX_SEGMENT_SIZE;
-	rs->seg_boundary_mask = -1;
+	if (!rs->max_sectors)
+		rs->max_sectors = MAX_SECTORS;
+	if (!rs->max_phys_segments)
+		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
+	if (!rs->max_hw_segments)
+		rs->max_hw_segments = MAX_HW_SEGMENTS;
+	if (!rs->hardsect_size)
+		rs->hardsect_size = 1 << SECTOR_SHIFT;
+	if (!rs->max_segment_size)
+		rs->max_segment_size = MAX_SEGMENT_SIZE;
+	if (!rs->seg_boundary_mask)
+		rs->seg_boundary_mask = -1;
 }
 
 int dm_table_add_target(struct dm_table *t, const char *type,
@@ -651,7 +657,6 @@ int dm_table_add_target(struct dm_table 
 
 	tgt = t->targets + t->num_targets;
 	memset(tgt, 0, sizeof(*tgt));
-	set_default_limits(&tgt->limits);
 
 	if (!len) {
 		tgt->error = "zero-length target";
@@ -736,6 +741,8 @@ int dm_table_complete(struct dm_table *t
 	int r = 0;
 	unsigned int leaf_nodes;
 
+	check_for_valid_limits(&t->limits);
+
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 2/6] dm: remove v1 ioctl interface
  2004-02-20 15:34 ` [Patch 2/6] dm: remove v1 ioctl interface Joe Thornber
@ 2004-02-20 20:18   ` Andreas Jellinghaus
  2004-02-21  6:15   ` Andrew Morton
  1 sibling, 0 replies; 17+ messages in thread
From: Andreas Jellinghaus @ 2004-02-20 20:18 UTC (permalink / raw)
  To: linux-kernel

On Fri, 20 Feb 2004 15:47:33 +0000, Joe Thornber wrote:

> Remove the version-1 ioctl interface.
> 
> --- diff/drivers/md/dm-ioctl.c	2003-08-20 14:16:09.000000000 +0100
> +++ source/drivers/md/dm-ioctl.c	2004-02-18 15:23:23.000000000 +0000
> @@ -1,13 +1,1264 @@
>  /*
> - * Copyright (C) 2003 Sistina Software (UK) Limited.
> + * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.

new code, old copyright?


Andreas


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 2/6] dm: remove v1 ioctl interface
  2004-02-20 15:34 ` [Patch 2/6] dm: remove v1 ioctl interface Joe Thornber
  2004-02-20 20:18   ` Andreas Jellinghaus
@ 2004-02-21  6:15   ` Andrew Morton
  1 sibling, 0 replies; 17+ messages in thread
From: Andrew Morton @ 2004-02-21  6:15 UTC (permalink / raw)
  To: Joe Thornber; +Cc: linux-kernel

Joe Thornber <thornber@redhat.com> wrote:
>
>  Remove the version-1 ioctl interface.

This breaks the build on 64-bit machines.




 include/linux/compat_ioctl.h |    7 -------
 1 files changed, 7 deletions(-)

diff -puN include/linux/compat_ioctl.h~dm-02-compat_ioctl-fix include/linux/compat_ioctl.h
--- 25-power4/include/linux/compat_ioctl.h~dm-02-compat_ioctl-fix	2004-02-20 22:12:40.000000000 -0800
+++ 25-power4-akpm/include/linux/compat_ioctl.h	2004-02-20 22:12:40.000000000 -0800
@@ -140,13 +140,6 @@ COMPATIBLE_IOCTL(DM_VERSION)
 COMPATIBLE_IOCTL(DM_REMOVE_ALL)
 COMPATIBLE_IOCTL(DM_DEV_CREATE)
 COMPATIBLE_IOCTL(DM_DEV_REMOVE)
-COMPATIBLE_IOCTL(DM_DEV_RELOAD)
-COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
-COMPATIBLE_IOCTL(DM_DEV_RENAME)
-COMPATIBLE_IOCTL(DM_DEV_DEPS)
-COMPATIBLE_IOCTL(DM_DEV_STATUS)
-COMPATIBLE_IOCTL(DM_TARGET_STATUS)
-COMPATIBLE_IOCTL(DM_TARGET_WAIT)
 #endif
 /* Big K */
 COMPATIBLE_IOCTL(PIO_FONT)

_


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 5/6] dm: list targets cmd
  2004-02-20 15:37 ` [Patch 5/6] dm: list targets cmd Joe Thornber
@ 2004-02-21  6:17   ` Andrew Morton
  0 siblings, 0 replies; 17+ messages in thread
From: Andrew Morton @ 2004-02-21  6:17 UTC (permalink / raw)
  To: Joe Thornber; +Cc: linux-kernel

Joe Thornber <thornber@redhat.com> wrote:
>
> List targets ioctl.  [Patrick Caulfield]
>  
> ...

> +#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)

Does this not need 64-bit emulation support?

Has it been tested on a 64-bit system?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-20 15:34 ` [Patch 1/6] dm: endio method Joe Thornber
@ 2004-02-21  9:58   ` Mike Christie
  2004-02-21 10:44     ` Christophe Saout
  2004-02-23 10:05     ` Joe Thornber
  0 siblings, 2 replies; 17+ messages in thread
From: Mike Christie @ 2004-02-21  9:58 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Joe Thornber wrote:

> Add an endio method to targets.  This method is allowed to request
> another shot at failed ios (think multipath). 


> +	if (endio) {
> +		/* Restore bio fields. */
> +		bio->bi_sector = tio->bi_sector;
> +		bio->bi_bdev = tio->bi_bdev;
> +		bio->bi_size = tio->bi_size;
> +		bio->bi_idx = tio->bi_idx;
> +
> +		r = endio(tio->ti, bio, error, &tio->info);


> +	r = ti->type->map(ti, clone, &tio->info);
> +	if (r > 0) {
> +		/* Save the bio info so we can restore it during endio. */
> +		tio->bi_sector = clone->bi_sector;
> +		tio->bi_bdev = clone->bi_bdev;
> +		tio->bi_size = clone->bi_size;
> +		tio->bi_idx = clone->bi_idx;


Saving and restoring bi_bdev is going to break multipath. When a bio is 
remapped and resent multiple times by the target because of multiple 
path failures, restoring bi_bdev to the original value will cause only 
that path to be marked as failed instead of the paths that the bio was 
remapped to.

This is DM's cloned bio. Is there a guarantee that this value should be 
safe from lower level drivers overwriting it, or is it similar to b_rdev 
for buffer_heads?

Mike Christie

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-21  9:58   ` Mike Christie
@ 2004-02-21 10:44     ` Christophe Saout
  2004-02-23 10:05     ` Joe Thornber
  1 sibling, 0 replies; 17+ messages in thread
From: Christophe Saout @ 2004-02-21 10:44 UTC (permalink / raw)
  To: Mike Christie; +Cc: Joe Thornber, Andrew Morton, Linux Mailing List

Am Sa, den 21.02.2004 schrieb Mike Christie um 10:58:

> This is DM's cloned bio. Is there a guarantee that this value should be 
> safe from lower level drivers overwriting it, or is it similar to b_rdev 
> for buffer_heads?

The block layer is allowed to remap the device. This is done for
partitions, e.g. hda5 -> hda + sector number change
(blk_partition_remap).



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-21  9:58   ` Mike Christie
  2004-02-21 10:44     ` Christophe Saout
@ 2004-02-23 10:05     ` Joe Thornber
  2004-02-23 22:08       ` Mike Christie
  1 sibling, 1 reply; 17+ messages in thread
From: Joe Thornber @ 2004-02-23 10:05 UTC (permalink / raw)
  To: Mike Christie; +Cc: Joe Thornber, Andrew Morton, Linux Mailing List

Mike,

On Sat, Feb 21, 2004 at 01:58:38AM -0800, Mike Christie wrote:
> Saving and restoring bi_bdev is going to break multipath.

Yes, we'll have to fall back to plan A and use the map_context pointer
to hold the path being used (attached patch for illustration only).  I
had been hoping we could keep the map_context unused so that we could
allow the path selectors to use it.  I should have spotted this.

I'll also move the failed bio remap back to mpath_end_io(), so that
the context can be reused there (it moved to the daemon when we were
trying to do path testing in the kernel).

- Joe


--- diff/drivers/md/dm-mpath.c	2004-02-18 15:51:06.000000000 +0000
+++ source/drivers/md/dm-mpath.c	2004-02-23 09:58:31.000000000 +0000
@@ -219,7 +219,7 @@ static struct path *get_current_path(str
 	return path;
 }
 
-static int map_io(struct multipath *m, struct bio *bio)
+static int map_io(struct multipath *m, struct bio *bio, union map_info *map_context)
 {
 	struct path *path;
 
@@ -228,6 +228,7 @@ static int map_io(struct multipath *m, s
 		return -EIO;
 
 	bio->bi_bdev = path->dev->bdev;
+	map_context->ptr = path;
 	return 0;
 }
 
@@ -517,29 +518,13 @@ static int multipath_map(struct dm_targe
 	struct multipath *m = (struct multipath *) ti->private;
 
 	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
-	r = map_io(m, bio);
+	r = map_io(m, bio, map_context);
 	if (r)
 		return r;
 
 	return 1;
 }
 
-/*
- * Only called on the error path.
- */
-static struct path *find_path(struct multipath *m, struct block_device *bdev)
-{
-	struct path *p;
-	struct priority_group *pg;
-
-	list_for_each_entry (pg, &m->priority_groups, list)
-		list_for_each_entry (p, &pg->paths, list)
-			if (p->dev->bdev == bdev)
-				return p;
-
-	return NULL;
-}
-
 static void fail_path(struct path *path)
 {
 	unsigned long flags;
@@ -570,8 +555,8 @@ static void fail_path(struct path *path)
 static int multipath_end_io(struct dm_target *ti, struct bio *bio,
 			    int error, union map_info *map_context)
 {
-	struct path *path;
 	struct multipath *m = (struct multipath *) ti->private;
+	struct path *path = (struct path *) map_context->ptr;
 
 	if (error) {
 		spin_lock(&m->lock);
@@ -581,7 +566,6 @@ static int multipath_end_io(struct dm_ta
 		}
 		spin_unlock(&m->lock);
 
-		path = find_path(m, bio->bi_bdev);
 		fail_path(path);
 
 		/* queue for the daemon to resubmit */

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-23 10:05     ` Joe Thornber
@ 2004-02-23 22:08       ` Mike Christie
  2004-02-23 22:29         ` Joe Thornber
  0 siblings, 1 reply; 17+ messages in thread
From: Mike Christie @ 2004-02-23 22:08 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Hey Joe,

Joe Thornber wrote:

> Mike,
> 
> On Sat, Feb 21, 2004 at 01:58:38AM -0800, Mike Christie wrote:
> 
>>Saving and restoring bi_bdev is going to break multipath.
> 
> 
> Yes, we'll have to fall back to plan A and use the map_context pointer
> to hold the path being used (attached patch for illustration only).  I
> had been hoping we could keep the map_context unused so that we could
> allow the path selectors to use it.  I should have spotted this.
> 
> I'll also move the failed bio remap back to mpath_end_io(), so that
> the context can be reused there (it moved to the daemon when we were
> trying to do path testing in the kernel).
>

With this move if the path has to be activated first, will the daemon 
have to call some sort of ps_path_is_initialized() function before it 
calls generic_make_request?

It might be easier if mp's map_io call did not move so it or the ps 
could send commands and wait for the response before selecting a path. I 
guess this would mean you would have to add an access function for the 
tio's map_info so it could be set from the daemon, or mp may need to 
allocate its own io wrapper. It seems the latter may now be needed to 
give ps's a map_info, because dm-mpath needs to store the path in the 
tio's map_info.

Mike

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-23 22:08       ` Mike Christie
@ 2004-02-23 22:29         ` Joe Thornber
  2004-02-24  2:18           ` Mike Christie
  0 siblings, 1 reply; 17+ messages in thread
From: Joe Thornber @ 2004-02-23 22:29 UTC (permalink / raw)
  To: Mike Christie; +Cc: Joe Thornber, Andrew Morton, Linux Mailing List

On Mon, Feb 23, 2004 at 02:08:32PM -0800, Mike Christie wrote:
> With this move if the path has to be activated first, will the daemon 
> have to call some sort of ps_path_is_initialized() function before it 
> calls generic_make_request?

Yes, I am planning to add something like this.  Whether it needs to be
per path, or we could get away with per-priority-group, is probably a
question that you could answer better than me?  Do we need a
corresponding deactivate for some hardware?

> tio's map_info so it could be set from the daemon, or mp may need to 
> allocate its own io wrapper. It seems the latter may now be needed to 
> give ps's a map_info, because dm-mpath needs to store the path in the 
> tio's map_info.

I think the bio recording/reset is going to have to move inside the
target.  It makes sense that the mpath target should be the only one
that incurs this overhead.  So yes, there will have to be a wrapper
which could be used to provide context for the ps.

- Joe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Patch 1/6] dm: endio method
  2004-02-23 22:29         ` Joe Thornber
@ 2004-02-24  2:18           ` Mike Christie
  0 siblings, 0 replies; 17+ messages in thread
From: Mike Christie @ 2004-02-24  2:18 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Andrew Morton, Linux Mailing List

Joe Thornber wrote:

> On Mon, Feb 23, 2004 at 02:08:32PM -0800, Mike Christie wrote:
> 
>>With this move if the path has to be activated first, will the daemon 
>>have to call some sort of ps_path_is_initialized() function before it 
>>calls generic_make_request?
> 
> 
> Yes, I am planning to add something like this.  Whether it needs to be
> per path, or we could get away per priority group is probably a
> question that you could answer better than me ?  Do we need a
> corresponding deactivate for some hardware ?

Sorry, I do not know for sure. All the HW we have will activate one 
group and deactivate the other in one command or automatically.

Mike

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2004-02-24  2:20 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-02-20 15:31 device-mapper patchset Joe Thornber
2004-02-20 15:34 ` [Patch 1/6] dm: endio method Joe Thornber
2004-02-21  9:58   ` Mike Christie
2004-02-21 10:44     ` Christophe Saout
2004-02-23 10:05     ` Joe Thornber
2004-02-23 22:08       ` Mike Christie
2004-02-23 22:29         ` Joe Thornber
2004-02-24  2:18           ` Mike Christie
2004-02-20 15:34 ` [Patch 2/6] dm: remove v1 ioctl interface Joe Thornber
2004-02-20 20:18   ` Andreas Jellinghaus
2004-02-21  6:15   ` Andrew Morton
2004-02-20 15:35 ` [Patch 3/6] dm: list_for_each_entry audit Joe Thornber
2004-02-20 15:36 ` [Patch 4/6] dm: default queue limits Joe Thornber
2004-02-20 15:39   ` Joe Thornber
2004-02-20 15:37 ` [Patch 5/6] dm: list targets cmd Joe Thornber
2004-02-21  6:17   ` Andrew Morton
2004-02-20 15:37 ` [Patch 6/6] dm: multipath target Joe Thornber

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).