linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Device-mapper submission for 2.4
@ 2003-12-09 11:58 Joe Thornber
  2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
                   ` (5 more replies)
  0 siblings, 6 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 11:58 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Linux Mailing List, thornber

Marcello,

This set of patches is the core of device mapper for 2.4.  I would
appreciate it if you could merge these into 2.4.24 please.

Thanks,

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [Patch 1/4] fs.h: b_journal_head
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
@ 2003-12-09 12:24 ` Joe Thornber
  2003-12-09 23:46   ` Nathan Scott
  2003-12-09 12:25 ` [Patch 2/4] dm: mempool backport Joe Thornber
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:24 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

Add a new member to struct buffer_head called b_journal_head.  This is
for jbd to use, rather than have it peeking at b_private for in flight
ios.
--- diff/fs/buffer.c	2003-12-09 10:25:27.000000000 +0000
+++ source/fs/buffer.c	2003-12-09 10:32:41.000000000 +0000
@@ -763,6 +763,7 @@
 	bh->b_list = BUF_CLEAN;
 	bh->b_end_io = handler;
 	bh->b_private = private;
+	bh->b_journal_head = NULL;
 }
 
 static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
--- diff/fs/jbd/journal.c	2003-12-09 10:25:27.000000000 +0000
+++ source/fs/jbd/journal.c	2003-12-09 10:32:41.000000000 +0000
@@ -1802,9 +1802,9 @@
 
 		if (buffer_jbd(bh)) {
 			/* Someone did it for us! */
-			J_ASSERT_BH(bh, bh->b_private != NULL);
+ 			J_ASSERT_BH(bh, bh->b_journal_head != NULL);
 			journal_free_journal_head(jh);
-			jh = bh->b_private;
+ 			jh = bh->b_journal_head;
 		} else {
 			/*
 			 * We actually don't need jh_splice_lock when
@@ -1812,7 +1812,7 @@
 			 */
 			spin_lock(&jh_splice_lock);
 			set_bit(BH_JBD, &bh->b_state);
-			bh->b_private = jh;
+			bh->b_journal_head = jh;
 			jh->b_bh = bh;
 			atomic_inc(&bh->b_count);
 			spin_unlock(&jh_splice_lock);
@@ -1821,7 +1821,7 @@
 	}
 	jh->b_jcount++;
 	spin_unlock(&journal_datalist_lock);
-	return bh->b_private;
+	return bh->b_journal_head;
 }
 
 /*
@@ -1854,7 +1854,7 @@
 			J_ASSERT_BH(bh, jh2bh(jh) == bh);
 			BUFFER_TRACE(bh, "remove journal_head");
 			spin_lock(&jh_splice_lock);
-			bh->b_private = NULL;
+			bh->b_journal_head = NULL;
 			jh->b_bh = NULL;	/* debug, really */
 			clear_bit(BH_JBD, &bh->b_state);
 			__brelse(bh);
--- diff/include/linux/fs.h	2003-12-09 10:25:27.000000000 +0000
+++ source/include/linux/fs.h	2003-12-09 10:32:41.000000000 +0000
@@ -265,7 +265,7 @@
 	struct page *b_page;		/* the page this bh is mapped to */
 	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
  	void *b_private;		/* reserved for b_end_io */
-
+ 	void *b_journal_head;		/* ext3 journal_heads */
 	unsigned long b_rsector;	/* Real buffer location on disk */
 	wait_queue_head_t b_wait;
 
--- diff/include/linux/jbd.h	2003-06-16 09:56:12.000000000 +0100
+++ source/include/linux/jbd.h	2003-12-09 10:32:41.000000000 +0000
@@ -311,7 +311,7 @@
 
 static inline struct journal_head *bh2jh(struct buffer_head *bh)
 {
-	return bh->b_private;
+	return bh->b_journal_head;
 }
 
 #define HAVE_JOURNAL_CALLBACK_STATUS

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [Patch 2/4] dm: mempool backport
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
  2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
@ 2003-12-09 12:25 ` Joe Thornber
  2003-12-09 12:26 ` [Patch 3/4] dm: core files Joe Thornber
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:25 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

Backport of mempool code.
--- diff/mm/Makefile	2002-08-05 14:57:44.000000000 +0100
+++ source/mm/Makefile	2003-12-09 10:34:55.000000000 +0000
@@ -9,12 +9,12 @@
 
 O_TARGET := mm.o
 
-export-objs := shmem.o filemap.o memory.o page_alloc.o
+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
 
 obj-y	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
 	    vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
 	    page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
-	    shmem.o
+	    shmem.o mempool.o
 
 obj-$(CONFIG_HIGHMEM) += highmem.o
 
--- diff/include/linux/mempool.h	1970-01-01 01:00:00.000000000 +0100
+++ source/include/linux/mempool.h	2003-12-09 10:34:55.000000000 +0000
@@ -0,0 +1,31 @@
+/*
+ * memory buffer pool support
+ */
+#ifndef _LINUX_MEMPOOL_H
+#define _LINUX_MEMPOOL_H
+
+#include <linux/list.h>
+#include <linux/wait.h>
+
+struct mempool_s;
+typedef struct mempool_s mempool_t;
+
+typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data);
+typedef void (mempool_free_t)(void *element, void *pool_data);
+
+extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+				 mempool_free_t *free_fn, void *pool_data);
+extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask);
+extern void mempool_destroy(mempool_t *pool);
+extern void * mempool_alloc(mempool_t *pool, int gfp_mask);
+extern void mempool_free(void *element, mempool_t *pool);
+
+/*
+ * A mempool_alloc_t and mempool_free_t that get the memory from
+ * a slab that is passed in through pool_data.
+ */
+void *mempool_alloc_slab(int gfp_mask, void *pool_data);
+void mempool_free_slab(void *element, void *pool_data);
+
+
+#endif /* _LINUX_MEMPOOL_H */
--- diff/mm/mempool.c	1970-01-01 01:00:00.000000000 +0100
+++ source/mm/mempool.c	2003-12-09 10:34:55.000000000 +0000
@@ -0,0 +1,299 @@
+/*
+ *  linux/mm/mempool.c
+ *
+ *  memory buffer pool support. Such pools are mostly used
+ *  for guaranteed, deadlock-free memory allocations during
+ *  extreme VM load.
+ *
+ *  started by Ingo Molnar, Copyright (C) 2001
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mempool.h>
+
+struct mempool_s {
+	spinlock_t lock;
+	int min_nr;		/* nr of elements at *elements */
+	int curr_nr;		/* Current nr of elements at *elements */
+	void **elements;
+
+	void *pool_data;
+	mempool_alloc_t *alloc;
+	mempool_free_t *free;
+	wait_queue_head_t wait;
+};
+
+static void add_element(mempool_t *pool, void *element)
+{
+	BUG_ON(pool->curr_nr >= pool->min_nr);
+	pool->elements[pool->curr_nr++] = element;
+}
+
+static void *remove_element(mempool_t *pool)
+{
+	BUG_ON(pool->curr_nr <= 0);
+	return pool->elements[--pool->curr_nr];
+}
+
+static void free_pool(mempool_t *pool)
+{
+	while (pool->curr_nr) {
+		void *element = remove_element(pool);
+		pool->free(element, pool->pool_data);
+	}
+	kfree(pool->elements);
+	kfree(pool);
+}
+
+/**
+ * mempool_create - create a memory pool
+ * @min_nr:    the minimum number of elements guaranteed to be
+ *             allocated for this pool.
+ * @alloc_fn:  user-defined element-allocation function.
+ * @free_fn:   user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * this function creates and allocates a guaranteed size, preallocated
+ * memory pool. The pool can be used from the mempool_alloc and mempool_free
+ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
+ * functions might sleep - as long as the mempool_alloc function is not called
+ * from IRQ contexts.
+ */
+mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+				mempool_free_t *free_fn, void *pool_data)
+{
+	mempool_t *pool;
+
+	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
+	memset(pool, 0, sizeof(*pool));
+	pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL);
+	if (!pool->elements) {
+		kfree(pool);
+		return NULL;
+	}
+	spin_lock_init(&pool->lock);
+	pool->min_nr = min_nr;
+	pool->pool_data = pool_data;
+	init_waitqueue_head(&pool->wait);
+	pool->alloc = alloc_fn;
+	pool->free = free_fn;
+
+	/*
+	 * First pre-allocate the guaranteed number of buffers.
+	 */
+	while (pool->curr_nr < pool->min_nr) {
+		void *element;
+
+		element = pool->alloc(GFP_KERNEL, pool->pool_data);
+		if (unlikely(!element)) {
+			free_pool(pool);
+			return NULL;
+		}
+		add_element(pool, element);
+	}
+	return pool;
+}
+
+/**
+ * mempool_resize - resize an existing memory pool
+ * @pool:       pointer to the memory pool which was allocated via
+ *              mempool_create().
+ * @new_min_nr: the new minimum number of elements guaranteed to be
+ *              allocated for this pool.
+ * @gfp_mask:   the usual allocation bitmask.
+ *
+ * This function shrinks/grows the pool. In the case of growing,
+ * it cannot be guaranteed that the pool will be grown to the new
+ * size immediately, but new mempool_free() calls will refill it.
+ *
+ * Note, the caller must guarantee that no mempool_destroy is called
+ * while this function is running. mempool_alloc() & mempool_free()
+ * might be called (eg. from IRQ contexts) while this function executes.
+ */
+int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask)
+{
+	void *element;
+	void **new_elements;
+	unsigned long flags;
+
+	BUG_ON(new_min_nr <= 0);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	if (new_min_nr < pool->min_nr) {
+		while (pool->curr_nr > new_min_nr) {
+			element = remove_element(pool);
+			spin_unlock_irqrestore(&pool->lock, flags);
+			pool->free(element, pool->pool_data);
+			spin_lock_irqsave(&pool->lock, flags);
+		}
+		pool->min_nr = new_min_nr;
+		goto out_unlock;
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	/* Grow the pool */
+	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
+	if (!new_elements)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	memcpy(new_elements, pool->elements,
+			pool->curr_nr * sizeof(*new_elements));
+	kfree(pool->elements);
+	pool->elements = new_elements;
+	pool->min_nr = new_min_nr;
+
+	while (pool->curr_nr < pool->min_nr) {
+		spin_unlock_irqrestore(&pool->lock, flags);
+		element = pool->alloc(gfp_mask, pool->pool_data);
+		if (!element)
+			goto out;
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->curr_nr < pool->min_nr)
+			add_element(pool, element);
+		else
+			kfree(element);		/* Raced */
+	}
+out_unlock:
+	spin_unlock_irqrestore(&pool->lock, flags);
+out:
+	return 0;
+}
+
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool:      pointer to the memory pool which was allocated via
+ *             mempool_create().
+ *
+ * this function only sleeps if the free_fn() function sleeps. The caller
+ * has to guarantee that all elements have been returned to the pool (ie:
+ * freed) prior to calling mempool_destroy().
+ */
+void mempool_destroy(mempool_t *pool)
+{
+	if (pool->curr_nr != pool->min_nr)
+		BUG();		/* There were outstanding elements */
+	free_pool(pool);
+}
+
+/**
+ * mempool_alloc - allocate an element from a specific memory pool
+ * @pool:      pointer to the memory pool which was allocated via
+ *             mempool_create().
+ * @gfp_mask:  the usual allocation bitmask.
+ *
+ * this function only sleeps if the alloc_fn function sleeps or
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+ */
+void * mempool_alloc(mempool_t *pool, int gfp_mask)
+{
+	void *element;
+	unsigned long flags;
+	int curr_nr;
+	DECLARE_WAITQUEUE(wait, current);
+	int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
+repeat_alloc:
+	element = pool->alloc(gfp_nowait, pool->pool_data);
+	if (likely(element != NULL))
+		return element;
+
+	/*
+	 * If the pool is less than 50% full then try harder
+	 * to allocate an element:
+	 */
+	if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
+		element = pool->alloc(gfp_mask, pool->pool_data);
+		if (likely(element != NULL))
+			return element;
+	}
+
+	/*
+	 * Kick the VM at this point.
+	 */
+	wakeup_bdflush();
+
+	spin_lock_irqsave(&pool->lock, flags);
+	if (likely(pool->curr_nr)) {
+		element = remove_element(pool);
+		spin_unlock_irqrestore(&pool->lock, flags);
+		return element;
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	/* We must not sleep in the GFP_ATOMIC case */
+	if (gfp_mask == gfp_nowait)
+		return NULL;
+
+	run_task_queue(&tq_disk);
+
+	add_wait_queue_exclusive(&pool->wait, &wait);
+	set_task_state(current, TASK_UNINTERRUPTIBLE);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	curr_nr = pool->curr_nr;
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	if (!curr_nr)
+		schedule();
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&pool->wait, &wait);
+
+	goto repeat_alloc;
+}
+
+/**
+ * mempool_free - return an element to the pool.
+ * @element:   pool element pointer.
+ * @pool:      pointer to the memory pool which was allocated via
+ *             mempool_create().
+ *
+ * this function only sleeps if the free_fn() function sleeps.
+ */
+void mempool_free(void *element, mempool_t *pool)
+{
+	unsigned long flags;
+
+	if (pool->curr_nr < pool->min_nr) {
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->curr_nr < pool->min_nr) {
+			add_element(pool, element);
+			spin_unlock_irqrestore(&pool->lock, flags);
+			wake_up(&pool->wait);
+			return;
+		}
+		spin_unlock_irqrestore(&pool->lock, flags);
+	}
+	pool->free(element, pool->pool_data);
+}
+
+/*
+ * A commonly used alloc and free fn.
+ */
+void *mempool_alloc_slab(int gfp_mask, void *pool_data)
+{
+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+	return kmem_cache_alloc(mem, gfp_mask);
+}
+
+void mempool_free_slab(void *element, void *pool_data)
+{
+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+	kmem_cache_free(mem, element);
+}
+
+
+EXPORT_SYMBOL(mempool_create);
+EXPORT_SYMBOL(mempool_resize);
+EXPORT_SYMBOL(mempool_destroy);
+EXPORT_SYMBOL(mempool_alloc);
+EXPORT_SYMBOL(mempool_free);
+EXPORT_SYMBOL(mempool_alloc_slab);
+EXPORT_SYMBOL(mempool_free_slab);

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [Patch 3/4] dm: core files
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
  2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
  2003-12-09 12:25 ` [Patch 2/4] dm: mempool backport Joe Thornber
@ 2003-12-09 12:26 ` Joe Thornber
  2003-12-09 12:26 ` [Patch 4/4] dm: ioctl interface Joe Thornber
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:26 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

Device-mapper core.
--- diff/Documentation/Configure.help	2003-12-09 10:25:24.000000000 +0000
+++ source/Documentation/Configure.help	2003-12-09 10:39:47.000000000 +0000
@@ -1912,6 +1912,20 @@
   want), say M here and read <file:Documentation/modules.txt>.  The
   module will be called lvm-mod.o.
 
+Device-mapper support
+CONFIG_BLK_DEV_DM
+  Device-mapper is a low level volume manager.  It works by allowing
+  people to specify mappings for ranges of logical sectors.  Various
+  mapping types are available, in addition people may write their own
+  modules containing custom mappings if they wish.
+
+  Higher level volume managers such as LVM2 use this driver.
+
+  If you want to compile this as a module, say M here and read 
+  <file:Documentation/modules.txt>.  The module will be called dm-mod.o.
+
+  If unsure, say N.
+
 Multiple devices driver support (RAID and LVM)
 CONFIG_MD
   Support multiple physical spindles through a single logical device.
--- diff/MAINTAINERS	2003-12-09 10:25:24.000000000 +0000
+++ source/MAINTAINERS	2003-12-09 10:39:47.000000000 +0000
@@ -570,6 +570,13 @@
 W:	http://www.debian.org/~dz/i8k/
 S:	Maintained
 
+DEVICE MAPPER
+P:	Joe Thornber
+M:	dm@uk.sistina.com
+L:	linux-LVM@sistina.com
+W:	http://www.sistina.com/lvm
+S:	Maintained
+
 DEVICE NUMBER REGISTRY
 P:	H. Peter Anvin
 M:	hpa@zytor.com
--- diff/drivers/md/Config.in	2001-09-26 16:15:05.000000000 +0100
+++ source/drivers/md/Config.in	2003-12-09 10:42:38.000000000 +0000
@@ -14,5 +14,6 @@
 dep_tristate '  Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
 
 dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
+dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
 
 endmenu
--- diff/drivers/md/Makefile	2002-01-17 10:07:52.000000000 +0000
+++ source/drivers/md/Makefile	2003-12-09 10:43:27.000000000 +0000
@@ -4,24 +4,32 @@
 
 O_TARGET	:= mddev.o
 
-export-objs	:= md.o xor.o
-list-multi	:= lvm-mod.o
+export-objs	:= md.o xor.o dm-table.o dm-target.o dm.o
+
+list-multi	:= lvm-mod.o dm-mod.o dm-mirror-mod.o
 lvm-mod-objs	:= lvm.o lvm-snap.o lvm-fs.o
+dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o
 
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise 
 # themselves, and md.o may use the personalities when it 
 # auto-initialised.
 
-obj-$(CONFIG_MD_LINEAR)		+= linear.o
-obj-$(CONFIG_MD_RAID0)		+= raid0.o
-obj-$(CONFIG_MD_RAID1)		+= raid1.o
-obj-$(CONFIG_MD_RAID5)		+= raid5.o xor.o
-obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
-obj-$(CONFIG_BLK_DEV_MD)	+= md.o
-obj-$(CONFIG_BLK_DEV_LVM)	+= lvm-mod.o
+obj-$(CONFIG_MD_LINEAR)			+= linear.o
+obj-$(CONFIG_MD_RAID0)			+= raid0.o
+obj-$(CONFIG_MD_RAID1)			+= raid1.o
+obj-$(CONFIG_MD_RAID5)			+= raid5.o xor.o
+obj-$(CONFIG_MD_MULTIPATH)		+= multipath.o
+obj-$(CONFIG_BLK_DEV_MD)		+= md.o
+
+obj-$(CONFIG_BLK_DEV_LVM)		+= lvm-mod.o
+
+obj-$(CONFIG_BLK_DEV_DM)		+= dm-mod.o
 
 include $(TOPDIR)/Rules.make
 
 lvm-mod.o: $(lvm-mod-objs)
 	$(LD) -r -o $@ $(lvm-mod-objs)
+
+dm-mod.o: $(dm-mod-objs)
+	$(LD) -r -o $@ $(dm-mod-objs)
--- diff/drivers/md/dm-linear.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-linear.c	2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+/*
+ * Linear: maps a linear range of a device.
+ */
+struct linear_c {
+	struct dm_dev *dev;
+	sector_t start;
+};
+
+/*
+ * Construct a linear mapping: <dev_path> <offset>
+ */
+static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct linear_c *lc;
+
+	if (argc != 2) {
+		ti->error = "dm-linear: Invalid argument count";
+		return -EINVAL;
+	}
+
+	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+	if (lc == NULL) {
+		ti->error = "dm-linear: Cannot allocate linear context";
+		return -ENOMEM;
+	}
+
+	if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
+		ti->error = "dm-linear: Invalid device sector";
+		goto bad;
+	}
+
+	if (dm_get_device(ti, argv[0], lc->start, ti->len,
+			  dm_table_get_mode(ti->table), &lc->dev)) {
+		ti->error = "dm-linear: Device lookup failed";
+		goto bad;
+	}
+
+	ti->private = lc;
+	return 0;
+
+      bad:
+	kfree(lc);
+	return -EINVAL;
+}
+
+static void linear_dtr(struct dm_target *ti)
+{
+	struct linear_c *lc = (struct linear_c *) ti->private;
+
+	dm_put_device(ti, lc->dev);
+	kfree(lc);
+}
+
+static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+		      union map_info *map_context)
+{
+	struct linear_c *lc = (struct linear_c *) ti->private;
+
+	bh->b_rdev = lc->dev->dev;
+	bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
+
+	return 1;
+}
+
+static int linear_status(struct dm_target *ti, status_type_t type,
+			 char *result, unsigned int maxlen)
+{
+	struct linear_c *lc = (struct linear_c *) ti->private;
+	kdev_t kdev;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';
+		break;
+
+	case STATUSTYPE_TABLE:
+		kdev = to_kdev_t(lc->dev->bdev->bd_dev);
+		snprintf(result, maxlen, "%s " SECTOR_FORMAT,
+			 dm_kdevname(kdev), lc->start);
+		break;
+	}
+	return 0;
+}
+
+static struct target_type linear_target = {
+	.name   = "linear",
+	.module = THIS_MODULE,
+	.ctr    = linear_ctr,
+	.dtr    = linear_dtr,
+	.map    = linear_map,
+	.status = linear_status,
+};
+
+int __init dm_linear_init(void)
+{
+	int r = dm_register_target(&linear_target);
+
+	if (r < 0)
+		DMERR("linear: register failed %d", r);
+
+	return r;
+}
+
+void dm_linear_exit(void)
+{
+	int r = dm_unregister_target(&linear_target);
+
+	if (r < 0)
+		DMERR("linear: unregister failed %d", r);
+}
--- diff/drivers/md/dm-stripe.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-stripe.c	2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+struct stripe {
+	struct dm_dev *dev;
+	sector_t physical_start;
+};
+
+struct stripe_c {
+	uint32_t stripes;
+
+	/* The size of this target / num. stripes */
+	uint32_t stripe_width;
+
+	/* stripe chunk size */
+	uint32_t chunk_shift;
+	sector_t chunk_mask;
+
+	struct stripe stripe[0];
+};
+
+static inline struct stripe_c *alloc_context(unsigned int stripes)
+{
+	size_t len;
+
+	if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
+			  stripes))
+		return NULL;
+
+	len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
+
+	return kmalloc(len, GFP_KERNEL);
+}
+
+/*
+ * Parse a single <dev> <sector> pair
+ */
+static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
+		      unsigned int stripe, char **argv)
+{
+	sector_t start;
+
+	if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
+		return -EINVAL;
+
+	if (dm_get_device(ti, argv[0], start, sc->stripe_width,
+			  dm_table_get_mode(ti->table),
+			  &sc->stripe[stripe].dev))
+		return -ENXIO;
+
+	sc->stripe[stripe].physical_start = start;
+	return 0;
+}
+
+/*
+ * FIXME: Nasty function, only present because we can't link
+ * against __moddi3 and __divdi3.
+ *
+ * returns a == b * n
+ */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+	sector_t acc, prev, i;
+
+	*n = 0;
+	while (a >= b) {
+		for (acc = b, prev = 0, i = 1;
+		     acc <= a;
+		     prev = acc, acc <<= 1, i <<= 1)
+			;
+
+		a -= prev;
+		*n += i >> 1;
+	}
+
+	return a == 0;
+}
+
+/*
+ * Construct a striped mapping.
+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
+ */
+static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct stripe_c *sc;
+	sector_t width;
+	uint32_t stripes;
+	uint32_t chunk_size;
+	char *end;
+	int r;
+	unsigned int i;
+
+	if (argc < 2) {
+		ti->error = "dm-stripe: Not enough arguments";
+		return -EINVAL;
+	}
+
+	stripes = simple_strtoul(argv[0], &end, 10);
+	if (*end) {
+		ti->error = "dm-stripe: Invalid stripe count";
+		return -EINVAL;
+	}
+
+	chunk_size = simple_strtoul(argv[1], &end, 10);
+	if (*end) {
+		ti->error = "dm-stripe: Invalid chunk_size";
+		return -EINVAL;
+	}
+
+	/*
+	 * chunk_size is a power of two
+	 */
+	if (!chunk_size || (chunk_size & (chunk_size - 1))) {
+		ti->error = "dm-stripe: Invalid chunk size";
+		return -EINVAL;
+	}
+
+	if (!multiple(ti->len, stripes, &width)) {
+		ti->error = "dm-stripe: Target length not divisable by "
+		    "number of stripes";
+		return -EINVAL;
+	}
+
+	/*
+	 * Do we have enough arguments for that many stripes ?
+	 */
+	if (argc != (2 + 2 * stripes)) {
+		ti->error = "dm-stripe: Not enough destinations specified";
+		return -EINVAL;
+	}
+
+	sc = alloc_context(stripes);
+	if (!sc) {
+		ti->error = "dm-stripe: Memory allocation for striped context "
+		    "failed";
+		return -ENOMEM;
+	}
+
+	sc->stripes = stripes;
+	sc->stripe_width = width;
+
+	sc->chunk_mask = ((sector_t) chunk_size) - 1;
+	for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
+		chunk_size >>= 1;
+	sc->chunk_shift--;
+
+	/*
+	 * Get the stripe destinations.
+	 */
+	for (i = 0; i < stripes; i++) {
+		argv += 2;
+
+		r = get_stripe(ti, sc, i, argv);
+		if (r < 0) {
+			ti->error = "dm-stripe: Couldn't parse stripe "
+			    "destination";
+			while (i--)
+				dm_put_device(ti, sc->stripe[i].dev);
+			kfree(sc);
+			return r;
+		}
+	}
+
+	ti->private = sc;
+	return 0;
+}
+
+static void stripe_dtr(struct dm_target *ti)
+{
+	unsigned int i;
+	struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+	for (i = 0; i < sc->stripes; i++)
+		dm_put_device(ti, sc->stripe[i].dev);
+
+	kfree(sc);
+}
+
+static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+		      union map_info *context)
+{
+	struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+	sector_t offset = bh->b_rsector - ti->begin;
+	uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
+	uint32_t stripe = chunk % sc->stripes;	/* 32bit modulus */
+	chunk = chunk / sc->stripes;
+
+	bh->b_rdev = sc->stripe[stripe].dev->dev;
+	bh->b_rsector = sc->stripe[stripe].physical_start +
+	    (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
+	return 1;
+}
+
+static int stripe_status(struct dm_target *ti, status_type_t type,
+			 char *result, unsigned int maxlen)
+{
+	struct stripe_c *sc = (struct stripe_c *) ti->private;
+	int offset;
+	unsigned int i;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';
+		break;
+
+	case STATUSTYPE_TABLE:
+		offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
+				  sc->stripes, sc->chunk_mask + 1);
+		for (i = 0; i < sc->stripes; i++) {
+			offset +=
+			    snprintf(result + offset, maxlen - offset,
+				     " %s " SECTOR_FORMAT,
+		       dm_kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)),
+				     sc->stripe[i].physical_start);
+		}
+		break;
+	}
+	return 0;
+}
+
+static struct target_type stripe_target = {
+	.name   = "striped",
+	.module = THIS_MODULE,
+	.ctr    = stripe_ctr,
+	.dtr    = stripe_dtr,
+	.map    = stripe_map,
+	.status = stripe_status,
+};
+
+int __init dm_stripe_init(void)
+{
+	int r;
+
+	r = dm_register_target(&stripe_target);
+	if (r < 0)
+		DMWARN("striped target registration failed");
+
+	return r;
+}
+
+void dm_stripe_exit(void)
+{
+	if (dm_unregister_target(&stripe_target))
+		DMWARN("striped target unregistration failed");
+
+	return;
+}
--- diff/drivers/md/dm-table.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-table.c	2003-12-09 10:57:20.000000000 +0000
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
+
+struct dm_table {
+	atomic_t holders;
+
+	/* btree table */
+	unsigned int depth;
+	unsigned int counts[MAX_DEPTH];	/* in nodes */
+	sector_t *index[MAX_DEPTH];
+
+	unsigned int num_targets;
+	unsigned int num_allocated;
+	sector_t *highs;
+	struct dm_target *targets;
+
+	/*
+	 * Indicates the rw permissions for the new logical
+	 * device.  This should be a combination of FMODE_READ
+	 * and FMODE_WRITE.
+	 */
+	int mode;
+
+	/* a list of devices used by this table */
+	struct list_head devices;
+
+	/* events get handed up using this callback */
+	void (*event_fn)(void *);
+	void *event_context;
+};
+
+/*
+ * Similar to ceiling(log_size(n))
+ */
+static unsigned int int_log(unsigned long n, unsigned long base)
+{
+	int result = 0;
+
+	while (n > 1) {
+		n = dm_div_up(n, base);
+		result++;
+	}
+
+	return result;
+}
+
+/*
+ * Calculate the index of the child node of the n'th node k'th key.
+ */
+static inline unsigned int get_child(unsigned int n, unsigned int k)
+{
+	return (n * CHILDREN_PER_NODE) + k;
+}
+
+/*
+ * Return the n'th node of level l from table t.
+ */
+static inline sector_t *get_node(struct dm_table *t, unsigned int l,
+				 unsigned int n)
+{
+	return t->index[l] + (n * KEYS_PER_NODE);
+}
+
+/*
+ * Return the highest key that you could lookup from the n'th
+ * node on level l of the btree.
+ */
+static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
+{
+	for (; l < t->depth - 1; l++)
+		n = get_child(n, CHILDREN_PER_NODE - 1);
+
+	if (n >= t->counts[l])
+		return (sector_t) - 1;
+
+	return get_node(t, l, n)[KEYS_PER_NODE - 1];
+}
+
+/*
+ * Fills in a level of the btree based on the highs of the level
+ * below it.
+ */
+static int setup_btree_index(unsigned int l, struct dm_table *t)
+{
+	unsigned int n, k;
+	sector_t *node;
+
+	for (n = 0U; n < t->counts[l]; n++) {
+		node = get_node(t, l, n);
+
+		for (k = 0U; k < KEYS_PER_NODE; k++)
+			node[k] = high(t, l + 1, get_child(n, k));
+	}
+
+	return 0;
+}
+
+void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
+{
+	unsigned long size;
+	void *addr;
+
+	/*
+	 * Check that we're not going to overflow.
+	 */
+	if (nmemb > (ULONG_MAX / elem_size))
+		return NULL;
+
+	size = nmemb * elem_size;
+	addr = vmalloc(size);
+	if (addr)
+		memset(addr, 0, size);
+
+	return addr;
+}
+
+int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
+{
+	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+
+	if (!t)
+		return -ENOMEM;
+
+	memset(t, 0, sizeof(*t));
+	INIT_LIST_HEAD(&t->devices);
+	atomic_set(&t->holders, 1);
+
+	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
+
+	/* Allocate both the target array and offset array at once. */
+	t->highs = (sector_t *) dm_vcalloc(sizeof(struct dm_target) +
+					   sizeof(sector_t), num_targets);
+	if (!t->highs) {
+		kfree(t);
+		return -ENOMEM;
+	}
+
+	memset(t->highs, -1, sizeof(*t->highs) * num_targets);
+
+	t->targets = (struct dm_target *) (t->highs + num_targets);
+	t->num_allocated = num_targets;
+	t->mode = mode;
+	*result = t;
+	return 0;
+}
+
+static void free_devices(struct list_head *devices)
+{
+	struct list_head *tmp, *next;
+
+	for (tmp = devices->next; tmp != devices; tmp = next) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		next = tmp->next;
+		kfree(dd);
+	}
+}
+
+void table_destroy(struct dm_table *t)
+{
+	unsigned int i;
+
+	/* free the indexes (see dm_table_complete) */
+	if (t->depth >= 2)
+		vfree(t->index[t->depth - 2]);
+
+	/* free the targets */
+	for (i = 0; i < t->num_targets; i++) {
+		struct dm_target *tgt = t->targets + i;
+
+		if (tgt->type->dtr)
+			tgt->type->dtr(tgt);
+
+		dm_put_target_type(tgt->type);
+	}
+
+	vfree(t->highs);
+
+	/* free the device list */
+	if (t->devices.next != &t->devices) {
+		DMWARN("devices still present during destroy: "
+		       "dm_table_remove_device calls missing");
+
+		free_devices(&t->devices);
+	}
+
+	kfree(t);
+}
+
+void dm_table_get(struct dm_table *t)
+{
+	atomic_inc(&t->holders);
+}
+
+void dm_table_put(struct dm_table *t)
+{
+	if (atomic_dec_and_test(&t->holders))
+		table_destroy(t);
+}
+
+/*
+ * Convert a device path to a dev_t.
+ */
+static int lookup_device(const char *path, kdev_t *dev)
+{
+	int r;
+	struct nameidata nd;
+	struct inode *inode;
+
+	if (!path_init(path, LOOKUP_FOLLOW, &nd))
+		return 0;
+
+	if ((r = path_walk(path, &nd)))
+		goto out;
+
+	inode = nd.dentry->d_inode;
+	if (!inode) {
+		r = -ENOENT;
+		goto out;
+	}
+
+	if (!S_ISBLK(inode->i_mode)) {
+		r = -ENOTBLK;
+		goto out;
+	}
+
+	*dev = inode->i_rdev;
+
+      out:
+	path_release(&nd);
+	return r;
+}
+
+/*
+ * See if we've already got a device in the list.
+ */
+static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
+{
+	struct list_head *tmp;
+
+	list_for_each(tmp, l) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		if (kdev_same(dd->dev, dev))
+			return dd;
+	}
+
+	return NULL;
+}
+
+/*
+ * Open a device so we can use it as a map destination.
+ */
+static int open_dev(struct dm_dev *dd)
+{
+	if (dd->bdev)
+		BUG();
+
+	dd->bdev = bdget(kdev_t_to_nr(dd->dev));
+	if (!dd->bdev)
+		return -ENOMEM;
+
+	return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW);
+}
+
+/*
+ * Close a device that we've been using.
+ */
+static void close_dev(struct dm_dev *dd)
+{
+	if (!dd->bdev)
+		return;
+
+	blkdev_put(dd->bdev, BDEV_RAW);
+	dd->bdev = NULL;
+}
+
+/*
+ * If possible (ie. blk_size[major] is set), this checks an area
+ * of a destination device is valid.
+ */
+static int check_device_area(kdev_t dev, sector_t start, sector_t len)
+{
+	int *sizes;
+	sector_t dev_size;
+
+	if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)]))
+		/* we don't know the device details,
+		 * so give the benefit of the doubt */
+		return 1;
+
+	/* convert to 512-byte sectors */
+	dev_size <<= 1;
+
+	return ((start < dev_size) && (len <= (dev_size - start)));
+}
+
+/*
+ * This upgrades the mode on an already open dm_dev.  Being
+ * careful to leave things as they were if we fail to reopen the
+ * device.
+ */
+static int upgrade_mode(struct dm_dev *dd, int new_mode)
+{
+	int r;
+	struct dm_dev dd_copy;
+
+	memcpy(&dd_copy, dd, sizeof(dd_copy));
+
+	dd->mode |= new_mode;
+	dd->bdev = NULL;
+	r = open_dev(dd);
+	if (!r)
+		close_dev(&dd_copy);
+	else
+		memcpy(dd, &dd_copy, sizeof(dd_copy));
+
+	return r;
+}
+
+/*
+ * Add a device to the list, or just increment the usage count if
+ * it's already present.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+		  sector_t len, int mode, struct dm_dev **result)
+{
+	int r;
+	kdev_t dev;
+	struct dm_dev *dd;
+	unsigned major, minor;
+	struct dm_table *t = ti->table;
+
+	if (!t)
+		BUG();
+
+	if (sscanf(path, "%u:%u", &major, &minor) == 2) {
+		/* Extract the major/minor numbers */
+		dev = mk_kdev(major, minor);
+	} else {
+		/* convert the path to a device */
+		if ((r = lookup_device(path, &dev)))
+			return r;
+	}
+
+	dd = find_device(&t->devices, dev);
+	if (!dd) {
+		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+		if (!dd)
+			return -ENOMEM;
+
+		dd->dev = dev;
+		dd->mode = mode;
+		dd->bdev = NULL;
+
+		if ((r = open_dev(dd))) {
+			kfree(dd);
+			return r;
+		}
+
+		atomic_set(&dd->count, 0);
+		list_add(&dd->list, &t->devices);
+
+	} else if (dd->mode != (mode | dd->mode)) {
+		r = upgrade_mode(dd, mode);
+		if (r)
+			return r;
+	}
+	atomic_inc(&dd->count);
+
+	if (!check_device_area(dd->dev, start, len)) {
+		DMWARN("device %s too small for target", path);
+		dm_put_device(ti, dd);
+		return -EINVAL;
+	}
+
+	*result = dd;
+
+	return 0;
+}
+
+/*
+ * Decrement a devices use count and remove it if neccessary.
+ */
+void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
+{
+	if (atomic_dec_and_test(&dd->count)) {
+		close_dev(dd);
+		list_del(&dd->list);
+		kfree(dd);
+	}
+}
+
+/*
+ * Checks to see if the target joins onto the end of the table.
+ */
+static int adjoin(struct dm_table *table, struct dm_target *ti)
+{
+	struct dm_target *prev;
+
+	if (!table->num_targets)
+		return !ti->begin;
+
+	prev = &table->targets[table->num_targets - 1];
+	return (ti->begin == (prev->begin + prev->len));
+}
+
+/*
+ * Used to dynamically allocate the arg array.
+ */
+static char **realloc_argv(unsigned *array_size, char **old_argv)
+{
+	char **argv;
+	unsigned new_size;
+
+	new_size = *array_size ? *array_size * 2 : 64;
+	argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
+	if (argv) {
+		memcpy(argv, old_argv, *array_size * sizeof(*argv));
+		*array_size = new_size;
+	}
+
+	kfree(old_argv);
+	return argv;
+}
+
+/*
+ * Destructively splits up the argument list to pass to ctr.
+ */
+static int split_args(int *argc, char ***argvp, char *input)
+{
+	char *start, *end = input, *out, **argv = NULL;
+	unsigned array_size = 0;
+
+	*argc = 0;
+	argv = realloc_argv(&array_size, argv);
+	if (!argv)
+		return -ENOMEM;
+
+	while (1) {
+		start = end;
+
+		/* Skip whitespace */
+		while (*start && isspace(*start))
+			start++;
+
+		if (!*start)
+			break;	/* success, we hit the end */
+
+		/* 'out' is used to remove any back-quotes */
+		end = out = start;
+		while (*end) {
+			/* Everything apart from '\0' can be quoted */
+			if (*end == '\\' && *(end + 1)) {
+				*out++ = *(end + 1);
+				end += 2;
+				continue;
+			}
+
+			if (isspace(*end))
+				break;	/* end of token */
+
+			*out++ = *end++;
+		}
+
+		/* have we already filled the array ? */
+		if ((*argc + 1) > array_size) {
+			argv = realloc_argv(&array_size, argv);
+			if (!argv)
+				return -ENOMEM;
+		}
+
+		/* we know this is whitespace */
+		if (*end)
+			end++;
+
+		/* terminate the string and put it in the array */
+		*out = '\0';
+		argv[*argc] = start;
+		(*argc)++;
+	}
+
+	*argvp = argv;
+	return 0;
+}
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+			sector_t start, sector_t len, char *params)
+{
+	int r = -EINVAL, argc;
+	char **argv;
+	struct dm_target *tgt;
+
+	if (t->num_targets >= t->num_allocated)
+		return -ENOMEM;
+
+	tgt = t->targets + t->num_targets;
+	memset(tgt, 0, sizeof(*tgt));
+
+	tgt->type = dm_get_target_type(type);
+	if (!tgt->type) {
+		tgt->error = "unknown target type";
+		return -EINVAL;
+	}
+
+	tgt->table = t;
+	tgt->begin = start;
+	tgt->len = len;
+	tgt->error = "Unknown error";
+
+	/*
+	 * Does this target adjoin the previous one ?
+	 */
+	if (!adjoin(t, tgt)) {
+		tgt->error = "Gap in table";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	r = split_args(&argc, &argv, params);
+	if (r) {
+		tgt->error = "couldn't split parameters (insufficient memory)";
+		goto bad;
+	}
+
+	r = tgt->type->ctr(tgt, argc, argv);
+	kfree(argv);
+	if (r)
+		goto bad;
+
+	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+	return 0;
+
+      bad:
+	printk(KERN_ERR DM_NAME ": %s\n", tgt->error);
+	dm_put_target_type(tgt->type);
+	return r;
+}
+
+static int setup_indexes(struct dm_table *t)
+{
+	int i;
+	unsigned int total = 0;
+	sector_t *indexes;
+
+	/* allocate the space for *all* the indexes */
+	for (i = t->depth - 2; i >= 0; i--) {
+		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
+		total += t->counts[i];
+	}
+
+	indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
+	if (!indexes)
+		return -ENOMEM;
+
+	/* set up internal nodes, bottom-up */
+	for (i = t->depth - 2, total = 0; i >= 0; i--) {
+		t->index[i] = indexes;
+		indexes += (KEYS_PER_NODE * t->counts[i]);
+		setup_btree_index(i, t);
+	}
+
+	return 0;
+}
+
+/*
+ * Builds the btree to index the map.
+ */
+int dm_table_complete(struct dm_table *t)
+{
+	int r = 0;
+	unsigned int leaf_nodes;
+
+	/* how many indexes will the btree have ? */
+	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
+	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
+
+	/* leaf layer has already been set up */
+	t->counts[t->depth - 1] = leaf_nodes;
+	t->index[t->depth - 1] = t->highs;
+
+	if (t->depth >= 2)
+		r = setup_indexes(t);
+
+	return r;
+}
+
+static spinlock_t _event_lock = SPIN_LOCK_UNLOCKED;
+void dm_table_event_callback(struct dm_table *t,
+			     void (*fn)(void *), void *context)
+{
+	spin_lock_irq(&_event_lock);
+	t->event_fn = fn;
+	t->event_context = context;
+	spin_unlock_irq(&_event_lock);
+}
+
+void dm_table_event(struct dm_table *t)
+{
+	spin_lock(&_event_lock);
+	if (t->event_fn)
+		t->event_fn(t->event_context);
+	spin_unlock(&_event_lock);
+}
+
+sector_t dm_table_get_size(struct dm_table *t)
+{
+	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
+}
+
+struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
+{
+	if (index > t->num_targets)
+		return NULL;
+
+	return t->targets + index;
+}
+
+/*
+ * Search the btree for the correct target.
+ */
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
+{
+	unsigned int l, n = 0, k = 0;
+	sector_t *node;
+
+	for (l = 0; l < t->depth; l++) {
+		n = get_child(n, k);
+		node = get_node(t, l, n);
+
+		for (k = 0; k < KEYS_PER_NODE; k++)
+			if (node[k] >= sector)
+				break;
+	}
+
+	return &t->targets[(KEYS_PER_NODE * n) + k];
+}
+
+unsigned int dm_table_get_num_targets(struct dm_table *t)
+{
+	return t->num_targets;
+}
+
+struct list_head *dm_table_get_devices(struct dm_table *t)
+{
+	return &t->devices;
+}
+
+int dm_table_get_mode(struct dm_table *t)
+{
+	return t->mode;
+}
+
+void dm_table_suspend_targets(struct dm_table *t)
+{
+	int i;
+
+	for (i = 0; i < t->num_targets; i++) {
+		struct dm_target *ti = t->targets + i;
+
+		if (ti->type->suspend)
+			ti->type->suspend(ti);
+	}
+}
+
+void dm_table_resume_targets(struct dm_table *t)
+{
+	int i;
+
+	for (i = 0; i < t->num_targets; i++) {
+		struct dm_target *ti = t->targets + i;
+
+		if (ti->type->resume)
+			ti->type->resume(ti);
+	}
+}
+
+EXPORT_SYMBOL(dm_get_device);
+EXPORT_SYMBOL(dm_put_device);
+EXPORT_SYMBOL(dm_table_event);
+EXPORT_SYMBOL(dm_table_get_mode);
--- diff/drivers/md/dm-target.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-target.c	2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+
+struct tt_internal {
+	struct target_type tt;
+
+	struct list_head list;
+	long use;
+};
+
+static LIST_HEAD(_targets);
+static DECLARE_RWSEM(_lock);
+
+#define DM_MOD_NAME_SIZE 32
+
+static inline struct tt_internal *__find_target_type(const char *name)
+{
+	struct list_head *tih;
+	struct tt_internal *ti;
+
+	list_for_each(tih, &_targets) {
+		ti = list_entry(tih, struct tt_internal, list);
+
+		if (!strcmp(name, ti->tt.name))
+			return ti;
+	}
+
+	return NULL;
+}
+
+static struct tt_internal *get_target_type(const char *name)
+{
+	struct tt_internal *ti;
+
+	down_read(&_lock);
+	ti = __find_target_type(name);
+
+	if (ti) {
+		if (ti->use == 0 && ti->tt.module)
+			__MOD_INC_USE_COUNT(ti->tt.module);
+		ti->use++;
+	}
+	up_read(&_lock);
+
+	return ti;
+}
+
+static void load_module(const char *name)
+{
+	char module_name[DM_MOD_NAME_SIZE] = "dm-";
+
+	/* Length check for strcat() below */
+	if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
+		return;
+
+	strcat(module_name, name);
+	request_module(module_name);
+}
+
+struct target_type *dm_get_target_type(const char *name)
+{
+	struct tt_internal *ti = get_target_type(name);
+
+	if (!ti) {
+		load_module(name);
+		ti = get_target_type(name);
+	}
+
+	return ti ? &ti->tt : NULL;
+}
+
+void dm_put_target_type(struct target_type *t)
+{
+	struct tt_internal *ti = (struct tt_internal *) t;
+
+	down_read(&_lock);
+	if (--ti->use == 0 && ti->tt.module)
+		__MOD_DEC_USE_COUNT(ti->tt.module);
+
+	if (ti->use < 0)
+		BUG();
+	up_read(&_lock);
+
+	return;
+}
+
+static struct tt_internal *alloc_target(struct target_type *t)
+{
+	struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+	if (ti) {
+		memset(ti, 0, sizeof(*ti));
+		ti->tt = *t;
+	}
+
+	return ti;
+}
+
+int dm_register_target(struct target_type *t)
+{
+	int rv = 0;
+	struct tt_internal *ti = alloc_target(t);
+
+	if (!ti)
+		return -ENOMEM;
+
+	down_write(&_lock);
+	if (__find_target_type(t->name)) {
+		kfree(ti);
+		rv = -EEXIST;
+	} else
+		list_add(&ti->list, &_targets);
+
+	up_write(&_lock);
+	return rv;
+}
+
+int dm_unregister_target(struct target_type *t)
+{
+	struct tt_internal *ti;
+
+	down_write(&_lock);
+	if (!(ti = __find_target_type(t->name))) {
+		up_write(&_lock);
+		return -EINVAL;
+	}
+
+	if (ti->use) {
+		up_write(&_lock);
+		return -ETXTBSY;
+	}
+
+	list_del(&ti->list);
+	kfree(ti);
+
+	up_write(&_lock);
+	return 0;
+}
+
+/*
+ * io-err: always fails an io, useful for bringing
+ * up LVs that have holes in them.
+ */
+static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
+{
+	return 0;
+}
+
+static void io_err_dtr(struct dm_target *ti)
+{
+	/* empty */
+}
+
+static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+		      union map_info *map_context)
+{
+	return -EIO;
+}
+
+static struct target_type error_target = {
+	.name = "error",
+	.ctr  = io_err_ctr,
+	.dtr  = io_err_dtr,
+	.map  = io_err_map,
+};
+
+int dm_target_init(void)
+{
+	return dm_register_target(&error_target);
+}
+
+void dm_target_exit(void)
+{
+	if (dm_unregister_target(&error_target))
+		DMWARN("error target unregistration failed");
+}
+
+EXPORT_SYMBOL(dm_register_target);
+EXPORT_SYMBOL(dm_unregister_target);
--- diff/drivers/md/dm.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm.c	2003-12-09 10:46:32.000000000 +0000
@@ -0,0 +1,1112 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/lvm.h>
+
+#include <asm/uaccess.h>
+
+static const char *_name = DM_NAME;
+#define DEFAULT_READ_AHEAD 64
+
+struct dm_io {
+	struct mapped_device *md;
+
+	struct dm_target *ti;
+	int rw;
+	union map_info map_context;
+	void (*end_io) (struct buffer_head * bh, int uptodate);
+	void *context;
+};
+
+struct deferred_io {
+	int rw;
+	struct buffer_head *bh;
+	struct deferred_io *next;
+};
+
+/*
+ * Bits for the md->flags field.
+ */
+#define DMF_BLOCK_IO 0
+#define DMF_SUSPENDED 1
+
+struct mapped_device {
+	struct rw_semaphore lock;
+	atomic_t holders;
+
+	kdev_t dev;
+	unsigned long flags;
+
+	/*
+	 * A list of ios that arrived while we were suspended.
+	 */
+	atomic_t pending;
+	wait_queue_head_t wait;
+	struct deferred_io *deferred;
+
+	/*
+	 * The current mapping.
+	 */
+	struct dm_table *map;
+
+	/*
+	 * io objects are allocated from here.
+	 */
+	mempool_t *io_pool;
+
+	/*
+	 * Event handling.
+	 */
+	uint32_t event_nr;
+	wait_queue_head_t eventq;
+};
+
+#define MIN_IOS 256
+static kmem_cache_t *_io_cache;
+
+static struct mapped_device *get_kdev(kdev_t dev);
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*-----------------------------------------------------------------
+ * In order to avoid the 256 minor number limit we are going to
+ * register more major numbers as neccessary.
+ *---------------------------------------------------------------*/
+#define MAX_MINORS (1 << MINORBITS)
+
+struct major_details {
+	unsigned int major;
+
+	int transient;
+	struct list_head transient_list;
+
+	unsigned int first_free_minor;
+	int nr_free_minors;
+
+	struct mapped_device *mds[MAX_MINORS];
+	int blk_size[MAX_MINORS];
+	int blksize_size[MAX_MINORS];
+	int hardsect_size[MAX_MINORS];
+};
+
+static struct rw_semaphore _dev_lock;
+static struct major_details *_majors[MAX_BLKDEV];
+
+/*
+ * This holds a list of majors that non-specified device numbers
+ * may be allocated from.  Only majors with free minors appear on
+ * this list.
+ */
+static LIST_HEAD(_transients_free);
+
+static int __alloc_major(unsigned int major, struct major_details **result)
+{
+	int r;
+	unsigned int transient = !major;
+	struct major_details *maj;
+
+	/* Major already allocated? */
+	if (major && _majors[major])
+		return 0;
+
+	maj = kmalloc(sizeof(*maj), GFP_KERNEL);
+	if (!maj)
+		return -ENOMEM;
+
+	memset(maj, 0, sizeof(*maj));
+	INIT_LIST_HEAD(&maj->transient_list);
+
+	maj->nr_free_minors = MAX_MINORS;
+
+	r = register_blkdev(major, _name, &dm_blk_dops);
+	if (r < 0) {
+		DMERR("register_blkdev failed for %d", major);
+		kfree(maj);
+		return r;
+	}
+	if (r > 0)
+		major = r;
+
+	maj->major = major;
+
+	if (transient) {
+		maj->transient = transient;
+		list_add_tail(&maj->transient_list, &_transients_free);
+	}
+
+	_majors[major] = maj;
+
+	blk_size[major] = maj->blk_size;
+	blksize_size[major] = maj->blksize_size;
+	hardsect_size[major] = maj->hardsect_size;
+	read_ahead[major] = DEFAULT_READ_AHEAD;
+
+	blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request);
+
+	*result = maj;
+	return 0;
+}
+
+static void __free_major(struct major_details *maj)
+{
+	unsigned int major = maj->major;
+
+	list_del(&maj->transient_list);
+
+	read_ahead[major] = 0;
+	blk_size[major] = NULL;
+	blksize_size[major] = NULL;
+	hardsect_size[major] = NULL;
+
+	_majors[major] = NULL;
+	kfree(maj);
+
+	if (unregister_blkdev(major, _name) < 0)
+		DMERR("devfs_unregister_blkdev failed");
+}
+
+static void free_all_majors(void)
+{
+	unsigned int major = ARRAY_SIZE(_majors);
+
+	down_write(&_dev_lock);
+
+	while (major--)
+		if (_majors[major])
+			__free_major(_majors[major]);
+
+	up_write(&_dev_lock);
+}
+
+static void free_dev(kdev_t dev)
+{
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+	struct major_details *maj;
+
+	down_write(&_dev_lock);
+
+	maj = _majors[major];
+	if (!maj)
+		goto out;
+
+	maj->mds[minor] = NULL;
+	maj->nr_free_minors++;
+
+	if (maj->nr_free_minors == MAX_MINORS) {
+		__free_major(maj);
+		goto out;
+	}
+
+	if (!maj->transient)
+		goto out;
+
+	if (maj->nr_free_minors == 1)
+		list_add_tail(&maj->transient_list, &_transients_free);
+
+	if (minor < maj->first_free_minor)
+		maj->first_free_minor = minor;
+
+      out:
+	up_write(&_dev_lock);
+}
+
+static void __alloc_minor(struct major_details *maj, unsigned int minor,
+			  struct mapped_device *md)
+{
+	maj->mds[minor] = md;
+	md->dev = mk_kdev(maj->major, minor);
+	maj->nr_free_minors--;
+
+	if (maj->transient && !maj->nr_free_minors)
+		list_del_init(&maj->transient_list);
+}
+
+/*
+ * See if requested kdev_t is available.
+ */
+static int specific_dev(kdev_t dev, struct mapped_device *md)
+{
+	int r = 0;
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+	struct major_details *maj;
+
+	if (!major || (major > MAX_BLKDEV) || (minor >= MAX_MINORS)) {
+		DMWARN("device number requested out of range (%d, %d)",
+		       major, minor);
+		return -EINVAL;
+	}
+
+	down_write(&_dev_lock);
+	maj = _majors[major];
+
+	/* Register requested major? */
+	if (!maj) {
+		r = __alloc_major(major, &maj);
+		if (r)
+			goto out;
+
+		major = maj->major;
+	}
+
+	if (maj->mds[minor]) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	__alloc_minor(maj, minor, md);
+
+      out:
+	up_write(&_dev_lock);
+
+	return r;
+}
+
+/*
+ * Find first unused device number, requesting a new major number if required.
+ */
+static int first_free_dev(struct mapped_device *md)
+{
+	int r = 0;
+	struct major_details *maj;
+
+	down_write(&_dev_lock);
+
+	if (list_empty(&_transients_free)) {
+		r = __alloc_major(0, &maj);
+		if (r)
+			goto out;
+	} else
+		maj = list_entry(_transients_free.next, struct major_details,
+				 transient_list);
+
+	while (maj->mds[maj->first_free_minor++])
+		;
+
+	__alloc_minor(maj, maj->first_free_minor - 1, md);
+
+      out:
+	up_write(&_dev_lock);
+
+	return r;
+}
+
+static struct mapped_device *get_kdev(kdev_t dev)
+{
+	struct mapped_device *md;
+	struct major_details *maj;
+
+	down_read(&_dev_lock);
+	maj = _majors[major(dev)];
+	if (!maj) {
+		md = NULL;
+		goto out;
+	}
+	md = maj->mds[minor(dev)];
+	if (md)
+		dm_get(md);
+      out:
+	up_read(&_dev_lock);
+
+	return md;
+}
+
+/*-----------------------------------------------------------------
+ * init/exit code
+ *---------------------------------------------------------------*/
+
+static __init int local_init(void)
+{
+	init_rwsem(&_dev_lock);
+
+	/* allocate a slab for the dm_ios */
+	_io_cache = kmem_cache_create("dm io",
+				      sizeof(struct dm_io), 0, 0, NULL, NULL);
+
+	if (!_io_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void local_exit(void)
+{
+	kmem_cache_destroy(_io_cache);
+	free_all_majors();
+
+	DMINFO("cleaned up");
+}
+
+/*
+ * We have a lot of init/exit functions, so it seems easier to
+ * store them in an array.  The disposable macro 'xx'
+ * expands a prefix into a pair of function names.
+ */
+static struct {
+	int (*init) (void);
+	void (*exit) (void);
+
+} _inits[] = {
+#define xx(n) {n ## _init, n ## _exit},
+	xx(local)
+	xx(dm_target)
+	xx(dm_linear)
+	xx(dm_stripe)
+	xx(dm_interface)
+#undef xx
+};
+
+static int __init dm_init(void)
+{
+	const int count = ARRAY_SIZE(_inits);
+
+	int r, i;
+
+	for (i = 0; i < count; i++) {
+		r = _inits[i].init();
+		if (r)
+			goto bad;
+	}
+
+	return 0;
+
+      bad:
+	while (i--)
+		_inits[i].exit();
+
+	return r;
+}
+
+static void __exit dm_exit(void)
+{
+	int i = ARRAY_SIZE(_inits);
+
+	while (i--)
+		_inits[i].exit();
+}
+
+/*
+ * Block device functions
+ */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+
+	md = get_kdev(inode->i_rdev);
+	if (!md)
+		return -ENXIO;
+
+	return 0;
+}
+
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+
+	md = get_kdev(inode->i_rdev);
+	dm_put(md);		/* put the reference gained by dm_blk_open */
+	dm_put(md);
+	return 0;
+}
+
+static inline struct dm_io *alloc_io(struct mapped_device *md)
+{
+	return mempool_alloc(md->io_pool, GFP_NOIO);
+}
+
+static inline void free_io(struct mapped_device *md, struct dm_io *io)
+{
+	mempool_free(io, md->io_pool);
+}
+
+static inline struct deferred_io *alloc_deferred(void)
+{
+	return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+static inline void free_deferred(struct deferred_io *di)
+{
+	kfree(di);
+}
+
+static inline sector_t volume_size(kdev_t dev)
+{
+	return blk_size[major(dev)][minor(dev)] << 1;
+}
+
+/* FIXME: check this */
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+			unsigned int command, unsigned long a)
+{
+	kdev_t dev = inode->i_rdev;
+	long size;
+
+	switch (command) {
+	case BLKROSET:
+	case BLKROGET:
+	case BLKRASET:
+	case BLKRAGET:
+	case BLKFLSBUF:
+	case BLKSSZGET:
+		//case BLKRRPART: /* Re-read partition tables */
+		//case BLKPG:
+	case BLKELVGET:
+	case BLKELVSET:
+	case BLKBSZGET:
+	case BLKBSZSET:
+		return blk_ioctl(dev, command, a);
+		break;
+
+	case BLKGETSIZE:
+		size = volume_size(dev);
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		break;
+
+	case BLKGETSIZE64:
+		size = volume_size(dev);
+		if (put_user((u64) ((u64) size) << 9, (u64 *) a))
+			return -EFAULT;
+		break;
+
+	case BLKRRPART:
+		return -ENOTTY;
+
+	case LV_BMAP:
+		return dm_user_bmap(inode, (struct lv_bmap *) a);
+
+	default:
+		DMWARN("unknown block ioctl 0x%x", command);
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+/*
+ * Add the buffer to the list of deferred io.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+	struct deferred_io *di;
+
+	di = alloc_deferred();
+	if (!di)
+		return -ENOMEM;
+
+	down_write(&md->lock);
+
+	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_write(&md->lock);
+		free_deferred(di);
+		return 1;
+	}
+
+	di->bh = bh;
+	di->rw = rw;
+	di->next = md->deferred;
+	md->deferred = di;
+
+	up_write(&md->lock);
+	return 0;		/* deferred successfully */
+}
+
+/*
+ * bh->b_end_io routine that decrements the pending count
+ * and then calls the original bh->b_end_io fn.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+	int r;
+	struct dm_io *io = bh->b_private;
+	dm_endio_fn endio = io->ti->type->end_io;
+
+	if (endio) {
+		r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO,
+			  &io->map_context);
+		if (r < 0)
+			uptodate = 0;
+
+		else if (r > 0)
+			/* the target wants another shot at the io */
+			return;
+	}
+
+	if (atomic_dec_and_test(&io->md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up(&io->md->wait);
+
+	bh->b_end_io = io->end_io;
+	bh->b_private = io->context;
+	free_io(io->md, io);
+
+	bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Do the bh mapping for a given leaf
+ */
+static inline int __map_buffer(struct mapped_device *md, int rw,
+			       struct buffer_head *bh, struct dm_io *io)
+{
+	struct dm_target *ti;
+
+	if (!md->map)
+		return -EINVAL;
+
+	ti = dm_table_find_target(md->map, bh->b_rsector);
+	if (!ti->type)
+		return -EINVAL;
+
+	/* hook the end io request fn */
+	atomic_inc(&md->pending);
+	io->md = md;
+	io->ti = ti;
+	io->rw = rw;
+	io->end_io = bh->b_end_io;
+	io->context = bh->b_private;
+	bh->b_end_io = dec_pending;
+	bh->b_private = io;
+
+	return ti->type->map(ti, bh, rw, &io->map_context);
+}
+
+/*
+ * Checks to see if we should be deferring io, if so it queues it
+ * and returns 1.
+ */
+static inline int __deferring(struct mapped_device *md, int rw,
+			      struct buffer_head *bh)
+{
+	int r;
+
+	/*
+	 * If we're suspended we have to queue this io for later.
+	 */
+	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_read(&md->lock);
+
+		/*
+		 * There's no point deferring a read ahead
+		 * request, just drop it.
+		 */
+		if (rw == READA) {
+			down_read(&md->lock);
+			return -EIO;
+		}
+
+		r = queue_io(md, bh, rw);
+		down_read(&md->lock);
+
+		if (r < 0)
+			return r;
+
+		if (r == 0)
+			return 1;	/* deferred successfully */
+
+	}
+
+	return 0;
+}
+
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+	int r;
+	struct dm_io *io;
+	struct mapped_device *md;
+
+	md = get_kdev(bh->b_rdev);
+	if (!md) {
+		buffer_IO_error(bh);
+		return 0;
+	}
+
+	io = alloc_io(md);
+	down_read(&md->lock);
+
+	r = __deferring(md, rw, bh);
+	if (r < 0)
+		goto bad;
+
+	else if (!r) {
+		/* not deferring */
+		r = __map_buffer(md, rw, bh, io);
+		if (r < 0)
+			goto bad;
+	} else
+		r = 0;
+
+	up_read(&md->lock);
+	dm_put(md);
+	return r;
+
+      bad:
+	buffer_IO_error(bh);
+	up_read(&md->lock);
+	dm_put(md);
+	return 0;
+}
+
+static int check_dev_size(kdev_t dev, unsigned long block)
+{
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+
+	/* FIXME: check this */
+	unsigned long max_sector = (blk_size[major][minor] << 1) + 1;
+	unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9);
+
+	return (sector > max_sector) ? 0 : 1;
+}
+
+/*
+ * Creates a dummy buffer head and maps it (for lilo).
+ */
+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block,
+		  kdev_t *r_dev, unsigned long *r_block)
+{
+	struct buffer_head bh;
+	struct dm_target *ti;
+	union map_info map_context;
+	int r;
+
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		return -EPERM;
+	}
+
+	if (!check_dev_size(dev, block)) {
+		return -EINVAL;
+	}
+
+	if (!md->map)
+		return -EINVAL;
+
+	/* setup dummy bh */
+	memset(&bh, 0, sizeof(bh));
+	bh.b_blocknr = block;
+	bh.b_dev = bh.b_rdev = dev;
+	bh.b_size = blksize_size[major(dev)][minor(dev)];
+	bh.b_rsector = block * (bh.b_size >> 9);
+
+	/* find target */
+	ti = dm_table_find_target(md->map, bh.b_rsector);
+
+	/* do the mapping */
+	r = ti->type->map(ti, &bh, READ, &map_context);
+	ti->type->end_io(ti, &bh, READ, 0, &map_context);
+
+	if (!r) {
+		*r_dev = bh.b_rdev;
+		*r_block = bh.b_rsector / (bh.b_size >> 9);
+	}
+
+	return r;
+}
+
+/*
+ * Marshals arguments and results between user and kernel space.
+ */
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+{
+	struct mapped_device *md;
+	unsigned long block, r_block;
+	kdev_t r_dev;
+	int r;
+
+	if (get_user(block, &lvb->lv_block))
+		return -EFAULT;
+
+	md = get_kdev(inode->i_rdev);
+	if (!md)
+		return -ENXIO;
+
+	down_read(&md->lock);
+	r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block);
+	up_read(&md->lock);
+	dm_put(md);
+
+	if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
+		   put_user(r_block, &lvb->lv_block)))
+		r = -EFAULT;
+
+	return r;
+}
+
+static void free_md(struct mapped_device *md)
+{
+	free_dev(md->dev);
+	mempool_destroy(md->io_pool);
+	kfree(md);
+}
+
+/*
+ * Allocate and initialise a blank device with a given minor.
+ */
+static struct mapped_device *alloc_md(kdev_t dev)
+{
+	int r;
+	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+	if (!md) {
+		DMWARN("unable to allocate device, out of memory.");
+		return NULL;
+	}
+
+	memset(md, 0, sizeof(*md));
+
+	/* Allocate suitable device number */
+	if (!dev)
+		r = first_free_dev(md);
+	else
+		r = specific_dev(dev, md);
+
+	if (r) {
+		kfree(md);
+		return NULL;
+	}
+
+	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+				     mempool_free_slab, _io_cache);
+	if (!md->io_pool) {
+		free_md(md);
+		kfree(md);
+		return NULL;
+	}
+
+	init_rwsem(&md->lock);
+	atomic_set(&md->holders, 1);
+	atomic_set(&md->pending, 0);
+	init_waitqueue_head(&md->wait);
+	init_waitqueue_head(&md->eventq);
+
+	return md;
+}
+
+/*
+ * The hardsect size for a mapped device is the largest hardsect size
+ * from the devices it maps onto.
+ */
+static int __find_hardsect_size(struct list_head *devices)
+{
+	int result = 512, size;
+	struct list_head *tmp;
+
+	list_for_each (tmp, devices) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		size = get_hardsect_size(dd->dev);
+		if (size > result)
+			result = size;
+	}
+
+	return result;
+}
+
+/*
+ * Bind a table to the device.
+ */
+static void event_callback(void *context)
+{
+	struct mapped_device *md = (struct mapped_device *) context;
+
+	down_write(&md->lock);
+	md->event_nr++;
+	wake_up_interruptible(&md->eventq);
+	up_write(&md->lock);
+}
+
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+	unsigned int minor = minor(md->dev);
+	unsigned int major = major(md->dev);
+	md->map = t;
+
+	/* in k */
+	blk_size[major][minor] = dm_table_get_size(t) >> 1;
+	blksize_size[major][minor] = BLOCK_SIZE;
+	hardsect_size[major][minor] =
+	    __find_hardsect_size(dm_table_get_devices(t));
+	register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]);
+
+	dm_table_event_callback(md->map, event_callback, md);
+	dm_table_get(t);
+	return 0;
+}
+
+static void __unbind(struct mapped_device *md)
+{
+	unsigned int minor = minor(md->dev);
+	unsigned int major = major(md->dev);
+
+	if (md->map) {
+		dm_table_event_callback(md->map, NULL, NULL);
+		dm_table_put(md->map);
+		md->map = NULL;
+
+	}
+
+	blk_size[major][minor] = 0;
+	blksize_size[major][minor] = 0;
+	hardsect_size[major][minor] = 0;
+}
+
+/*
+ * Constructor for a new device.
+ */
+int dm_create(kdev_t dev, struct mapped_device **result)
+{
+	struct mapped_device *md;
+
+	md = alloc_md(dev);
+	if (!md)
+		return -ENXIO;
+
+	__unbind(md);	/* Ensure zero device size */
+
+	*result = md;
+	return 0;
+}
+
+void dm_get(struct mapped_device *md)
+{
+	atomic_inc(&md->holders);
+}
+
+void dm_put(struct mapped_device *md)
+{
+	if (atomic_dec_and_test(&md->holders)) {
+		if (md->map)
+			dm_table_suspend_targets(md->map);
+		__unbind(md);
+		free_md(md);
+	}
+}
+
+/*
+ * Requeue the deferred io by calling generic_make_request.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+	struct deferred_io *n;
+
+	while (c) {
+		n = c->next;
+		generic_make_request(c->rw, c->bh);
+		free_deferred(c);
+		c = n;
+	}
+}
+
+/*
+ * Swap in a new table (destroying old one).
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+	int r;
+
+	down_write(&md->lock);
+
+	/*
+	 * The device must be suspended, or have no table bound yet.
+	 */
+	if (md->map && !test_bit(DMF_SUSPENDED, &md->flags)) {
+		up_write(&md->lock);
+		return -EPERM;
+	}
+
+	__unbind(md);
+	r = __bind(md, table);
+	if (r)
+		return r;
+
+	up_write(&md->lock);
+	return 0;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem.  For example we might want to move some data in
+ * the background.  Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight io and ensure that any further io gets deferred.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+	int r = 0;
+	DECLARE_WAITQUEUE(wait, current);
+
+	down_write(&md->lock);
+
+	/*
+	 * First we set the BLOCK_IO flag so no more ios will be
+	 * mapped.
+	 */
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_write(&md->lock);
+		return -EINVAL;
+	}
+
+	set_bit(DMF_BLOCK_IO, &md->flags);
+	add_wait_queue(&md->wait, &wait);
+	up_write(&md->lock);
+
+	/*
+	 * Then we wait for the already mapped ios to
+	 * complete.
+	 */
+	run_task_queue(&tq_disk);
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!atomic_read(&md->pending) || signal_pending(current))
+			break;
+
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	down_write(&md->lock);
+	remove_wait_queue(&md->wait, &wait);
+
+	/* did we flush everything ? */
+	if (atomic_read(&md->pending)) {
+		clear_bit(DMF_BLOCK_IO, &md->flags);
+		r = -EINTR;
+	} else {
+		set_bit(DMF_SUSPENDED, &md->flags);
+		if (md->map)
+			dm_table_suspend_targets(md->map);
+	}
+	up_write(&md->lock);
+
+	return r;
+}
+
+int dm_resume(struct mapped_device *md)
+{
+	struct deferred_io *def;
+
+	down_write(&md->lock);
+	if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+		up_write(&md->lock);
+		return -EINVAL;
+	}
+
+	if (md->map)
+		dm_table_resume_targets(md->map);
+
+	clear_bit(DMF_SUSPENDED, &md->flags);
+	clear_bit(DMF_BLOCK_IO, &md->flags);
+	def = md->deferred;
+	md->deferred = NULL;
+	up_write(&md->lock);
+
+	flush_deferred_io(def);
+	run_task_queue(&tq_disk);
+
+	return 0;
+}
+
+struct dm_table *dm_get_table(struct mapped_device *md)
+{
+	struct dm_table *t;
+
+	down_read(&md->lock);
+	t = md->map;
+	if (t)
+		dm_table_get(t);
+	up_read(&md->lock);
+
+	return t;
+}
+
+/*-----------------------------------------------------------------
+ * Event notification.
+ *---------------------------------------------------------------*/
+uint32_t dm_get_event_nr(struct mapped_device *md)
+{
+	uint32_t r;
+
+	down_read(&md->lock);
+	r = md->event_nr;
+	up_read(&md->lock);
+
+	return r;
+}
+
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+		      uint32_t event_nr)
+{
+	down_write(&md->lock);
+	if (event_nr != md->event_nr) {
+		up_write(&md->lock);
+		return 1;
+	}
+
+	add_wait_queue(&md->eventq, wq);
+	up_write(&md->lock);
+
+	return 0;
+}
+
+const char *dm_kdevname(kdev_t dev)
+{
+	static char buffer[32];
+	sprintf(buffer, "%03d:%03d", MAJOR(dev), MINOR(dev));
+	return buffer;
+}
+
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
+{
+	down_write(&md->lock);
+	remove_wait_queue(&md->eventq, wq);
+	up_write(&md->lock);
+}
+
+kdev_t dm_kdev(struct mapped_device *md)
+{
+	kdev_t dev;
+
+	down_read(&md->lock);
+	dev = md->dev;
+	up_read(&md->lock);
+
+	return dev;
+}
+
+int dm_suspended(struct mapped_device *md)
+{
+	return test_bit(DMF_SUSPENDED, &md->flags);
+}
+
+struct block_device_operations dm_blk_dops = {
+	.open = dm_blk_open,
+	.release = dm_blk_close,
+	.ioctl = dm_blk_ioctl,
+	.owner = THIS_MODULE
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+MODULE_DESCRIPTION(DM_NAME " driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(dm_kdevname);
--- diff/drivers/md/dm.h	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm.h	2003-12-09 10:56:38.000000000 +0000
@@ -0,0 +1,172 @@
+/*
+ * Internal header file for device mapper
+ *
+ * Copyright (C) 2001, 2002 Sistina Software
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+
+#define DM_NAME "device-mapper"
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
+
+/*
+ * FIXME: I think this should be with the definition of sector_t
+ * in types.h.
+ */
+#ifdef CONFIG_LBD
+#define SECTOR_FORMAT "%Lu"
+#else
+#define SECTOR_FORMAT "%lu"
+#endif
+
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+
+extern struct block_device_operations dm_blk_dops;
+
+/*
+ * List of devices that a metadevice uses and should open/close.
+ */
+struct dm_dev {
+	struct list_head list;
+
+	atomic_t count;
+	int mode;
+	kdev_t dev;
+	struct block_device *bdev;
+};
+
+struct dm_table;
+struct mapped_device;
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a struct mapped_device.
+ * Drop the reference with dm_put when you finish with the object.
+ *---------------------------------------------------------------*/
+int dm_create(kdev_t dev, struct mapped_device **md);
+
+/*
+ * Reference counting for md.
+ */
+void dm_get(struct mapped_device *md);
+void dm_put(struct mapped_device *md);
+
+/*
+ * A device can still be used while suspended, but I/O is deferred.
+ */
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+
+/*
+ * The device must be suspended before calling this method.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+
+/*
+ * Drop a reference on the table when you've finished with the
+ * result.
+ */
+struct dm_table *dm_get_table(struct mapped_device *md);
+
+/*
+ * Event functions.
+ */
+uint32_t dm_get_event_nr(struct mapped_device *md);
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+		      uint32_t event_nr);
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq);
+
+/*
+ * Info functions.
+ */
+kdev_t dm_kdev(struct mapped_device *md);
+int dm_suspended(struct mapped_device *md);
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a table.  Tables are also reference
+ * counted.
+ *---------------------------------------------------------------*/
+int dm_table_create(struct dm_table **result, int mode, unsigned num_targets);
+
+void dm_table_get(struct dm_table *t);
+void dm_table_put(struct dm_table *t);
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+			sector_t start,	sector_t len, char *params);
+int dm_table_complete(struct dm_table *t);
+void dm_table_event_callback(struct dm_table *t,
+			     void (*fn)(void *), void *context);
+void dm_table_event(struct dm_table *t);
+sector_t dm_table_get_size(struct dm_table *t);
+struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+unsigned int dm_table_get_num_targets(struct dm_table *t);
+struct list_head *dm_table_get_devices(struct dm_table *t);
+int dm_table_get_mode(struct dm_table *t);
+void dm_table_suspend_targets(struct dm_table *t);
+void dm_table_resume_targets(struct dm_table *t);
+
+/*-----------------------------------------------------------------
+ * A registry of target types.
+ *---------------------------------------------------------------*/
+int dm_target_init(void);
+void dm_target_exit(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+
+
+/*-----------------------------------------------------------------
+ * Useful inlines.
+ *---------------------------------------------------------------*/
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+				unsigned long num)
+{
+	return (num > (ULONG_MAX - fixed) / obj);
+}
+
+/*
+ * ceiling(n / size) * size
+ */
+static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
+{
+	unsigned long r = n % size;
+	return n + (r ? (size - r) : 0);
+}
+
+/*
+ * Ceiling(n / size)
+ */
+static inline unsigned long dm_div_up(unsigned long n, unsigned long size)
+{
+	return dm_round_up(n, size) / size;
+}
+
+const char *dm_kdevname(kdev_t dev);
+
+/*
+ * The device-mapper can be driven through one of two interfaces;
+ * ioctl or filesystem, depending which patch you have applied.
+ */
+int dm_interface_init(void);
+void dm_interface_exit(void);
+
+/*
+ * Targets for linear and striped mappings
+ */
+int dm_linear_init(void);
+void dm_linear_exit(void);
+
+int dm_stripe_init(void);
+void dm_stripe_exit(void);
+
+#endif
--- diff/include/linux/device-mapper.h	1970-01-01 01:00:00.000000000 +0100
+++ source/include/linux/device-mapper.h	2003-12-09 10:39:56.000000000 +0000
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DEVICE_MAPPER_H
+#define _LINUX_DEVICE_MAPPER_H
+
+typedef unsigned long sector_t;
+
+struct dm_target;
+struct dm_table;
+struct dm_dev;
+
+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
+
+union map_info {
+	void *ptr;
+	unsigned long long ll;
+};
+
+/*
+ * In the constructor the target parameter will already have the
+ * table, type, begin and len fields filled in.
+ */
+typedef int (*dm_ctr_fn) (struct dm_target * target, unsigned int argc,
+			  char **argv);
+
+/*
+ * The destructor doesn't need to free the dm_target, just
+ * anything hidden ti->private.
+ */
+typedef void (*dm_dtr_fn) (struct dm_target * ti);
+
+/*
+ * The map function must return:
+ * < 0: error
+ * = 0: The target will handle the io by resubmitting it later
+ * > 0: simple remap complete
+ */
+typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh,
+			  int rw, union map_info *map_context);
+
+/*
+ * Returns:
+ * < 0 : error (currently ignored)
+ * 0   : ended successfully
+ * 1   : for some reason the io has still not completed (eg,
+ *       multipath target might want to requeue a failed io).
+ */
+typedef int (*dm_endio_fn) (struct dm_target * ti,
+			    struct buffer_head * bh, int rw, int error,
+			    union map_info *map_context);
+typedef void (*dm_suspend_fn) (struct dm_target *ti);
+typedef void (*dm_resume_fn) (struct dm_target *ti);
+typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type,
+			     char *result, unsigned int maxlen);
+
+void dm_error(const char *message);
+
+/*
+ * Constructors should call these functions to ensure destination devices
+ * are opened/closed correctly.
+ * FIXME: too many arguments.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+		  sector_t len, int mode, struct dm_dev **result);
+void dm_put_device(struct dm_target *ti, struct dm_dev *d);
+
+/*
+ * Information about a target type
+ */
+struct target_type {
+	const char *name;
+	struct module *module;
+	dm_ctr_fn ctr;
+	dm_dtr_fn dtr;
+	dm_map_fn map;
+	dm_endio_fn end_io;
+	dm_suspend_fn suspend;
+	dm_resume_fn resume;
+	dm_status_fn status;
+};
+
+struct dm_target {
+	struct dm_table *table;
+	struct target_type *type;
+
+	/* target limits */
+	sector_t begin;
+	sector_t len;
+
+	/* target specific data */
+	void *private;
+
+	/* Used to provide an error string from the ctr */
+	char *error;
+};
+
+int dm_register_target(struct target_type *t);
+int dm_unregister_target(struct target_type *t);
+
+#endif				/* _LINUX_DEVICE_MAPPER_H */

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [Patch 4/4] dm: ioctl interface
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
                   ` (2 preceding siblings ...)
  2003-12-09 12:26 ` [Patch 3/4] dm: core files Joe Thornber
@ 2003-12-09 12:26 ` Joe Thornber
  2003-12-09 13:15 ` Device-mapper submission for 2.4 Marcelo Tosatti
  2003-12-09 19:50 ` William Lee Irwin III
  5 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:26 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

ioctl interface for dm
--- diff/arch/mips64/kernel/ioctl32.c	2003-08-26 13:50:03.000000000 +0100
+++ source/arch/mips64/kernel/ioctl32.c	2003-12-09 11:04:13.000000000 +0000
@@ -37,6 +37,7 @@
 #include <linux/ext2_fs.h>
 #include <linux/raid/md_u.h>
 #include <linux/serial.h>
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 #undef __KERNEL__		/* This file was born to be ugly ...  */
@@ -1222,6 +1223,22 @@
 	IOCTL32_DEFAULT(RESTART_ARRAY_RW),
 #endif /* CONFIG_MD */
 
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+	IOCTL32_DEFAULT(DM_VERSION),
+	IOCTL32_DEFAULT(DM_REMOVE_ALL),
+	IOCTL32_DEFAULT(DM_DEV_CREATE),
+	IOCTL32_DEFAULT(DM_DEV_REMOVE),
+	IOCTL32_DEFAULT(DM_TABLE_LOAD),
+	IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+	IOCTL32_DEFAULT(DM_DEV_RENAME),
+	IOCTL32_DEFAULT(DM_TABLE_DEPS),
+	IOCTL32_DEFAULT(DM_DEV_STATUS),
+	IOCTL32_DEFAULT(DM_TABLE_STATUS),
+	IOCTL32_DEFAULT(DM_DEV_WAIT),
+	IOCTL32_DEFAULT(DM_LIST_DEVICES),
+	IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
+
 #ifdef CONFIG_SIBYTE_TBPROF
 	IOCTL32_DEFAULT(SBPROF_ZBSTART),
 	IOCTL32_DEFAULT(SBPROF_ZBSTOP),
--- diff/arch/parisc/kernel/ioctl32.c	2003-08-26 13:50:03.000000000 +0100
+++ source/arch/parisc/kernel/ioctl32.c	2003-12-09 11:04:13.000000000 +0000
@@ -55,6 +55,7 @@
 #define max max */
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -3423,6 +3424,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
 #endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
 COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
 COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
--- diff/arch/ppc64/kernel/ioctl32.c	2003-08-26 13:50:04.000000000 +0100
+++ source/arch/ppc64/kernel/ioctl32.c	2003-12-09 11:04:13.000000000 +0000
@@ -66,6 +66,7 @@
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -4435,6 +4436,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
 COMPATIBLE_IOCTL(NBD_DISCONNECT),
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION),
+COMPATIBLE_IOCTL(DM_REMOVE_ALL),
+COMPATIBLE_IOCTL(DM_DEV_CREATE),
+COMPATIBLE_IOCTL(DM_DEV_REMOVE),
+COMPATIBLE_IOCTL(DM_TABLE_LOAD),
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
+COMPATIBLE_IOCTL(DM_DEV_RENAME),
+COMPATIBLE_IOCTL(DM_TABLE_DEPS),
+COMPATIBLE_IOCTL(DM_DEV_STATUS),
+COMPATIBLE_IOCTL(DM_TABLE_STATUS),
+COMPATIBLE_IOCTL(DM_DEV_WAIT),
+COMPATIBLE_IOCTL(DM_LIST_DEVICES),
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
 /* Remove *PRIVATE in 2.5 */
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
--- diff/arch/s390x/kernel/ioctl32.c	2003-08-26 13:50:04.000000000 +0100
+++ source/arch/s390x/kernel/ioctl32.c	2003-12-09 11:04:13.000000000 +0000
@@ -30,6 +30,7 @@
 #include <linux/blk.h>
 #include <linux/elevator.h>
 #include <linux/raw.h>
+#include <linux/dm-ioctl.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/dasd.h>
@@ -627,6 +628,20 @@
 
 	IOCTL32_DEFAULT(SIOCGSTAMP),
 
+	IOCTL32_DEFAULT(DM_VERSION),
+	IOCTL32_DEFAULT(DM_REMOVE_ALL),
+	IOCTL32_DEFAULT(DM_DEV_CREATE),
+	IOCTL32_DEFAULT(DM_DEV_REMOVE),
+	IOCTL32_DEFAULT(DM_TABLE_LOAD),
+	IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+	IOCTL32_DEFAULT(DM_DEV_RENAME),
+	IOCTL32_DEFAULT(DM_TABLE_DEPS),
+	IOCTL32_DEFAULT(DM_DEV_STATUS),
+	IOCTL32_DEFAULT(DM_TABLE_STATUS),
+	IOCTL32_DEFAULT(DM_DEV_WAIT),
+	IOCTL32_DEFAULT(DM_LIST_DEVICES),
+	IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+
 	IOCTL32_DEFAULT(LOOP_SET_FD),
 	IOCTL32_DEFAULT(LOOP_CLR_FD),
 
--- diff/arch/sparc64/kernel/ioctl32.c	2003-12-09 10:25:25.000000000 +0000
+++ source/arch/sparc64/kernel/ioctl32.c	2003-12-09 11:04:13.000000000 +0000
@@ -56,6 +56,7 @@
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -5086,6 +5087,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
 COMPATIBLE_IOCTL(NBD_DISCONNECT)
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 /* Linux-1394 */
 #if defined(CONFIG_IEEE1394) || defined(CONFIG_IEEE1394_MODULE)
 COMPATIBLE_IOCTL(AMDTP_IOC_CHANNEL)
--- diff/arch/x86_64/ia32/ia32_ioctl.c	2003-12-09 10:25:25.000000000 +0000
+++ source/arch/x86_64/ia32/ia32_ioctl.c	2003-12-09 11:04:14.000000000 +0000
@@ -67,6 +67,7 @@
 #define max max
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -4051,6 +4052,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
 #endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #ifdef CONFIG_AUTOFS_FS
 COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
 COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
--- diff/drivers/md/Makefile	2003-12-09 11:03:47.000000000 +0000
+++ source/drivers/md/Makefile	2003-12-09 11:02:02.000000000 +0000
@@ -8,7 +8,8 @@
 
 list-multi	:= lvm-mod.o dm-mod.o dm-mirror-mod.o
 lvm-mod-objs	:= lvm.o lvm-snap.o lvm-fs.o
-dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o
+dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+		   dm-ioctl.o
 
 # Note: link order is important.  All raid personalities
 # and xor.o must come before md.o, as they each initialise 
--- diff/drivers/md/dm-ioctl.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-ioctl.c	2003-12-09 11:01:41.000000000 +0000
@@ -0,0 +1,1284 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/dm-ioctl.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/blk.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
+
+/*-----------------------------------------------------------------
+ * The ioctl interface needs to be able to look up devices by
+ * name or uuid.
+ *---------------------------------------------------------------*/
+struct hash_cell {
+	struct list_head name_list;
+	struct list_head uuid_list;
+
+	char *name;
+	char *uuid;
+	struct mapped_device *md;
+	struct dm_table *new_map;
+
+	/* I hate devfs */
+	devfs_handle_t devfs_entry;
+};
+
+#define NUM_BUCKETS 64
+#define MASK_BUCKETS (NUM_BUCKETS - 1)
+static struct list_head _name_buckets[NUM_BUCKETS];
+static struct list_head _uuid_buckets[NUM_BUCKETS];
+
+static devfs_handle_t _dev_dir;
+void dm_hash_remove_all(void);
+
+/*
+ * Guards access to both hash tables.
+ */
+static DECLARE_RWSEM(_hash_lock);
+
+static void init_buckets(struct list_head *buckets)
+{
+	unsigned int i;
+
+	for (i = 0; i < NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(buckets + i);
+}
+
+int dm_hash_init(void)
+{
+	init_buckets(_name_buckets);
+	init_buckets(_uuid_buckets);
+	_dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
+	return 0;
+}
+
+void dm_hash_exit(void)
+{
+	dm_hash_remove_all();
+	devfs_unregister(_dev_dir);
+}
+
+/*-----------------------------------------------------------------
+ * Hash function:
+ * We're not really concerned with the str hash function being
+ * fast since it's only used by the ioctl interface.
+ *---------------------------------------------------------------*/
+static unsigned int hash_str(const char *str)
+{
+	const unsigned int hash_mult = 2654435387U;
+	unsigned int h = 0;
+
+	while (*str)
+		h = (h + (unsigned int) *str++) * hash_mult;
+
+	return h & MASK_BUCKETS;
+}
+
+/*-----------------------------------------------------------------
+ * Code for looking up a device by name
+ *---------------------------------------------------------------*/
+static struct hash_cell *__get_name_cell(const char *str)
+{
+	struct list_head *tmp;
+	struct hash_cell *hc;
+	unsigned int h = hash_str(str);
+
+	list_for_each (tmp, _name_buckets + h) {
+		hc = list_entry(tmp, struct hash_cell, name_list);
+		if (!strcmp(hc->name, str))
+			return hc;
+	}
+
+	return NULL;
+}
+
+static struct hash_cell *__get_uuid_cell(const char *str)
+{
+	struct list_head *tmp;
+	struct hash_cell *hc;
+	unsigned int h = hash_str(str);
+
+	list_for_each (tmp, _uuid_buckets + h) {
+		hc = list_entry(tmp, struct hash_cell, uuid_list);
+		if (!strcmp(hc->uuid, str))
+			return hc;
+	}
+
+	return NULL;
+}
+
+/*-----------------------------------------------------------------
+ * Inserting, removing and renaming a device.
+ *---------------------------------------------------------------*/
+static inline char *kstrdup(const char *str)
+{
+	char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
+	if (r)
+		strcpy(r, str);
+	return r;
+}
+
+static struct hash_cell *alloc_cell(const char *name, const char *uuid,
+				    struct mapped_device *md)
+{
+	struct hash_cell *hc;
+
+	hc = kmalloc(sizeof(*hc), GFP_KERNEL);
+	if (!hc)
+		return NULL;
+
+	hc->name = kstrdup(name);
+	if (!hc->name) {
+		kfree(hc);
+		return NULL;
+	}
+
+	if (!uuid)
+		hc->uuid = NULL;
+
+	else {
+		hc->uuid = kstrdup(uuid);
+		if (!hc->uuid) {
+			kfree(hc->name);
+			kfree(hc);
+			return NULL;
+		}
+	}
+
+	INIT_LIST_HEAD(&hc->name_list);
+	INIT_LIST_HEAD(&hc->uuid_list);
+	hc->md = md;
+	hc->new_map = NULL;
+	return hc;
+}
+
+static void free_cell(struct hash_cell *hc)
+{
+	if (hc) {
+		kfree(hc->name);
+		kfree(hc->uuid);
+		kfree(hc);
+	}
+}
+
+/*
+ * devfs stuff.
+ */
+static int register_with_devfs(struct hash_cell *hc)
+{
+	kdev_t dev = dm_kdev(hc->md);
+
+	hc->devfs_entry =
+	    devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER,
+			   major(dev), minor(dev),
+			   S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+			   &dm_blk_dops, NULL);
+
+	return 0;
+}
+
+static int unregister_with_devfs(struct hash_cell *hc)
+{
+	devfs_unregister(hc->devfs_entry);
+	return 0;
+}
+
+/*
+ * The kdev_t and uuid of a device can never change once it is
+ * initially inserted.
+ */
+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
+{
+	struct hash_cell *cell;
+
+	/*
+	 * Allocate the new cells.
+	 */
+	cell = alloc_cell(name, uuid, md);
+	if (!cell)
+		return -ENOMEM;
+
+	/*
+	 * Insert the cell into both hash tables.
+	 */
+	down_write(&_hash_lock);
+	if (__get_name_cell(name))
+		goto bad;
+
+	list_add(&cell->name_list, _name_buckets + hash_str(name));
+
+	if (uuid) {
+		if (__get_uuid_cell(uuid)) {
+			list_del(&cell->name_list);
+			goto bad;
+		}
+		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+	}
+	register_with_devfs(cell);
+	dm_get(md);
+	up_write(&_hash_lock);
+
+	return 0;
+
+      bad:
+	up_write(&_hash_lock);
+	free_cell(cell);
+	return -EBUSY;
+}
+
+void __hash_remove(struct hash_cell *hc)
+{
+	/* remove from the dev hash */
+	list_del(&hc->uuid_list);
+	list_del(&hc->name_list);
+	unregister_with_devfs(hc);
+	dm_put(hc->md);
+	if (hc->new_map)
+		dm_table_put(hc->new_map);
+	free_cell(hc);
+}
+
+void dm_hash_remove_all(void)
+{
+	int i;
+	struct hash_cell *hc;
+	struct list_head *tmp, *n;
+
+	down_write(&_hash_lock);
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_safe (tmp, n, _name_buckets + i) {
+			hc = list_entry(tmp, struct hash_cell, name_list);
+			__hash_remove(hc);
+		}
+	}
+	up_write(&_hash_lock);
+}
+
+int dm_hash_rename(const char *old, const char *new)
+{
+	char *new_name, *old_name;
+	struct hash_cell *hc;
+
+	/*
+	 * duplicate new.
+	 */
+	new_name = kstrdup(new);
+	if (!new_name)
+		return -ENOMEM;
+
+	down_write(&_hash_lock);
+
+	/*
+	 * Is new free ?
+	 */
+	hc = __get_name_cell(new);
+	if (hc) {
+		DMWARN("asked to rename to an already existing name %s -> %s",
+		       old, new);
+		up_write(&_hash_lock);
+		kfree(new_name);
+		return -EBUSY;
+	}
+
+	/*
+	 * Is there such a device as 'old' ?
+	 */
+	hc = __get_name_cell(old);
+	if (!hc) {
+		DMWARN("asked to rename a non existent device %s -> %s",
+		       old, new);
+		up_write(&_hash_lock);
+		kfree(new_name);
+		return -ENXIO;
+	}
+
+	/*
+	 * rename and move the name cell.
+	 */
+	list_del(&hc->name_list);
+	old_name = hc->name;
+	hc->name = new_name;
+	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+
+	/* rename the device node in devfs */
+	unregister_with_devfs(hc);
+	register_with_devfs(hc);
+
+	up_write(&_hash_lock);
+	kfree(old_name);
+	return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of the ioctl commands
+ *---------------------------------------------------------------*/
+/*
+ * All the ioctl commands get dispatched to functions with this
+ * prototype.
+ */
+typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
+
+static int remove_all(struct dm_ioctl *param, size_t param_size)
+{
+	dm_hash_remove_all();
+	param->data_size = 0;
+	return 0;
+}
+
+/*
+ * Round up the ptr to an 8-byte boundary.
+ */
+#define ALIGN_MASK 7
+static inline void *align_ptr(void *ptr)
+{
+	return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
+}
+
+/*
+ * Retrieves the data payload buffer from an already allocated
+ * struct dm_ioctl.
+ */
+static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
+			       size_t *len)
+{
+	param->data_start = align_ptr(param + 1) - (void *) param;
+
+	if (param->data_start < param_size)
+		*len = param_size - param->data_start;
+	else
+		*len = 0;
+
+	return ((void *) param) + param->data_start;
+}
+
+static int list_devices(struct dm_ioctl *param, size_t param_size)
+{
+	unsigned int i;
+	struct hash_cell *hc;
+	size_t len, needed = 0;
+	struct dm_name_list *nl, *old_nl = NULL;
+
+	down_write(&_hash_lock);
+
+	/*
+	 * Loop through all the devices working out how much
+	 * space we need.
+	 */
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_entry (hc, _name_buckets + i, name_list) {
+			needed += sizeof(struct dm_name_list);
+			needed += strlen(hc->name);
+			needed += ALIGN_MASK;
+		}
+	}
+
+	/*
+	 * Grab our output buffer.
+	 */
+	nl = get_result_buffer(param, param_size, &len);
+	if (len < needed) {
+		param->flags |= DM_BUFFER_FULL_FLAG;
+		goto out;
+	}
+	param->data_size = param->data_start + needed;
+
+	nl->dev = 0;	/* Flags no data */
+
+	/*
+	 * Now loop through filling out the names.
+	 */
+	for (i = 0; i < NUM_BUCKETS; i++) {
+		list_for_each_entry (hc, _name_buckets + i, name_list) {
+			if (old_nl)
+				old_nl->next = (uint32_t) ((void *) nl -
+							   (void *) old_nl);
+
+			nl->dev = dm_kdev(hc->md);
+			nl->next = 0;
+			strcpy(nl->name, hc->name);
+
+			old_nl = nl;
+			nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
+		}
+	}
+
+ out:
+	up_write(&_hash_lock);
+	return 0;
+}
+
+static int check_name(const char *name)
+{
+	if (strchr(name, '/')) {
+		DMWARN("invalid device name");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Fills in a dm_ioctl structure, ready for sending back to
+ * userland.
+ */
+static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
+{
+	kdev_t dev = dm_kdev(md);
+	struct dm_table *table;
+	struct block_device *bdev;
+
+	param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
+			  DM_ACTIVE_PRESENT_FLAG);
+
+	if (dm_suspended(md))
+		param->flags |= DM_SUSPEND_FLAG;
+
+	param->dev = kdev_t_to_nr(dev);
+
+	if (is_read_only(dev))
+		param->flags |= DM_READONLY_FLAG;
+
+	param->event_nr = dm_get_event_nr(md);
+
+	table = dm_get_table(md);
+	if (table) {
+		param->flags |= DM_ACTIVE_PRESENT_FLAG;
+		param->target_count = dm_table_get_num_targets(table);
+		dm_table_put(table);
+	} else
+		param->target_count = 0;
+
+	bdev = bdget(param->dev);
+	if (!bdev)
+		return -ENXIO;
+	param->open_count = bdev->bd_openers;
+	bdput(bdev);
+
+	return 0;
+}
+
+static int dev_create(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	kdev_t dev = 0;
+	struct mapped_device *md;
+
+	r = check_name(param->name);
+	if (r)
+		return r;
+
+	if (param->flags & DM_PERSISTENT_DEV_FLAG)
+		dev = to_kdev_t(param->dev);
+
+	r = dm_create(dev, &md);
+	if (r)
+		return r;
+
+	r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
+	if (r) {
+		dm_put(md);
+		return r;
+	}
+
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(md, param);
+	dm_put(md);
+
+	return r;
+}
+
+/*
+ * Always use UUID for lookups if it's present, otherwise use name.
+ */
+static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
+{
+	return *param->uuid ?
+	    __get_uuid_cell(param->uuid) : __get_name_cell(param->name);
+}
+
+static inline struct mapped_device *find_device(struct dm_ioctl *param)
+{
+	struct hash_cell *hc;
+	struct mapped_device *md = NULL;
+
+	down_read(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+	if (hc) {
+		md = hc->md;
+
+		/*
+		 * Sneakily write in both the name and the uuid
+		 * while we have the cell.
+		 */
+		strncpy(param->name, hc->name, sizeof(param->name));
+		if (hc->uuid)
+			strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1);
+		else
+			param->uuid[0] = '\0';
+
+		if (hc->new_map)
+			param->flags |= DM_INACTIVE_PRESENT_FLAG;
+		else
+			param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+		dm_get(md);
+	}
+	up_read(&_hash_lock);
+
+	return md;
+}
+
+static int dev_remove(struct dm_ioctl *param, size_t param_size)
+{
+	struct hash_cell *hc;
+
+	down_write(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	__hash_remove(hc);
+	up_write(&_hash_lock);
+	param->data_size = 0;
+	return 0;
+}
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+static int invalid_str(char *str, void *end)
+{
+	while ((void *) str < end)
+		if (!*str++)
+			return 0;
+
+	return -EINVAL;
+}
+
+static int dev_rename(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	char *new_name = (char *) param + param->data_start;
+
+	if (new_name < (char *) (param + 1) ||
+	    invalid_str(new_name, (void *) param + param_size)) {
+		DMWARN("Invalid new logical volume name supplied.");
+		return -EINVAL;
+	}
+
+	r = check_name(new_name);
+	if (r)
+		return r;
+
+	param->data_size = 0;
+	return dm_hash_rename(param->name, new_name);
+}
+
+static int do_suspend(struct dm_ioctl *param)
+{
+	int r = 0;
+	struct mapped_device *md;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	if (!dm_suspended(md))
+		r = dm_suspend(md);
+
+	if (!r)
+		r = __dev_status(md, param);
+
+	dm_put(md);
+	return r;
+}
+
+static int do_resume(struct dm_ioctl *param)
+{
+	int r = 0;
+	struct hash_cell *hc;
+	struct mapped_device *md;
+	struct dm_table *new_map;
+
+	down_write(&_hash_lock);
+
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	md = hc->md;
+	dm_get(md);
+
+	new_map = hc->new_map;
+	hc->new_map = NULL;
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	up_write(&_hash_lock);
+
+	/* Do we need to load a new map ? */
+	if (new_map) {
+		/* Suspend if it isn't already suspended */
+		if (!dm_suspended(md))
+			dm_suspend(md);
+
+		r = dm_swap_table(md, new_map);
+		if (r) {
+			dm_put(md);
+			dm_table_put(new_map);
+			return r;
+		}
+
+		if (dm_table_get_mode(new_map) & FMODE_WRITE)
+			set_device_ro(dm_kdev(md), 0);
+		else
+			set_device_ro(dm_kdev(md), 1);
+
+		dm_table_put(new_map);
+	}
+
+	if (dm_suspended(md))
+		r = dm_resume(md);
+
+	if (!r)
+		r = __dev_status(md, param);
+
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Set or unset the suspension state of a device.
+ * If the device already is in the requested state we just return its status.
+ */
+static int dev_suspend(struct dm_ioctl *param, size_t param_size)
+{
+	if (param->flags & DM_SUSPEND_FLAG)
+		return do_suspend(param);
+
+	return do_resume(param);
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+static int dev_status(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	r = __dev_status(md, param);
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Build up the status struct for each target
+ */
+static void retrieve_status(struct dm_table *table, struct dm_ioctl *param,
+			    size_t param_size)
+{
+	unsigned int i, num_targets;
+	struct dm_target_spec *spec;
+	char *outbuf, *outptr;
+	status_type_t type;
+	size_t remaining, len, used = 0;
+
+	outptr = outbuf = get_result_buffer(param, param_size, &len);
+
+	if (param->flags & DM_STATUS_TABLE_FLAG)
+		type = STATUSTYPE_TABLE;
+	else
+		type = STATUSTYPE_INFO;
+
+	/* Get all the target info */
+	num_targets = dm_table_get_num_targets(table);
+	for (i = 0; i < num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(table, i);
+
+		remaining = len - (outptr - outbuf);
+		if (remaining < sizeof(struct dm_target_spec)) {
+			param->flags |= DM_BUFFER_FULL_FLAG;
+			break;
+		}
+
+		spec = (struct dm_target_spec *) outptr;
+
+		spec->status = 0;
+		spec->sector_start = ti->begin;
+		spec->length = ti->len;
+		strncpy(spec->target_type, ti->type->name,
+			sizeof(spec->target_type));
+
+		outptr += sizeof(struct dm_target_spec);
+		remaining = len - (outptr - outbuf);
+
+		/* Get the status/table string from the target driver */
+		if (ti->type->status) {
+			if (ti->type->status(ti, type, outptr, remaining)) {
+				param->flags |= DM_BUFFER_FULL_FLAG;
+				break;
+			}
+		} else
+			outptr[0] = '\0';
+
+		outptr += strlen(outptr) + 1;
+		used = param->data_start + (outptr - outbuf);
+
+		align_ptr(outptr);
+		spec->next = outptr - outbuf;
+	}
+
+	if (used)
+		param->data_size = used;
+
+	param->target_count = num_targets;
+}
+
+/*
+ * Wait for a device to report an event
+ */
+static int dev_wait(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *table;
+	DECLARE_WAITQUEUE(wq, current);
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	/*
+	 * Wait for a notification event
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
+		schedule();
+		dm_remove_wait_queue(md, &wq);
+	}
+	set_current_state(TASK_RUNNING);
+
+	/*
+	 * The userland program is going to want to know what
+	 * changed to trigger the event, so we may as well tell
+	 * him and save an ioctl.
+	 */
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+
+	table = dm_get_table(md);
+	if (table) {
+		retrieve_status(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);
+	return r;
+}
+
+static inline int get_mode(struct dm_ioctl *param)
+{
+	int mode = FMODE_READ | FMODE_WRITE;
+
+	if (param->flags & DM_READONLY_FLAG)
+		mode = FMODE_READ;
+
+	return mode;
+}
+
+static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
+		       struct dm_target_spec **spec, char **target_params)
+{
+	*spec = (struct dm_target_spec *) ((unsigned char *) last + next);
+	*target_params = (char *) (*spec + 1);
+
+	if (*spec < (last + 1))
+		return -EINVAL;
+
+	return invalid_str(*target_params, end);
+}
+
+static int populate_table(struct dm_table *table, struct dm_ioctl *param,
+			  size_t param_size)
+{
+	int r;
+	unsigned int i = 0;
+	struct dm_target_spec *spec = (struct dm_target_spec *) param;
+	uint32_t next = param->data_start;
+	void *end = (void *) param + param_size;
+	char *target_params;
+
+	if (!param->target_count) {
+		DMWARN("populate_table: no targets specified");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < param->target_count; i++) {
+
+		r = next_target(spec, next, end, &spec, &target_params);
+		if (r) {
+			DMWARN("unable to find target");
+			return r;
+		}
+
+		r = dm_table_add_target(table, spec->target_type,
+					(sector_t) spec->sector_start,
+					(sector_t) spec->length,
+					target_params);
+		if (r) {
+			DMWARN("error adding target to table");
+			return r;
+		}
+
+		next = spec->next;
+	}
+
+	return dm_table_complete(table);
+}
+
+static int table_load(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct hash_cell *hc;
+	struct dm_table *t;
+
+	r = dm_table_create(&t, get_mode(param), param->target_count);
+	if (r)
+		return r;
+
+	r = populate_table(t, param, param_size);
+	if (r) {
+		dm_table_put(t);
+		return r;
+	}
+
+	down_write(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	if (hc->new_map)
+		dm_table_put(hc->new_map);
+	hc->new_map = t;
+	param->flags |= DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(hc->md, param);
+	up_write(&_hash_lock);
+	return r;
+}
+
+static int table_clear(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct hash_cell *hc;
+
+	down_write(&_hash_lock);
+
+	hc = __find_device_hash_cell(param);
+	if (!hc) {
+		DMWARN("device doesn't appear to be in the dev hash table.");
+		up_write(&_hash_lock);
+		return -ENXIO;
+	}
+
+	if (hc->new_map) {
+		dm_table_put(hc->new_map);
+		hc->new_map = NULL;
+	}
+
+	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	r = __dev_status(hc->md, param);
+	up_write(&_hash_lock);
+	return r;
+}
+
+/*
+ * Retrieves a list of devices used by a particular dm device.
+ */
+static void retrieve_deps(struct dm_table *table, struct dm_ioctl *param,
+			  size_t param_size)
+{
+	unsigned int count = 0;
+	struct list_head *tmp;
+	size_t len, needed;
+	struct dm_target_deps *deps;
+
+	deps = get_result_buffer(param, param_size, &len);
+
+	/*
+	 * Count the devices.
+	 */
+	list_for_each(tmp, dm_table_get_devices(table))
+		count++;
+
+	/*
+	 * Check we have enough space.
+	 */
+	needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
+	if (len < needed) {
+		param->flags |= DM_BUFFER_FULL_FLAG;
+		return;
+	}
+
+	/*
+	 * Fill in the devices.
+	 */
+	deps->count = count;
+	count = 0;
+	list_for_each(tmp, dm_table_get_devices(table)) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		deps->dev[count++] = dd->bdev->bd_dev;
+	}
+
+	param->data_size = param->data_start + needed;
+}
+
+static int table_deps(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *table;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+
+	table = dm_get_table(md);
+	if (table) {
+		retrieve_deps(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);
+	return r;
+}
+
+/*
+ * Return the status of a device as a text string for each
+ * target.
+ */
+static int table_status(struct dm_ioctl *param, size_t param_size)
+{
+	int r;
+	struct mapped_device *md;
+	struct dm_table *table;
+
+	md = find_device(param);
+	if (!md)
+		return -ENXIO;
+
+	r = __dev_status(md, param);
+	if (r)
+		goto out;
+ 
+	table = dm_get_table(md);
+	if (table) {
+		retrieve_status(table, param, param_size);
+		dm_table_put(table);
+	}
+
+ out:
+	dm_put(md);
+	return r;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of open/close/ioctl on the special char
+ * device.
+ *---------------------------------------------------------------*/
+static ioctl_fn lookup_ioctl(unsigned int cmd)
+{
+	static struct {
+		int cmd;
+		ioctl_fn fn;
+	} _ioctls[] = {
+		{DM_VERSION_CMD, NULL},	/* version is dealt with elsewhere */
+		{DM_REMOVE_ALL_CMD, remove_all},
+		{DM_LIST_DEVICES_CMD, list_devices},
+
+		{DM_DEV_CREATE_CMD, dev_create},
+		{DM_DEV_REMOVE_CMD, dev_remove},
+		{DM_DEV_RENAME_CMD, dev_rename},
+		{DM_DEV_SUSPEND_CMD, dev_suspend},
+		{DM_DEV_STATUS_CMD, dev_status},
+		{DM_DEV_WAIT_CMD, dev_wait},
+
+		{DM_TABLE_LOAD_CMD, table_load},
+		{DM_TABLE_CLEAR_CMD, table_clear},
+		{DM_TABLE_DEPS_CMD, table_deps},
+		{DM_TABLE_STATUS_CMD, table_status}
+	};
+
+	return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+}
+
+/*
+ * As well as checking the version compatibility this always
+ * copies the kernel interface version out.
+ */
+static int check_version(unsigned int cmd, struct dm_ioctl *user)
+{
+	uint32_t version[3];
+	int r = 0;
+
+	if (copy_from_user(version, user->version, sizeof(version)))
+		return -EFAULT;
+
+	if ((DM_VERSION_MAJOR != version[0]) ||
+	    (DM_VERSION_MINOR < version[1])) {
+		DMWARN("ioctl interface mismatch: "
+		       "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+		       DM_VERSION_MAJOR, DM_VERSION_MINOR,
+		       DM_VERSION_PATCHLEVEL,
+		       version[0], version[1], version[2], cmd);
+		r = -EINVAL;
+	}
+
+	/*
+	 * Fill in the kernel version.
+	 */
+	version[0] = DM_VERSION_MAJOR;
+	version[1] = DM_VERSION_MINOR;
+	version[2] = DM_VERSION_PATCHLEVEL;
+	if (copy_to_user(user->version, version, sizeof(version)))
+		return -EFAULT;
+
+	return r;
+}
+
+static void free_params(struct dm_ioctl *param)
+{
+	vfree(param);
+}
+
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
+{
+	struct dm_ioctl tmp, *dmi;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)))
+		return -EFAULT;
+
+	if (tmp.data_size < sizeof(tmp))
+		return -EINVAL;
+
+	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+	if (!dmi)
+		return -ENOMEM;
+
+	if (copy_from_user(dmi, user, tmp.data_size)) {
+		vfree(dmi);
+		return -EFAULT;
+	}
+
+	*param = dmi;
+	return 0;
+}
+
+static int validate_params(uint cmd, struct dm_ioctl *param)
+{
+	/* Always clear this flag */
+	param->flags &= ~DM_BUFFER_FULL_FLAG;
+
+	/* Ignores parameters */
+	if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
+		return 0;
+
+	/* Unless creating, either name or uuid but not both */
+	if (cmd != DM_DEV_CREATE_CMD) {
+		if ((!*param->uuid && !*param->name) ||
+		    (*param->uuid && *param->name)) {
+			DMWARN("one of name or uuid must be supplied, cmd(%u)",
+			       cmd);
+			return -EINVAL;
+		}
+	}
+
+	/* Ensure strings are terminated */
+	param->name[DM_NAME_LEN - 1] = '\0';
+	param->uuid[DM_UUID_LEN - 1] = '\0';
+
+	return 0;
+}
+
+static int ctl_ioctl(struct inode *inode, struct file *file,
+		     uint command, ulong u)
+{
+	int r = 0;
+	unsigned int cmd;
+	struct dm_ioctl *param;
+	struct dm_ioctl *user = (struct dm_ioctl *) u;
+	ioctl_fn fn = NULL;
+	size_t param_size;
+
+	/* only root can play with this */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (_IOC_TYPE(command) != DM_IOCTL)
+		return -ENOTTY;
+
+	cmd = _IOC_NR(command);
+
+	/*
+	 * Check the interface version passed in.  This also
+	 * writes out the kernel's interface version.
+	 */
+	r = check_version(cmd, user);
+	if (r)
+		return r;
+
+	/*
+	 * Nothing more to do for the version command.
+	 */
+	if (cmd == DM_VERSION_CMD)
+		return 0;
+
+	fn = lookup_ioctl(cmd);
+	if (!fn) {
+		DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
+		return -ENOTTY;
+	}
+
+	/*
+	 * FIXME: I don't like this, we're trying to avoid low
+	 * memory issues when a device is suspended.
+	 */
+	current->flags |= PF_MEMALLOC;
+
+	/*
+	 * Copy the parameters into kernel space.
+	 */
+	r = copy_params(user, &param);
+	if (r) {
+		current->flags &= ~PF_MEMALLOC;
+		return r;
+	}
+
+	r = validate_params(cmd, param);
+	if (r)
+		goto out;
+
+	param_size = param->data_size;
+	param->data_size = sizeof(*param);
+	r = fn(param, param_size);
+
+	/*
+	 * Copy the results back to userland.
+	 */
+	if (!r && copy_to_user(user, param, param->data_size))
+		r = -EFAULT;
+
+ out:
+	free_params(param);
+	current->flags &= ~PF_MEMALLOC;
+	return r;
+}
+
+static struct file_operations _ctl_fops = {
+	.ioctl	 = ctl_ioctl,
+	.owner	 = THIS_MODULE,
+};
+
+static devfs_handle_t _ctl_handle;
+
+static struct miscdevice _dm_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name  = DM_NAME,
+	.fops  = &_ctl_fops
+};
+
+/*
+ * Create misc character device and link to DM_DIR/control.
+ */
+int __init dm_interface_init(void)
+{
+	int r;
+	char rname[64];
+
+	r = dm_hash_init();
+	if (r)
+		return r;
+
+	r = misc_register(&_dm_misc);
+	if (r) {
+		DMERR("misc_register failed for control device");
+		dm_hash_exit();
+		return r;
+	}
+
+	r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
+				sizeof rname - 3);
+	if (r == -ENOSYS)
+		goto done;	/* devfs not present */
+
+	if (r < 0) {
+		DMERR("devfs_generate_path failed for control device");
+		goto failed;
+	}
+
+	strncpy(rname + r, "../", 3);
+	r = devfs_mk_symlink(NULL, DM_DIR "/control",
+			     DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
+	if (r) {
+		DMERR("devfs_mk_symlink failed for control device");
+		goto failed;
+	}
+	devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
+
+      done:
+	DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
+	       DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
+	       DM_DRIVER_EMAIL);
+	return 0;
+
+      failed:
+	misc_deregister(&_dm_misc);
+	dm_hash_exit();
+	return r;
+}
+
+void dm_interface_exit(void)
+{
+	if (misc_deregister(&_dm_misc) < 0)
+		DMERR("misc_deregister failed for control device");
+
+	dm_hash_exit();
+}
--- diff/include/linux/dm-ioctl.h	1970-01-01 01:00:00.000000000 +0100
+++ source/include/linux/dm-ioctl.h	2003-12-09 11:01:41.000000000 +0000
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_H
+#define _LINUX_DM_IOCTL_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper"		/* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables.  Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled.  The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed.  Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device.  If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_.  Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device.  This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device.  The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start.  If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+	/*
+	 * The version number is made up of three parts:
+	 * major - no backward or forward compatibility,
+	 * minor - only backwards compatible,
+	 * patch - both backwards and forwards compatible.
+	 *
+	 * All clients of the ioctl interface should fill in the
+	 * version number of the interface that they were
+	 * compiled with.
+	 *
+	 * All recognised ioctl commands (ie. those that don't
+	 * return -ENOTTY) fill out this field, even if the
+	 * command failed.
+	 */
+	uint32_t version[3];	/* in/out */
+	uint32_t data_size;	/* total size of data passed in
+				 * including this struct */
+
+	uint32_t data_start;	/* offset to start of data
+				 * relative to start of this struct */
+
+	uint32_t target_count;	/* in/out */
+	int32_t open_count;	/* out */
+	uint32_t flags;		/* in/out */
+	uint32_t event_nr;      /* in/out */
+	uint32_t padding;
+
+	uint64_t dev;		/* in/out */
+
+	char name[DM_NAME_LEN];	/* device name */
+	char uuid[DM_UUID_LEN];	/* unique identifier for
+				 * the block device */
+};
+
+/*
+ * Used to specify tables.  These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+	uint64_t sector_start;
+	uint64_t length;
+	int32_t status;		/* used when reading from kernel only */
+
+	/*
+	 * Offset in bytes (from the start of this struct) to
+	 * next target_spec.
+	 */
+	uint32_t next;
+
+	char target_type[DM_MAX_TYPE_NAME];
+
+	/*
+	 * Parameter string starts immediately after this object.
+	 * Be careful to add padding after string to ensure correct
+	 * alignment of subsequent dm_target_spec.
+	 */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+	uint32_t count;		/* Array size */
+	uint32_t padding;	/* unused */
+	uint64_t dev[0];	/* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+	uint64_t dev;
+	uint32_t next;		/* offset to the next record from
+				   the _start_ of this */
+	char name[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+	/* Top level cmds */
+	DM_VERSION_CMD = 0,
+	DM_REMOVE_ALL_CMD,
+	DM_LIST_DEVICES_CMD,
+
+	/* device level cmds */
+	DM_DEV_CREATE_CMD,
+	DM_DEV_REMOVE_CMD,
+	DM_DEV_RENAME_CMD,
+	DM_DEV_SUSPEND_CMD,
+	DM_DEV_STATUS_CMD,
+	DM_DEV_WAIT_CMD,
+
+	/* Table level cmds */
+	DM_TABLE_LOAD_CMD,
+	DM_TABLE_CLEAR_CMD,
+	DM_TABLE_DEPS_CMD,
+	DM_TABLE_STATUS_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR	4
+#define DM_VERSION_MINOR	0
+#define DM_VERSION_PATCHLEVEL	5
+#define DM_VERSION_EXTRA	"-ioctl (2003-11-18)"
+
+/* Status bits */
+#define DM_READONLY_FLAG	(1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG		(1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG	(1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG	(1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG   (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG	(1 << 8) /* Out */
+
+#endif				/* _LINUX_DM_IOCTL_H */

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
                   ` (3 preceding siblings ...)
  2003-12-09 12:26 ` [Patch 4/4] dm: ioctl interface Joe Thornber
@ 2003-12-09 13:15 ` Marcelo Tosatti
  2003-12-09 13:45   ` Joe Thornber
  2003-12-09 19:50 ` William Lee Irwin III
  5 siblings, 1 reply; 55+ messages in thread
From: Marcelo Tosatti @ 2003-12-09 13:15 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List



On Tue, 9 Dec 2003, Joe Thornber wrote:

> Marcello,
> 
> This set of patches is the core of device mapper for 2.4.  I would
> appreciate it if you could merge these into 2.4.24 please.

Joe, 

I believe 2.6 is the right place for the device mapper. 


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 13:15 ` Device-mapper submission for 2.4 Marcelo Tosatti
@ 2003-12-09 13:45   ` Joe Thornber
  2003-12-09 14:00     ` Måns Rullgård
                       ` (2 more replies)
  0 siblings, 3 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 13:45 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Joe Thornber, Linux Mailing List

On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> I believe 2.6 is the right place for the device mapper. 

So what's the difference between a new filesystem like XFS and a new
device driver like dm ?

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 13:45   ` Joe Thornber
@ 2003-12-09 14:00     ` Måns Rullgård
  2003-12-09 14:10       ` Muli Ben-Yehuda
                         ` (2 more replies)
  2003-12-09 14:10     ` Marcelo Tosatti
  2003-12-09 14:23     ` Stefan Smietanowski
  2 siblings, 3 replies; 55+ messages in thread
From: Måns Rullgård @ 2003-12-09 14:00 UTC (permalink / raw)
  To: linux-kernel

Joe Thornber <thornber@sistina.com> writes:

>> I believe 2.6 is the right place for the device mapper. 
>
> So what's the difference between a new filesystem like XFS and a new
> device driver like dm ?

None.  Neither will go into 2.4, if I've understood things correctly.

-- 
Måns Rullgård
mru@kth.se


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 13:45   ` Joe Thornber
  2003-12-09 14:00     ` Måns Rullgård
@ 2003-12-09 14:10     ` Marcelo Tosatti
  2003-12-09 14:34       ` Joe Thornber
                         ` (2 more replies)
  2003-12-09 14:23     ` Stefan Smietanowski
  2 siblings, 3 replies; 55+ messages in thread
From: Marcelo Tosatti @ 2003-12-09 14:10 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List



On Tue, 9 Dec 2003, Joe Thornber wrote:

> On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> > I believe 2.6 is the right place for the device mapper. 
> 
> So what's the difference between a new filesystem like XFS and a new
> device driver like dm ?

Expected question... 

XFS is a totally different filesystem from the ones present in 2.4. 

As far as I know, we already have the similar functionality in 2.4 with
LVM. Device mapper provides the same functionality but in a much cleaner
way. Is that right?


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:00     ` Måns Rullgård
@ 2003-12-09 14:10       ` Muli Ben-Yehuda
  2003-12-09 14:21         ` Måns Rullgård
  2003-12-09 14:16       ` Joe Thornber
  2003-12-09 14:24       ` Stefan Smietanowski
  2 siblings, 1 reply; 55+ messages in thread
From: Muli Ben-Yehuda @ 2003-12-09 14:10 UTC (permalink / raw)
  To: M?ns Rullg?rd; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 602 bytes --]

On Tue, Dec 09, 2003 at 03:00:15PM +0100, M?ns Rullg?rd wrote:
> Joe Thornber <thornber@sistina.com> writes:
> 
> >> I believe 2.6 is the right place for the device mapper. 
> >
> > So what's the difference between a new filesystem like XFS and a new
> > device driver like dm ?
> 
> None.  Neither will go into 2.4, if I've understood things
> correctly.

You haven't been following lkml, I guess. xfs has been merged to 2.4. 

Cheers, 
Muli 
-- 
Muli Ben-Yehuda
http://www.mulix.org | http://mulix.livejournal.com/

"the nucleus of linux oscillates my world" - gccbot@#offtopic


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:00     ` Måns Rullgård
  2003-12-09 14:10       ` Muli Ben-Yehuda
@ 2003-12-09 14:16       ` Joe Thornber
  2003-12-09 14:24       ` Stefan Smietanowski
  2 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 14:16 UTC (permalink / raw)
  To: M?ns Rullg?rd; +Cc: linux-kernel

On Tue, Dec 09, 2003 at 03:00:15PM +0100, M?ns Rullg?rd wrote:
> None.  Neither will go into 2.4, if I've understood things correctly.

http://kerneltrap.org/node/view/1751

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:10       ` Muli Ben-Yehuda
@ 2003-12-09 14:21         ` Måns Rullgård
  0 siblings, 0 replies; 55+ messages in thread
From: Måns Rullgård @ 2003-12-09 14:21 UTC (permalink / raw)
  To: linux-kernel

Muli Ben-Yehuda <mulix@mulix.org> writes:

>> >> I believe 2.6 is the right place for the device mapper. 
>> >
>> > So what's the difference between a new filesystem like XFS and a new
>> > device driver like dm ?
>> 
>> None.  Neither will go into 2.4, if I've understood things
>> correctly.
>
> You haven't been following lkml, I guess. xfs has been merged to 2.4. 

Yes, I see that now.  After Marcelo had said "no" about a hundred
times, I stopped reading anything with xfs in the subject.  I guess I
should have looked a little bit closer at that last subject.

-- 
Måns Rullgård
mru@kth.se


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 13:45   ` Joe Thornber
  2003-12-09 14:00     ` Måns Rullgård
  2003-12-09 14:10     ` Marcelo Tosatti
@ 2003-12-09 14:23     ` Stefan Smietanowski
  2003-12-09 14:36       ` Joe Thornber
  2 siblings, 1 reply; 55+ messages in thread
From: Stefan Smietanowski @ 2003-12-09 14:23 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

Joe Thornber wrote:

> On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> 
>>I believe 2.6 is the right place for the device mapper. 
> 
> 
> So what's the difference between a new filesystem like XFS and a new
> device driver like dm ?

One thing you're missing is that after all, XFS has existed longer than
dm. Hell, XFS existed before 2.4 did (in a Linux form, I'm not talking
IRIX now).

XFS is also a new filesystem as you said but DM is meant as a
replacement for other functions, not strictly as an additive.

// Stefan


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:00     ` Måns Rullgård
  2003-12-09 14:10       ` Muli Ben-Yehuda
  2003-12-09 14:16       ` Joe Thornber
@ 2003-12-09 14:24       ` Stefan Smietanowski
  2 siblings, 0 replies; 55+ messages in thread
From: Stefan Smietanowski @ 2003-12-09 14:24 UTC (permalink / raw)
  To: Måns Rullgård; +Cc: linux-kernel

Måns Rullgård wrote:

> Joe Thornber <thornber@sistina.com> writes:
> 
> 
>>>I believe 2.6 is the right place for the device mapper. 
>>
>>So what's the difference between a new filesystem like XFS and a new
>>device driver like dm ?
> 
> 
> None.  Neither will go into 2.4, if I've understood things correctly.
> 

XFS is in latest BK.

// Stefan


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:10     ` Marcelo Tosatti
@ 2003-12-09 14:34       ` Joe Thornber
  2003-12-09 21:07         ` Paul Jakma
  2003-12-09 17:02       ` Bill Rugolsky Jr.
  2003-12-09 17:45       ` Kevin Corry
  2 siblings, 1 reply; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 14:34 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Joe Thornber, Linux Mailing List

On Tue, Dec 09, 2003 at 12:10:06PM -0200, Marcelo Tosatti wrote:
> 
> 
> On Tue, 9 Dec 2003, Joe Thornber wrote:
> 
> > On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> > > I believe 2.6 is the right place for the device mapper. 
> > 
> > So what's the difference between a new filesystem like XFS and a new
> > device driver like dm ?
> 
> Expected question... 
> 
> XFS is a totally different filesystem from the ones present in 2.4. 
> 
> As far as I know, we already have the similar functionality in 2.4 with
> LVM. Device mapper provides the same functionality but in a much cleaner
> way. Is that right?

Sort of, but please take into account the fact that the LVM1 driver
has bugs (particularly on the failure paths), and EVMS and other
volume managers dont use the LVM1 driver.  The snapshot target (which
I didn't include in the core patches) is hugely better performance
wise.

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:23     ` Stefan Smietanowski
@ 2003-12-09 14:36       ` Joe Thornber
  0 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 14:36 UTC (permalink / raw)
  To: Stefan Smietanowski; +Cc: Joe Thornber, Marcelo Tosatti, Linux Mailing List

On Tue, Dec 09, 2003 at 03:23:50PM +0100, Stefan Smietanowski wrote:
> Joe Thornber wrote:
> 
> >On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> >
> >>I believe 2.6 is the right place for the device mapper. 
> >
> >
> >So what's the difference between a new filesystem like XFS and a new
> >device driver like dm ?
> 
> One thing you're missing is that after all, XFS has existed longer than
> dm. Hell, XFS existed before 2.4 did (in a Linux form, I'm not talking
> IRIX now).

Correct, dm is only 2 and a half years old.

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:10     ` Marcelo Tosatti
  2003-12-09 14:34       ` Joe Thornber
@ 2003-12-09 17:02       ` Bill Rugolsky Jr.
  2003-12-09 22:53         ` Ciaran McCreesh
  2003-12-10  3:38         ` Lincoln Dale
  2003-12-09 17:45       ` Kevin Corry
  2 siblings, 2 replies; 55+ messages in thread
From: Bill Rugolsky Jr. @ 2003-12-09 17:02 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Joe Thornber, Linux Mailing List

On Tue, Dec 09, 2003 at 12:10:06PM -0200, Marcelo Tosatti wrote:
> As far as I know, we already have the similar functionality in 2.4 with
> LVM. Device mapper provides the same functionality but in a much cleaner
> way. Is that right?

Yes.
 
And migration of root-on-LVM users to 2.6 will be *greatly* helped if users
can get LVM2/DM working on 2.4 (by upgrading lvm/initscripts/mkinitrd),
and then move to 2.6.

And LVM1 snapshots in 2.4 have limited value, due to the performance impact.

    Bill Rugolsky

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:10     ` Marcelo Tosatti
  2003-12-09 14:34       ` Joe Thornber
  2003-12-09 17:02       ` Bill Rugolsky Jr.
@ 2003-12-09 17:45       ` Kevin Corry
  2003-12-09 19:47         ` Paul P Komkoff Jr
  2 siblings, 1 reply; 55+ messages in thread
From: Kevin Corry @ 2003-12-09 17:45 UTC (permalink / raw)
  To: Marcelo Tosatti, Joe Thornber; +Cc: Linux Mailing List

On Tuesday 09 December 2003 08:10, Marcelo Tosatti wrote:
> On Tue, 9 Dec 2003, Joe Thornber wrote:
> > On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
> > > I believe 2.6 is the right place for the device mapper.
> >
> > So what's the difference between a new filesystem like XFS and a new
> > device driver like dm ?
>
> Expected question...
>
> XFS is a totally different filesystem from the ones present in 2.4.
>
> As far as I know, we already have the similar functionality in 2.4 with
> LVM. Device mapper provides the same functionality but in a much cleaner
> way. Is that right?

Hi Marcelo,

With all due respect, I don't really agree with this assessment.

To the casual observer, XFS is just another filesystem. It's used to manage 
files, just like with ext3, Reiser, or JFS. However, XFS provides certain 
features and performance characteristics that may not be found in the other 
filesystems. For this reason, many people prefer XFS over the other 
filesystems, and have pushed for its inclusion in the 2.4 kernel. Of course, 
I'd argue that just as many (if not more) people have very little preference 
as to which filesystem they use. They're happy as long as their data doesn't 
get corrupted if their system crashes.

The situation with Device-Mapper is *very* similar. There are plenty of people 
that are happy using LVM1, and probably don't care much about Device-Mapper 
at this point. But there are also many people who prefer the improved 
features offered by using Device-Mapper. The two new volume management tools, 
LVM2 and EVMS, provide significant improvements over LVM1, such as improved 
metadata formats, more reliable metadata updates, better user interfaces, in 
addition to features that aren't available with LVM1, such as asynchronous 
snapshots. Device-Mapper also provides a modular interface for adding new 
functionality. For example, the EVMS project includes a module for performing 
block-level bad-block-relocation, and another developer has contributed a 
module for block-level encryption based on the crypto API. These new volume 
management tools only work with Device-Mapper, because LVM1 simply doesn't 
have the flexibility necessary to provide these capabilities. Again, this 
situation seems to closely mirror the situation with XFS vs. the existing 
filesystems.

Another compelling reason in my mind is that unlike the variety of filesystems 
that exist both in 2.4 and in 2.6, LVM1 is no longer available in 2.6. Many 
LVM1 users have been eager to try out 2.6 (and I certainly agree with you 
that we need to convince more people to make this switch) but the fact that 
their current tools are useless on 2.6 makes the transition far more painful. 
It would be a huge benefit if these folks were able to first transition to 
the new volume management tools while remaining on a 2.4 kernel. Then after 
they're comfortable with this first switch, they can begin transitioning to a 
2.6 kernel, where the new tools will work seemlessly.

I certainly understand your apprehension about accepting new drivers that 
modify common kernel code. As with XFS, nearly all of the submitted code sits 
in its own directory, and is only enabled if a user decides he needs it. And 
the common changes really are incredibly minimal.

Joe's first patch changes all of 8 lines in the JBD code, which is done to 
prevent JBD and Device-Mapper from stepping on each other's private data. The 
second patch (mempool) only adds new functionality that won't affect any 
existing code. (I'm actually suprised the mempool code hasn't been merged 
yet, since it would be quite useful for any number of drivers and/or 
filesystems besides Device-Mapper. It has certainly come in quite handy in 
2.6.) And the changes in arch/ are simply to support the Device-Mapper 
interface on 64-bit architectures.

I'd be happy to answer any questions or provide any other information that 
would help you with this decision. If you'd like additional review of the 
common code changes, I'll gladly look for volunteers to help with what should 
be a very simple review.

I truly believe that including Device-Mapper will not only benefit users who 
wish to continue on the 2.4 platform, but also those who are looking for an 
easier path to migrate to 2.6.

Thanks very much for your time, Marcelo!

-- 
Kevin Corry
kevcorry@us.ibm.com
http://evms.sourceforge.net/


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 17:45       ` Kevin Corry
@ 2003-12-09 19:47         ` Paul P Komkoff Jr
  0 siblings, 0 replies; 55+ messages in thread
From: Paul P Komkoff Jr @ 2003-12-09 19:47 UTC (permalink / raw)
  To: Linux Mailing List

Replying to Kevin Corry:
> prevent JBD and Device-Mapper from stepping on each other's private data. The 
> second patch (mempool) only adds new functionality that won't affect any 
> existing code. (I'm actually suprised the mempool code hasn't been merged 
> yet, since it would be quite useful for any number of drivers and/or 

alan had mempool in -ac for more than a year ;(

-- 
Paul P 'Stingray' Komkoff Jr // http://stingr.net/key <- my pgp key
 This message represents the official view of the voices in my head

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
                   ` (4 preceding siblings ...)
  2003-12-09 13:15 ` Device-mapper submission for 2.4 Marcelo Tosatti
@ 2003-12-09 19:50 ` William Lee Irwin III
  2003-12-09 21:13   ` Paul Jakma
  5 siblings, 1 reply; 55+ messages in thread
From: William Lee Irwin III @ 2003-12-09 19:50 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

On Tue, Dec 09, 2003 at 11:58:06AM +0000, Joe Thornber wrote:
> This set of patches is the core of device mapper for 2.4.  I would
> appreciate it if you could merge these into 2.4.24 please.

You have *GOT* to be kidding.


-- wli

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 14:34       ` Joe Thornber
@ 2003-12-09 21:07         ` Paul Jakma
  2003-12-09 22:26           ` Joe Thornber
  0 siblings, 1 reply; 55+ messages in thread
From: Paul Jakma @ 2003-12-09 21:07 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List

On Tue, 9 Dec 2003, Joe Thornber wrote:

> Sort of, but please take into account the fact that the LVM1 driver
> has bugs (particularly on the failure paths), and EVMS and other
> volume managers dont use the LVM1 driver.  The snapshot target
> (which I didn't include in the core patches) is hugely better
> performance wise.

Would this be of any aid to 2.4 users to transition to DM, so that
they can then easily test 2.6 and boot back to 2.4 if needs be?

If so, my vote would be for it to be included in 2.4.

> - Joe

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
"The Right Honorable Gentleman is indebted to his memory for his jests
and to his imagination for his facts."
		-- Sheridan

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 19:50 ` William Lee Irwin III
@ 2003-12-09 21:13   ` Paul Jakma
  0 siblings, 0 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-09 21:13 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: Joe Thornber, Marcelo Tosatti, Linux Mailing List

On Tue, 9 Dec 2003, William Lee Irwin III wrote:

> You have *GOT* to be kidding.

considering the LVM1 tools interface no longer is supported by DM in
2.6, DM in 2.4 (presumably /with/ LVM1 support (i'd hope)) seems a 
sane way to give 2.4 LVM1 users an easy and reversable upgrade path 
to 2.6.

I know I would love to try out 2.6 on my NFS server, but OTOH, I much
prefer to have access to my data.

> -- wli

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
Neil Armstrong tripped.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 21:07         ` Paul Jakma
@ 2003-12-09 22:26           ` Joe Thornber
  2003-12-09 22:48             ` Marcelo Tosatti
  2003-12-10  8:45             ` Jens Axboe
  0 siblings, 2 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 22:26 UTC (permalink / raw)
  To: Paul Jakma; +Cc: Joe Thornber, Marcelo Tosatti, Linux Mailing List

On Tue, Dec 09, 2003 at 09:07:49PM +0000, Paul Jakma wrote:
> On Tue, 9 Dec 2003, Joe Thornber wrote:
> Would this be of any aid to 2.4 users to transition to DM, so that
> they can then easily test 2.6 and boot back to 2.4 if needs be?
> 
> If so, my vote would be for it to be included in 2.4.

yes

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 22:26           ` Joe Thornber
@ 2003-12-09 22:48             ` Marcelo Tosatti
  2003-12-09 23:46               ` Paul Jakma
  2003-12-10  8:45             ` Jens Axboe
  1 sibling, 1 reply; 55+ messages in thread
From: Marcelo Tosatti @ 2003-12-09 22:48 UTC (permalink / raw)
  To: Joe Thornber; +Cc: linux-kernel



On Tue, 9 Dec 2003, Joe Thornber wrote:

> On Tue, Dec 09, 2003 at 09:07:49PM +0000, Paul Jakma wrote:
> > On Tue, 9 Dec 2003, Joe Thornber wrote:
> > Would this be of any aid to 2.4 users to transition to DM, so that
> > they can then easily test 2.6 and boot back to 2.4 if needs be?
> > 
> > If so, my vote would be for it to be included in 2.4.
> 
> yes

I wont merge it Joe.

Its nothing against your or DM itself. 

Let DM be in 2.6. 


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 17:02       ` Bill Rugolsky Jr.
@ 2003-12-09 22:53         ` Ciaran McCreesh
  2003-12-10  3:38         ` Lincoln Dale
  1 sibling, 0 replies; 55+ messages in thread
From: Ciaran McCreesh @ 2003-12-09 22:53 UTC (permalink / raw)
  To: marcelo.tosatti; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 951 bytes --]

On Tue, 9 Dec 2003 12:02:50 -0500 "Bill Rugolsky Jr."
<brugolsky@telemetry-investments.com> wrote:
| On Tue, Dec 09, 2003 at 12:10:06PM -0200, Marcelo Tosatti wrote:
| > As far as I know, we already have the similar functionality in 2.4
| > with LVM. Device mapper provides the same functionality but in a
| > much cleaner way. Is that right?
| 
| Yes.
|  
| And migration of root-on-LVM users to 2.6 will be *greatly* helped if
| users can get LVM2/DM working on 2.4 (by upgrading
| lvm/initscripts/mkinitrd), and then move to 2.6.

Agreed. Early 2.6test kernels had serious keyboard issues on my laptop,
and the effort of switching backwards and forwards between LVM1 and 2
has put me off trying again on that box. If I could run 2.4 with LVM2
easily without having to worry about yet another manual kernel patch I'd
be a lot more inclined to do testing.

-- 
Ciaran McCreesh
Mail:    ciaranm at gentoo.org
Web:     http://dev.gentoo.org/~ciaranm



[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 22:48             ` Marcelo Tosatti
@ 2003-12-09 23:46               ` Paul Jakma
  2003-12-09 23:58                 ` William Lee Irwin III
                                   ` (3 more replies)
  0 siblings, 4 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-09 23:46 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Joe Thornber, linux-kernel

On Tue, 9 Dec 2003, Marcelo Tosatti wrote:

> Its nothing against your or DM itself.
> 
> Let DM be in 2.6. 

Well, how does this leave 2.4 LVM1 users? From my vague 
understanding:

- 2.6 DM does not support the LVM1 interface
- The DM tools library is dropping support for the LVM1 interface

This leaves 2.4 LVM1 users with a /huge/ leap to take if they wish to
test 2.6. Backward compatibility is awkward because of the DM tools
issue (need both old and new installed and some way to pick at boot,
or manually setup LVM), and you're ruling out the other option of
adding forwards compatibility to 2.4.

This isnt a new fs which 2.4 users wont be using, its an existing 
feature that has been reworked during 2.5 and is now incompatible in 
2.6 with 2.4. More over, its a feature on which access to data 
depends.

I'd really like to see one of:

- backwards compat: 2.6 have LVM1 support

- forward compat: 2.4 to have DM support to allow 2.4 users to 
migrate
LVM->DM first /before/ taking the risk on running 2.6.

- the DM tools to support both LVM1 and LVMx in 2.6, on a *long-term* 
  basis

I or others may not migrate to 2.6 for many a year, and when we do,
it'd nice to be able to migrate our data in place (not
backup&restore). Kernel interface compat at least tends to be the
most set in stone and is what I would prefer. Whether forward or
backward doesnt matter, adding compat cruft to a soon-to-be obsolete 
kernel is possibly better than weighing 2.6 down with it for the next 
3+ years.

There are people who store their data in LVM, we need compatibility,
and ideally we'd like to be able to migrate in small steps.

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
You can write a small letter to Grandma in the filename.
		-- Forbes Burkowski, CS, University of Washington

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Patch 1/4] fs.h: b_journal_head
  2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
@ 2003-12-09 23:46   ` Nathan Scott
  2003-12-10  8:46     ` Joe Thornber
  0 siblings, 1 reply; 55+ messages in thread
From: Nathan Scott @ 2003-12-09 23:46 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Linux Mailing List

Hi Joe,

On Tue, Dec 09, 2003 at 12:24:18PM +0000, Joe Thornber wrote:
> Add a new member to struct buffer_head called b_journal_head.  This is
> for jbd to use, rather than have it peeking at b_private for in flight
> ios.
> ...
> --- diff/include/linux/fs.h	2003-12-09 10:25:27.000000000 +0000
> +++ source/include/linux/fs.h	2003-12-09 10:32:41.000000000 +0000
> @@ -265,7 +265,7 @@
>  	struct page *b_page;		/* the page this bh is mapped to */
>  	void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
>   	void *b_private;		/* reserved for b_end_io */
> -
> + 	void *b_journal_head;		/* ext3 journal_heads */
>  	unsigned long b_rsector;	/* Real buffer location on disk */
>  	wait_queue_head_t b_wait;
>  

Could you explain a bit more about when b_private should and
shouldn't be used with this change?  We make use of b_private
within XFS, I'm just wondering if we will be stepping on each
others toes here?  And if XFS does need to use b_journal_head
instead of b_private with DM, maybe a more generic name like
"b_fsprivate" or something would be clearer?

thanks.

-- 
Nathan

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 23:46               ` Paul Jakma
@ 2003-12-09 23:58                 ` William Lee Irwin III
  2003-12-10  0:15                   ` Paul Jakma
  2003-12-10  0:27                 ` Jose Luis Domingo Lopez
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 55+ messages in thread
From: William Lee Irwin III @ 2003-12-09 23:58 UTC (permalink / raw)
  To: Paul Jakma; +Cc: Marcelo Tosatti, Joe Thornber, linux-kernel

On Tue, Dec 09, 2003 at 11:46:13PM +0000, Paul Jakma wrote:
> This leaves 2.4 LVM1 users with a /huge/ leap to take if they wish to
> test 2.6. Backward compatibility is awkward because of the DM tools
> issue (need both old and new installed and some way to pick at boot,
> or manually setup LVM), and you're ruling out the other option of
> adding forwards compatibility to 2.4.
> This isnt a new fs which 2.4 users wont be using, its an existing 
> feature that has been reworked during 2.5 and is now incompatible in 
> 2.6 with 2.4. More over, its a feature on which access to data 
> depends.

Just apply the patch if you're for some reason terrified of 2.6.


-- wli

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 23:58                 ` William Lee Irwin III
@ 2003-12-10  0:15                   ` Paul Jakma
  2003-12-10 11:49                     ` Stephan von Krawczynski
  2003-12-10 23:15                     ` Dave Jones
  0 siblings, 2 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-10  0:15 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: Marcelo Tosatti, Joe Thornber, linux-kernel

On Tue, 9 Dec 2003, William Lee Irwin III wrote:

> Just apply the patch if you're for some reason terrified of 2.6.

Or get RedHat or Fedora to apply the patch.

Its a slightly safer bet though to have it in stock 2.4, guarantees
it will be there if one needs it 2 years down the road when upgrading
some box. (and non-LVM users wont be compiling it in).

So personally I'd rather Marcelo included it, being paranoid about
having support for access to data.

> -- wli

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
"If you want to travel around the world and be invited to speak at a lot
of different places, just write a Unix operating system."
(By Linus Torvalds)

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 23:46               ` Paul Jakma
  2003-12-09 23:58                 ` William Lee Irwin III
@ 2003-12-10  0:27                 ` Jose Luis Domingo Lopez
  2003-12-10  0:59                   ` Tupshin Harper
  2003-12-10  2:44                 ` Martin J. Bligh
  2003-12-16 19:01                 ` bill davidsen
  3 siblings, 1 reply; 55+ messages in thread
From: Jose Luis Domingo Lopez @ 2003-12-10  0:27 UTC (permalink / raw)
  To: linux-kernel

On Tuesday, 09 December 2003, at 23:46:13 +0000,
Paul Jakma wrote:

> There are people who store their data in LVM, we need compatibility,
> and ideally we'd like to be able to migrate in small steps.
> 
Install "module-init-tools", install "LVM2" (that can drive both LVM1
and DM Logical Volumes), compile a 2.6.x Linux kernel, reboot and you
should be done.

As far as I remember, migration is just that easy, and you can always go
back to plain 2.4.x while you don't update LVM metadata to newer version 2.

Greetings.

-- 
Jose Luis Domingo Lopez
Linux Registered User #189436     Debian Linux Sid (Linux 2.6.0-test10-mm1)

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  0:27                 ` Jose Luis Domingo Lopez
@ 2003-12-10  0:59                   ` Tupshin Harper
  2003-12-10  9:40                     ` Wichert Akkerman
  0 siblings, 1 reply; 55+ messages in thread
From: Tupshin Harper @ 2003-12-10  0:59 UTC (permalink / raw)
  To: Jose Luis Domingo Lopez; +Cc: linux-kernel

Jose Luis Domingo Lopez wrote:

>On Tuesday, 09 December 2003, at 23:46:13 +0000,
>Paul Jakma wrote:
>
>  
>
>>There are people who store their data in LVM, we need compatibility,
>>and ideally we'd like to be able to migrate in small steps.
>>
>>    
>>
>Install "module-init-tools", install "LVM2" (that can drive both LVM1
>and DM Logical Volumes), compile a 2.6.x Linux kernel, reboot and you
>should be done.
>
>As far as I remember, migration is just that easy, and you can always go
>back to plain 2.4.x while you don't update LVM metadata to newer version 2.
>
>Greetings.
>
>  
>
This is not true. LVM2 can read the LVM1 format, but it cannot 
communicate with non-dm interfaces in 2.4.x. This means that you need to 
run lvm1 on 2.4 and lvm2 on 2.6 unless you patch 2.4 with dm.

If this were the whole story, then it would be an amazingly painful 
transition to (safely) upgrade an lvm machine from 2.4 to 2.6 (upgrade 
to patched 2.4, then upgrade to 2.6). Luckily, debian has made the lvm1 
and lvm2 packages not conflict, and the correct ones runs at startup 
depending on which kernel you have. This is probably a feature that all 
distros will have to adopt to ease the upgrade cycle.

-Tupshin

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 23:46               ` Paul Jakma
  2003-12-09 23:58                 ` William Lee Irwin III
  2003-12-10  0:27                 ` Jose Luis Domingo Lopez
@ 2003-12-10  2:44                 ` Martin J. Bligh
  2003-12-10 15:55                   ` Paul Jakma
  2003-12-16 19:01                 ` bill davidsen
  3 siblings, 1 reply; 55+ messages in thread
From: Martin J. Bligh @ 2003-12-10  2:44 UTC (permalink / raw)
  To: Paul Jakma, Marcelo Tosatti; +Cc: Joe Thornber, linux-kernel

> I'd really like to see one of:
> 
> - backwards compat: 2.6 have LVM1 support
>
> - the DM tools to support both LVM1 and LVMx in 2.6, on a *long-term* 
>   basis

Some form of backward compatibility from 2.6 would seem a much more 
sensible thing to fight for. Foisting forward comaptibility on an 
older release seems like a bad road to go down.
 
M.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 17:02       ` Bill Rugolsky Jr.
  2003-12-09 22:53         ` Ciaran McCreesh
@ 2003-12-10  3:38         ` Lincoln Dale
  2003-12-10  6:12           ` Willy Tarreau
  1 sibling, 1 reply; 55+ messages in thread
From: Lincoln Dale @ 2003-12-10  3:38 UTC (permalink / raw)
  To: Bill Rugolsky Jr.; +Cc: Marcelo Tosatti, Joe Thornber, Linux Mailing List

At 04:02 AM 10/12/2003, Bill Rugolsky Jr. wrote:
>On Tue, Dec 09, 2003 at 12:10:06PM -0200, Marcelo Tosatti wrote:
> > As far as I know, we already have the similar functionality in 2.4 with
> > LVM. Device mapper provides the same functionality but in a much cleaner
> > way. Is that right?
>
>Yes.
>
>And migration of root-on-LVM users to 2.6 will be *greatly* helped if users
>can get LVM2/DM working on 2.4 (by upgrading lvm/initscripts/mkinitrd),
>and then move to 2.6.

i concur with this.
Marcello: try to migrate from a root-on-LVM1/2.4 to LVM2/2.6; it is very 
painful.  major/minor # changes, more stuff required in initrd, "dm" 
doesn't appear in 2.6's /proc/partitions . . .

it is a painful upgrade - probably partly due to lack of 
tools/documentation on DMs part, but also equally because 2.4->2.6 is a bug 
jump in a kernel and its exacerbated by LVM1->LVM2 changes...


cheers,

lincoln.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  3:38         ` Lincoln Dale
@ 2003-12-10  6:12           ` Willy Tarreau
  2003-12-10  6:35             ` viro
  0 siblings, 1 reply; 55+ messages in thread
From: Willy Tarreau @ 2003-12-10  6:12 UTC (permalink / raw)
  To: Lincoln Dale
  Cc: Bill Rugolsky Jr., Marcelo Tosatti, Joe Thornber, Linux Mailing List

On Wed, Dec 10, 2003 at 02:38:02PM +1100, Lincoln Dale wrote:
 
> i concur with this.
> Marcello: try to migrate from a root-on-LVM1/2.4 to LVM2/2.6; it is very 
> painful.  major/minor # changes, more stuff required in initrd, "dm" 
> doesn't appear in 2.6's /proc/partitions . . .
> 
> it is a painful upgrade - probably partly due to lack of 
> tools/documentation on DMs part, but also equally because 2.4->2.6 is a bug 
> jump in a kernel and its exacerbated by LVM1->LVM2 changes...

And what next ? people will ask "marcelo, please include initramfs support,
it will help us migrating", "marcelo, it's annoying to support both
module-init-tools and modutils, please accept this patch to change all modules
to 2.6 format", "marcelo, my usb memory stick is only supported in 2.6, please
include it in 2.4 so that I can use it to backup my system in case 2.6 crashes",
"marcelo, please include preempt, it's already in 2.6 and my desktop feels
smoother with it"...

If 2.6 breaks some backwards compatibility, which kernel do you think should
be changed ? Did anybody submit a patch to include netfilter support in 2.2
in case people would finally switch their firewall back to 2.2 when 2.4 was
unstable ? no.

I agree it's important to be able to upgrade and downgrade with a maximum
safety. But frankly, when you know that your data are so much important when
migrating to the new stable kernel, don't you believe you will backup them
first instead something weird happens ? Then they can be restored into a
common format. That's what I did when I used reiserfs 3.5 on raid5 in 2.2
when I switched to 2.4. Converting everything to ext2 was safer than risking
to rely on a not wide tested compatibility glue between the kernels.

It was the same for XFS imho. All XFS users once had the ability to patch
and install it themselves, and should still have the ability to continue
this way. OK this is annoying, and I too am happy that Marcelo makes it
easier now for them. There also are good reasons in case of DM. But we
should also consider that including any patch regularly breaks other
patches and makes it worse for many other people to include external
patches. So the question remains : what next ? 2.4 is definitely not what
I consider a "stable kernel", it's rather the "most stable actively
developped branch". Getting only bugfixes in it would be fairly simpler
for all people using it in production.

Cheers,
Willy


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  6:12           ` Willy Tarreau
@ 2003-12-10  6:35             ` viro
  0 siblings, 0 replies; 55+ messages in thread
From: viro @ 2003-12-10  6:35 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Lincoln Dale, Bill Rugolsky Jr.,
	Marcelo Tosatti, Joe Thornber, Linux Mailing List

On Wed, Dec 10, 2003 at 07:12:13AM +0100, Willy Tarreau wrote:
 
> And what next ? people will ask "marcelo, please include initramfs support,
> it will help us migrating", "marcelo, it's annoying to support both
> module-init-tools and modutils, please accept this patch to change all modules
> to 2.6 format", "marcelo, my usb memory stick is only supported in 2.6, please
> include it in 2.4 so that I can use it to backup my system in case 2.6 crashes",
> "marcelo, please include preempt, it's already in 2.6 and my desktop feels
> smoother with it"...

Heh.  Actually, 99% of initramfs support is there - the only piece missing
is unpack_to_rootfs().  IOW, rootfs is there in the same way as on 2.6,
but it isn't pre-populated.  By now it's too late, but a couple of months
ago it would be a trivial enough for backport - init/initramfs.c is
self-contained and it would be a matter of copying several kilobytes of
stuff in 2.4 + adding a section to ld script (same on all architectures)
+ adding one line in init/main.c.  It's nowhere near as intrusive as other
changes on the list (including dm), but it *is* too late for any of that
stuff in 2.4.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 22:26           ` Joe Thornber
  2003-12-09 22:48             ` Marcelo Tosatti
@ 2003-12-10  8:45             ` Jens Axboe
  2003-12-10 17:30               ` Paul Jakma
  1 sibling, 1 reply; 55+ messages in thread
From: Jens Axboe @ 2003-12-10  8:45 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Paul Jakma, Marcelo Tosatti, Linux Mailing List

On Tue, Dec 09 2003, Joe Thornber wrote:
> On Tue, Dec 09, 2003 at 09:07:49PM +0000, Paul Jakma wrote:
> > On Tue, 9 Dec 2003, Joe Thornber wrote:
> > Would this be of any aid to 2.4 users to transition to DM, so that
> > they can then easily test 2.6 and boot back to 2.4 if needs be?
> > 
> > If so, my vote would be for it to be included in 2.4.
> 
> yes

Seems to me, it's the lvm2 teams responsibility to provide easy
transition to 2.6 from 2.4. Merging dm in 2.4 right now looks like a
step in the wrong direction.

Arguments akin to "But XFS got merged, surely we can to" don't hold up
one bit. Should be obvious why.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Patch 1/4] fs.h: b_journal_head
  2003-12-09 23:46   ` Nathan Scott
@ 2003-12-10  8:46     ` Joe Thornber
  2003-12-10 12:06       ` Nathan Scott
  0 siblings, 1 reply; 55+ messages in thread
From: Joe Thornber @ 2003-12-10  8:46 UTC (permalink / raw)
  To: Nathan Scott; +Cc: Linux Mailing List

On Wed, Dec 10, 2003 at 10:46:55AM +1100, Nathan Scott wrote:
> Could you explain a bit more about when b_private should and
> shouldn't be used with this change?

Once the io goes through generic_make_request() you shouldn't look at
bh->b_private until the io has completed.  At which point it will have
been correctly set back to the value it had when submitted.

The problem with jbd wasn't the fact that it used it, but the fact
that it peeked while the io was in flight.

This is ugly I know, much cleaner in 2.6 where there is a split
between bh and bio.

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  0:59                   ` Tupshin Harper
@ 2003-12-10  9:40                     ` Wichert Akkerman
  0 siblings, 0 replies; 55+ messages in thread
From: Wichert Akkerman @ 2003-12-10  9:40 UTC (permalink / raw)
  To: Jose Luis Domingo Lopez, linux-kernel

Previously Tupshin Harper wrote:
> This is not true. LVM2 can read the LVM1 format, but it cannot 
> communicate with non-dm interfaces in 2.4.x. This means that you need to 
> run lvm1 on 2.4 and lvm2 on 2.6 unless you patch 2.4 with dm.

And unless my memory is failing me all distros ship tools that will
detect which interface your system has and call the right tool for you.
That was already needed for lvm1 which went through several ioctl
interfaces and continues to work fine for lvm2. Which means this 
is pretty much a non-issue.

Wichert.

-- 
Wichert Akkerman <wichert@wiggy.net>    It is simple to make things.
http://www.wiggy.net/                   It is hard to make things simple.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  0:15                   ` Paul Jakma
@ 2003-12-10 11:49                     ` Stephan von Krawczynski
  2003-12-10 23:15                     ` Dave Jones
  1 sibling, 0 replies; 55+ messages in thread
From: Stephan von Krawczynski @ 2003-12-10 11:49 UTC (permalink / raw)
  To: Paul Jakma; +Cc: wli, marcelo.tosatti, thornber, linux-kernel

On Wed, 10 Dec 2003 00:15:17 +0000 (GMT)
Paul Jakma <paul@clubi.ie> wrote:

> On Tue, 9 Dec 2003, William Lee Irwin III wrote:
> 
> > Just apply the patch if you're for some reason terrified of 2.6.
> 
> Or get RedHat or Fedora to apply the patch.

There it is again, this /dev/null argument.

"Multi-billion dollar companies" have gone bancrupt on the simple fact that
diversification of one product can rattle customers/users to a degree that they
in fact decide against the whole product range.

IOW go on with the idea to spread around an unknown number of kernel versions
and you can be sure that linux as a whole will greatly suffer.

This is a "user" issue, not a "developer" issue of course. Developers can apply
any kind of patches they like, but don't go and tell the vast user base to
"just apply patch xyz". They won't honor this at all, your level of acceptance
will dramatically drop.

Regards,
Stephan



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Patch 1/4] fs.h: b_journal_head
  2003-12-10  8:46     ` Joe Thornber
@ 2003-12-10 12:06       ` Nathan Scott
  0 siblings, 0 replies; 55+ messages in thread
From: Nathan Scott @ 2003-12-10 12:06 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Linux Mailing List

On Wed, Dec 10, 2003 at 08:46:32AM +0000, Joe Thornber wrote:
> On Wed, Dec 10, 2003 at 10:46:55AM +1100, Nathan Scott wrote:
> > Could you explain a bit more about when b_private should and
> > shouldn't be used with this change?
> 
> Once the io goes through generic_make_request() you shouldn't look at
> bh->b_private until the io has completed.  At which point it will have
> been correctly set back to the value it had when submitted.

OK.

> The problem with jbd wasn't the fact that it used it, but the fact
> that it peeked while the io was in flight.

Ah, I see now.  XFS doesn't have to do that, so should work fine as is.

thanks.

-- 
Nathan

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  2:44                 ` Martin J. Bligh
@ 2003-12-10 15:55                   ` Paul Jakma
  2003-12-10 16:54                     ` venom
  2003-12-16 19:15                     ` bill davidsen
  0 siblings, 2 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-10 15:55 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: Marcelo Tosatti, Joe Thornber, linux-kernel

On Tue, 9 Dec 2003, Martin J. Bligh wrote:

> Some form of backward compatibility from 2.6 would seem a much more
> sensible thing to fight for. Foisting forward comaptibility on an
> older release seems like a bad road to go down.

I dont really care, but some kind of (long-term, ie lifetime of 
either 2.4 or 2.6) compatibility is needed.

LVM1 kernel support was recently removed from 2.6.0, so it would have 
to be added back in. 

One argument for adding forward compatibility in 2.4 is that it will 
/force/ people to move to DM before going to 2.6, which might be a 
good thing as, AIUI, LVM1 has problems. 

Its a choice between:

- 2.6 backwards compatibility, adding back in LVM1 support, LVM1 
users will then quite possibly continue to use the problematical LVM1 
interfaces even after migrating to 2.6.

- 2.4 forwards compatibility, add DM support - which appears (IMVU)  
to drop in cleanly alongside MD - and hence 2.6 can remain 'clean'.

I dont know, but it would be nice to have /something/ and to have it 
in stock kernel rather than /hope/ to have upstreams include the 
required backward or forward compatibility.

> M.

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
But it does move!
		-- Galileo Galilei

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 15:55                   ` Paul Jakma
@ 2003-12-10 16:54                     ` venom
  2003-12-10 17:00                       ` Paul Jakma
  2003-12-16 19:15                     ` bill davidsen
  1 sibling, 1 reply; 55+ messages in thread
From: venom @ 2003-12-10 16:54 UTC (permalink / raw)
  To: Paul Jakma; +Cc: Martin J. Bligh, Marcelo Tosatti, Joe Thornber, linux-kernel


DM is back compatible with LVM1, tested and runs well.

Of course LVM1 has problems, but should we consider the DM case as mutch the
same as XFS?

Luigi

On Wed, 10 Dec 2003, Paul Jakma wrote:

> Date: Wed, 10 Dec 2003 15:55:53 +0000 (GMT)
> From: Paul Jakma <paul@clubi.ie>
> To: Martin J. Bligh <mbligh@aracnet.com>
> Cc: Marcelo Tosatti <marcelo.tosatti@cyclades.com>,
>      Joe Thornber <thornber@sistina.com>, linux-kernel@vger.kernel.org
> Subject: Re: Device-mapper submission for 2.4
>
> On Tue, 9 Dec 2003, Martin J. Bligh wrote:
>
> > Some form of backward compatibility from 2.6 would seem a much more
> > sensible thing to fight for. Foisting forward comaptibility on an
> > older release seems like a bad road to go down.
>
> I dont really care, but some kind of (long-term, ie lifetime of
> either 2.4 or 2.6) compatibility is needed.
>
> LVM1 kernel support was recently removed from 2.6.0, so it would have
> to be added back in.
>
> One argument for adding forward compatibility in 2.4 is that it will
> /force/ people to move to DM before going to 2.6, which might be a
> good thing as, AIUI, LVM1 has problems.
>
> Its a choice between:
>
> - 2.6 backwards compatibility, adding back in LVM1 support, LVM1
> users will then quite possibly continue to use the problematical LVM1
> interfaces even after migrating to 2.6.
>
> - 2.4 forwards compatibility, add DM support - which appears (IMVU)
> to drop in cleanly alongside MD - and hence 2.6 can remain 'clean'.
>
> I dont know, but it would be nice to have /something/ and to have it
> in stock kernel rather than /hope/ to have upstreams include the
> required backward or forward compatibility.
>
> > M.
>
> regards,
> --
> Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
> 	warning: do not ever send email to spam@dishone.st
> Fortune:
> But it does move!
> 		-- Galileo Galilei
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 16:54                     ` venom
@ 2003-12-10 17:00                       ` Paul Jakma
  2003-12-10 17:14                         ` venom
  2003-12-10 23:40                         ` Mike Fedyk
  0 siblings, 2 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-10 17:00 UTC (permalink / raw)
  To: venom; +Cc: Martin J. Bligh, Marcelo Tosatti, Joe Thornber, linux-kernel

On Wed, 10 Dec 2003 venom@sns.it wrote:

> DM is back compatible with LVM1, tested and runs well.

What about the patches posted by Joe last (?) week which remove LVM1 
support from 2.6 DM? (if Linus hasnt picked them up, its surely an 
omen the support will go once the bug-only freeze is lifted (?)).

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
Someone is unenthusiastic about your work.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:00                       ` Paul Jakma
@ 2003-12-10 17:14                         ` venom
  2003-12-10 23:40                         ` Mike Fedyk
  1 sibling, 0 replies; 55+ messages in thread
From: venom @ 2003-12-10 17:14 UTC (permalink / raw)
  To: Paul Jakma; +Cc: Martin J. Bligh, Marcelo Tosatti, Joe Thornber, linux-kernel


wow, I did not  notice that.

ok, if it is so, it's really different.

On Wed, 10 Dec 2003, Paul Jakma wrote:

> Date: Wed, 10 Dec 2003 17:00:43 +0000 (GMT)
> From: Paul Jakma <paul@clubi.ie>
> To: venom@sns.it
> Cc: Martin J. Bligh <mbligh@aracnet.com>,
>      Marcelo Tosatti <marcelo.tosatti@cyclades.com>,
>      Joe Thornber <thornber@sistina.com>, linux-kernel@vger.kernel.org
> Subject: Re: Device-mapper submission for 2.4
>
> On Wed, 10 Dec 2003 venom@sns.it wrote:
>
> > DM is back compatible with LVM1, tested and runs well.
>
> What about the patches posted by Joe last (?) week which remove LVM1
> support from 2.6 DM? (if Linus hasnt picked them up, its surely an
> omen the support will go once the bug-only freeze is lifted (?)).
>
> regards,
> --
> Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
> 	warning: do not ever send email to spam@dishone.st
> Fortune:
> Someone is unenthusiastic about your work.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  8:45             ` Jens Axboe
@ 2003-12-10 17:30               ` Paul Jakma
  2003-12-10 17:44                 ` Joe Thornber
  0 siblings, 1 reply; 55+ messages in thread
From: Paul Jakma @ 2003-12-10 17:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Joe Thornber, Marcelo Tosatti, Linux Mailing List

On Wed, 10 Dec 2003, Jens Axboe wrote:

> Arguments akin to "But XFS got merged, surely we can to" don't hold
> up one bit. Should be obvious why.

Its not about a /new/ feature, its about an existing feature which is 
incompatible between 2.4 and 2.6.

I dont really care whether its done via forward or backware compat. 
(but why was LVM1 removed from 2.6?)

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
[Washington, D.C.] is the home of... taste for the people -- the big,
the bland and the banal.
		-- Ada Louise Huxtable

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:30               ` Paul Jakma
@ 2003-12-10 17:44                 ` Joe Thornber
  2003-12-10 17:48                   ` venom
                                     ` (2 more replies)
  0 siblings, 3 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-10 17:44 UTC (permalink / raw)
  To: Paul Jakma; +Cc: Jens Axboe, Joe Thornber, Marcelo Tosatti, Linux Mailing List

On Wed, Dec 10, 2003 at 05:30:01PM +0000, Paul Jakma wrote:
> On Wed, 10 Dec 2003, Jens Axboe wrote:
> 
> > Arguments akin to "But XFS got merged, surely we can to" don't hold
> > up one bit. Should be obvious why.
> 
> Its not about a /new/ feature, its about an existing feature which is 
> incompatible between 2.4 and 2.6.
> 
> I dont really care whether its done via forward or backware compat. 
> (but why was LVM1 removed from 2.6?)

The LVM1 driver was removed because dm covered the same functionality
+ lots more, and is more flexible.  The LVM2 tools still understand
the LVM1 metadata format, so there is no problem about not being able
to read data in 2.6.  The main reason for submitting dm to 2.4 was
that there are a lot of people out there who want to use LVM2/EVMS
tools with 2.4, and kept asking me to do it.  If this is against
Marcelos current policy then so be it; I probably should have checked
with him before spamming lkml with the submission.  I don't want this
to degenerate into the old LVM1 vs dm argument; people can search the
archives for that.

- Joe

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:44                 ` Joe Thornber
@ 2003-12-10 17:48                   ` venom
  2003-12-10 18:07                   ` Paul Jakma
  2003-12-10 19:30                   ` Jens Axboe
  2 siblings, 0 replies; 55+ messages in thread
From: venom @ 2003-12-10 17:48 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Paul Jakma, Jens Axboe, Marcelo Tosatti, Linux Mailing List

On Wed, 10 Dec 2003, Joe Thornber wrote:

>
> The LVM1 driver was removed because dm covered the same functionality
> + lots more, and is more flexible.  The LVM2 tools still understand
> the LVM1 metadata format, so there is no problem about not being able
> to read data in 2.6.

So I was right. Well, if back compatibility works, this solves most of the
problem.

> The main reason for submitting dm to 2.4 was
> that there are a lot of people out there who want to use LVM2/EVMS
> tools with 2.4, and kept asking me to do it.  If this is against
> Marcelos current policy then so be it; I probably should have checked
> with him before spamming lkml with the submission.

This is a good point, but patches are available, so those people can stil use
it, am I wrong?

Luigi


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:44                 ` Joe Thornber
  2003-12-10 17:48                   ` venom
@ 2003-12-10 18:07                   ` Paul Jakma
  2003-12-10 19:30                   ` Jens Axboe
  2 siblings, 0 replies; 55+ messages in thread
From: Paul Jakma @ 2003-12-10 18:07 UTC (permalink / raw)
  To: Joe Thornber; +Cc: Jens Axboe, Marcelo Tosatti, Linux Mailing List

On Wed, 10 Dec 2003, Joe Thornber wrote:

> The LVM1 driver was removed because dm covered the same
> functionality + lots more, and is more flexible.

Yes, DM seems quite nice.

> The LVM2 tools still understand the LVM1 metadata format, so there
> is no problem about not being able to read data in 2.6.

Ah, and this capability is /not/ going away? If so, then that works 
for me. Its just i got the vague impression that support was going 
to be excised at some stage soonish, which is what worries me. If 
not, apologies, and then indeed there's no reason for DM in 2.4.

> - Joe

regards,
-- 
Paul Jakma	paul@clubi.ie	paul@jakma.org	Key ID: 64A2FF6A
	warning: do not ever send email to spam@dishone.st
Fortune:
Will you loan me $20.00 and only give me ten of it?
That way, you will owe me ten, and I'll owe you ten, and we'll be even!

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:44                 ` Joe Thornber
  2003-12-10 17:48                   ` venom
  2003-12-10 18:07                   ` Paul Jakma
@ 2003-12-10 19:30                   ` Jens Axboe
  2 siblings, 0 replies; 55+ messages in thread
From: Jens Axboe @ 2003-12-10 19:30 UTC (permalink / raw)
  To: Joe Thornber, Paul Jakma; +Cc: Marcelo Tosatti, Linux Mailing List

On Wed, Dec 10 2003, Joe Thornber wrote:
> On Wed, Dec 10, 2003 at 05:30:01PM +0000, Paul Jakma wrote:
> > On Wed, 10 Dec 2003, Jens Axboe wrote:
> > 
> > > Arguments akin to "But XFS got merged, surely we can to" don't hold
> > > up one bit. Should be obvious why.
> > 
> > Its not about a /new/ feature, its about an existing feature which is 
> > incompatible between 2.4 and 2.6.
> > 
> > I dont really care whether its done via forward or backware compat. 
> > (but why was LVM1 removed from 2.6?)
> 
> The LVM1 driver was removed because dm covered the same functionality
> + lots more, and is more flexible.  The LVM2 tools still understand
> the LVM1 metadata format, so there is no problem about not being able
> to read data in 2.6.  The main reason for submitting dm to 2.4 was

Great, so then there's zero reason to merge it in 2.4.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10  0:15                   ` Paul Jakma
  2003-12-10 11:49                     ` Stephan von Krawczynski
@ 2003-12-10 23:15                     ` Dave Jones
  1 sibling, 0 replies; 55+ messages in thread
From: Dave Jones @ 2003-12-10 23:15 UTC (permalink / raw)
  To: Paul Jakma
  Cc: William Lee Irwin III, Marcelo Tosatti, Joe Thornber, linux-kernel

On Wed, Dec 10, 2003 at 12:15:17AM +0000, Paul Jakma wrote:
 > > Just apply the patch if you're for some reason terrified of 2.6.
 > Or get RedHat or Fedora to apply the patch.

This isn't going to happen for Fedora.

		Dave


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 17:00                       ` Paul Jakma
  2003-12-10 17:14                         ` venom
@ 2003-12-10 23:40                         ` Mike Fedyk
  2003-12-11 19:48                           ` Alasdair G Kergon
  1 sibling, 1 reply; 55+ messages in thread
From: Mike Fedyk @ 2003-12-10 23:40 UTC (permalink / raw)
  To: Paul Jakma
  Cc: venom, Martin J. Bligh, Marcelo Tosatti, Joe Thornber, linux-kernel

On Wed, Dec 10, 2003 at 05:00:43PM +0000, Paul Jakma wrote:
> On Wed, 10 Dec 2003 venom@sns.it wrote:
> 
> > DM is back compatible with LVM1, tested and runs well.
> 
> What about the patches posted by Joe last (?) week which remove LVM1 
> support from 2.6 DM? (if Linus hasnt picked them up, its surely an 

If this is what I was reading being discussed a few weeks ago, then the
support for the LVM1 sysctls/ioctls has/will be removed, so you will have to
use the DM utilities instead of the old LVM1 utilities.  LVM1 on-disk format
should still be supported.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 23:40                         ` Mike Fedyk
@ 2003-12-11 19:48                           ` Alasdair G Kergon
  0 siblings, 0 replies; 55+ messages in thread
From: Alasdair G Kergon @ 2003-12-11 19:48 UTC (permalink / raw)
  To: linux-kernel, linux-lvm
  Cc: Paul Jakma, Mike Fedyk, Joe Thornber, Marcelo Tosatti, Linus Torvalds

On Wed, Dec 10, 2003 at 03:40:07PM -0800, Mike Fedyk wrote:
> On Wed, Dec 10, 2003 at 05:00:43PM +0000, Paul Jakma wrote:
> > On Wed, 10 Dec 2003 venom@sns.it wrote:
> > > DM is back compatible with LVM1, tested and runs well.
> > What about the patches posted by Joe last (?) week which remove LVM1 
> > support from 2.6 DM? 

They remove support for the broken version 1 of the device-mapper 
ioctl interface.  This is nothing to do with LVM1.
 
> If this is what I was reading being discussed a few weeks ago, then the
> support for the LVM1 sysctls/ioctls has/will be removed, so you will have to
> use the DM utilities instead of the old LVM1 utilities.  LVM1 on-disk format
> should still be supported.

2.6 does not support LVM1 ioctls.
LVM2 userspace tools and EVMS both support LVM1 on-disk format using
device-mapper.


Here's a reference sheet to help clarify the terminology and explain
what's happening.

LVM1 = Userspace tools + kernel ioctls included in marcelo's 2.4 tree
  - LVM1 kernel ioctls are *not* included in or available for 2.6
  - LVM1 userspace tools do *not* work with 2.6 kernels

dm = Kernel driver (GPL) for new volume managers to use.
  - Included in Linus's 2.6 kernels.
  - Available as a patch for 2.4 kernels from the Sistina website.
  - Knows *nothing* about volume manager's on-disk metadata layouts.
  - Userspace volume managers (e.g. EVMS and LVM2) communicate via a new 
    ioctl interface.
  - This ioctl interface is currently "version 4" and we regard it as
    stable.  [Some enhancements are on the horizon, but nothing that 
    breaks existing code/binaries.]
  - An old development version of this device-mapper ioctl interface known
    as "version 1" has problems with it, is deprecated and should be
    removed from kernel trees ASAP.  
    Always use "version 4" when building new kernels today.

libdevmapper = Userspace shared library (LGPL) which wraps a volume manager 
               application interface around the device-mapper ioctls
  - Can determine transparently whether the kernel device-mapper is using
    "version 4" dm ioctl interface or the deprecated "version 1" interface
    and adapt itself accordingly.  [configure --enable-compat]
  - Can only communicate with device-mapper: it cannot use LVM1 ioctls.
  - Designed primarily for use by LVM2 tools.  [EVMS does not use it]
  - Some parts of the libdevmapper API are not yet stable and are likely 
    to get changed.

dmsetup = Userspace utility (GPL) which provides full command-line access to
          the libdevmapper API.
  - Designed for use by shell scripts and for testing and debugging.
  - Command line interface may be considered stable.  New features may get 
    added, but we'll try not to break existing commands.

LVM2 = New Logical Volume Manager command line tools (GPL) designed to
       be backward-compatible with LVM1 and offering new features and
       more flexibility, configurability and stability.
  - Supports existing LVM1 on-disk metadata.
    This means you do *not* have to make changes to your existing on-disk 
    LVM1 volumes to switch between using LVM1 and LVM2.
  - Uses command lines similar to LVM1.
  - By default uses a new on-disk metadata format supporting more
    features than the original LVM1 version.
  - Communicates with the device-mapper kernel driver via libdevmapper's
    API.


Miscellaneous points:
  - LVM1 uses block major number 58: dm selects one or more major numbers
    dynamically as required instead.
  - LVM1 uses character major number 109: dm selects a misc minor number
    dynamically instead.
  - There's a (non-devfs) script for creating /dev/mapper/control at
    startup (or after dm module load).
  - You can use LVM1 tools with unpatched 2.4 kernels.
  - You can use LVM2 tools with patched 2.4 and unpatched 2.6 kernels.
  - Device-mapper support for snapshots and pvmove is so far released 
    only for 2.4.  Patches for 2.6 are being tested.
  - Multipath and mirror support are under development for 2.6.
    (Then get back-ported to 2.4.)

Web download page: http://www.sistina.com/products_lvm_download.htm

The device-mapper tarball contains: 
  device-mapper kernel patches - needed only for 2.4;
  userspace libdevmapper and dmsetup - needed with all dm kernels.
The LVM2 tarball contains the LVM2 command line tools.

Development code can be found via:
  http://people.sistina.com/~thornber/  (for kernel patches)
  http://www.sistina.com/products_CVS.htm  (for userspace code)

Device-mapper mailing list:
  http://lists.sistina.com/mailman/listinfo/dm-devel

Alasdair
-- 
agk@uk.sistina.com

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-09 23:46               ` Paul Jakma
                                   ` (2 preceding siblings ...)
  2003-12-10  2:44                 ` Martin J. Bligh
@ 2003-12-16 19:01                 ` bill davidsen
  3 siblings, 0 replies; 55+ messages in thread
From: bill davidsen @ 2003-12-16 19:01 UTC (permalink / raw)
  To: linux-kernel

In article <Pine.LNX.4.56.0312092329280.30298@fogarty.jakma.org>,
Paul Jakma  <paul@clubi.ie> wrote:

| I'd really like to see one of:
| 
| - backwards compat: 2.6 have LVM1 support
| 
| - forward compat: 2.4 to have DM support to allow 2.4 users to 
| migrate
| LVM->DM first /before/ taking the risk on running 2.6.

Hate to say it, but unlike XFS which has been available for 2.4 for ages
and very well tested, DM for 2.4 has all the joy of a newly posted
feature. I really think that you will find the *if* you want DM you will
be safer going to 2.6 and using the version which has been reasonably
well tested.

You will want to do a full backup before going to a new o/s in any case,
if your data is of value.
-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
  2003-12-10 15:55                   ` Paul Jakma
  2003-12-10 16:54                     ` venom
@ 2003-12-16 19:15                     ` bill davidsen
  1 sibling, 0 replies; 55+ messages in thread
From: bill davidsen @ 2003-12-16 19:15 UTC (permalink / raw)
  To: linux-kernel

In article <Pine.LNX.4.56.0312101547590.1218@fogarty.jakma.org>,
Paul Jakma  <paul@clubi.ie> wrote:
| On Tue, 9 Dec 2003, Martin J. Bligh wrote:
| 
| > Some form of backward compatibility from 2.6 would seem a much more
| > sensible thing to fight for. Foisting forward comaptibility on an
| > older release seems like a bad road to go down.
| 
| I dont really care, but some kind of (long-term, ie lifetime of 
| either 2.4 or 2.6) compatibility is needed.

Where on earth did you get that? Is this some new policy Linus has put
forth, or something you wish were real? It certainly wasn't the case for
2.2 => 2.4 conversion, where is it writ that LVM1 needs to get
conversion help?
-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: Device-mapper submission for 2.4
@ 2003-12-10  0:49 Carl-Daniel Hailfinger
  0 siblings, 0 replies; 55+ messages in thread
From: Carl-Daniel Hailfinger @ 2003-12-10  0:49 UTC (permalink / raw)
  To: marcelo.tosatti; +Cc: thornber, Linux Kernel Mailing List

Marcelo Tosatti wrote:
> On Tue, 9 Dec 2003, Joe Thornber wrote:
> 
>> On Tue, Dec 09, 2003 at 11:15:08AM -0200, Marcelo Tosatti wrote:
>> > I believe 2.6 is the right place for the device mapper.
>>
>> So what's the difference between a new filesystem like XFS and a new
>> device driver like dm ?
> 
> Expected question...
> 
> XFS is a totally different filesystem from the ones present in 2.4.

Please give me a pointer about what's so different about XFS. Last time I
looked, XFS features were mostly equivalent to those of other journaling
file systems.

This is a honest question, not a flamebait.


Thanks,
Carl-Daniel


^ permalink raw reply	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2003-12-16 19:26 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
2003-12-09 23:46   ` Nathan Scott
2003-12-10  8:46     ` Joe Thornber
2003-12-10 12:06       ` Nathan Scott
2003-12-09 12:25 ` [Patch 2/4] dm: mempool backport Joe Thornber
2003-12-09 12:26 ` [Patch 3/4] dm: core files Joe Thornber
2003-12-09 12:26 ` [Patch 4/4] dm: ioctl interface Joe Thornber
2003-12-09 13:15 ` Device-mapper submission for 2.4 Marcelo Tosatti
2003-12-09 13:45   ` Joe Thornber
2003-12-09 14:00     ` Måns Rullgård
2003-12-09 14:10       ` Muli Ben-Yehuda
2003-12-09 14:21         ` Måns Rullgård
2003-12-09 14:16       ` Joe Thornber
2003-12-09 14:24       ` Stefan Smietanowski
2003-12-09 14:10     ` Marcelo Tosatti
2003-12-09 14:34       ` Joe Thornber
2003-12-09 21:07         ` Paul Jakma
2003-12-09 22:26           ` Joe Thornber
2003-12-09 22:48             ` Marcelo Tosatti
2003-12-09 23:46               ` Paul Jakma
2003-12-09 23:58                 ` William Lee Irwin III
2003-12-10  0:15                   ` Paul Jakma
2003-12-10 11:49                     ` Stephan von Krawczynski
2003-12-10 23:15                     ` Dave Jones
2003-12-10  0:27                 ` Jose Luis Domingo Lopez
2003-12-10  0:59                   ` Tupshin Harper
2003-12-10  9:40                     ` Wichert Akkerman
2003-12-10  2:44                 ` Martin J. Bligh
2003-12-10 15:55                   ` Paul Jakma
2003-12-10 16:54                     ` venom
2003-12-10 17:00                       ` Paul Jakma
2003-12-10 17:14                         ` venom
2003-12-10 23:40                         ` Mike Fedyk
2003-12-11 19:48                           ` Alasdair G Kergon
2003-12-16 19:15                     ` bill davidsen
2003-12-16 19:01                 ` bill davidsen
2003-12-10  8:45             ` Jens Axboe
2003-12-10 17:30               ` Paul Jakma
2003-12-10 17:44                 ` Joe Thornber
2003-12-10 17:48                   ` venom
2003-12-10 18:07                   ` Paul Jakma
2003-12-10 19:30                   ` Jens Axboe
2003-12-09 17:02       ` Bill Rugolsky Jr.
2003-12-09 22:53         ` Ciaran McCreesh
2003-12-10  3:38         ` Lincoln Dale
2003-12-10  6:12           ` Willy Tarreau
2003-12-10  6:35             ` viro
2003-12-09 17:45       ` Kevin Corry
2003-12-09 19:47         ` Paul P Komkoff Jr
2003-12-09 14:23     ` Stefan Smietanowski
2003-12-09 14:36       ` Joe Thornber
2003-12-09 19:50 ` William Lee Irwin III
2003-12-09 21:13   ` Paul Jakma
2003-12-10  0:49 Carl-Daniel Hailfinger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).