* [Patch 3/4] dm: core files
2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
2003-12-09 12:24 ` [Patch 1/4] fs.h: b_journal_head Joe Thornber
2003-12-09 12:25 ` [Patch 2/4] dm: mempool backport Joe Thornber
@ 2003-12-09 12:26 ` Joe Thornber
2003-12-09 12:26 ` [Patch 4/4] dm: ioctl interface Joe Thornber
` (2 subsequent siblings)
5 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:26 UTC (permalink / raw)
To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List
Device-mapper core.
--- diff/Documentation/Configure.help 2003-12-09 10:25:24.000000000 +0000
+++ source/Documentation/Configure.help 2003-12-09 10:39:47.000000000 +0000
@@ -1912,6 +1912,20 @@
want), say M here and read <file:Documentation/modules.txt>. The
module will be called lvm-mod.o.
+Device-mapper support
+CONFIG_BLK_DEV_DM
+ Device-mapper is a low level volume manager. It works by allowing
+ people to specify mappings for ranges of logical sectors. Various
+ mapping types are available, in addition people may write their own
+ modules containing custom mappings if they wish.
+
+ Higher level volume managers such as LVM2 use this driver.
+
+ If you want to compile this as a module, say M here and read
+ <file:Documentation/modules.txt>. The module will be called dm-mod.o.
+
+ If unsure, say N.
+
Multiple devices driver support (RAID and LVM)
CONFIG_MD
Support multiple physical spindles through a single logical device.
--- diff/MAINTAINERS 2003-12-09 10:25:24.000000000 +0000
+++ source/MAINTAINERS 2003-12-09 10:39:47.000000000 +0000
@@ -570,6 +570,13 @@
W: http://www.debian.org/~dz/i8k/
S: Maintained
+DEVICE MAPPER
+P: Joe Thornber
+M: dm@uk.sistina.com
+L: linux-LVM@sistina.com
+W: http://www.sistina.com/lvm
+S: Maintained
+
DEVICE NUMBER REGISTRY
P: H. Peter Anvin
M: hpa@zytor.com
--- diff/drivers/md/Config.in 2001-09-26 16:15:05.000000000 +0100
+++ source/drivers/md/Config.in 2003-12-09 10:42:38.000000000 +0000
@@ -14,5 +14,6 @@
dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
+dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
endmenu
--- diff/drivers/md/Makefile 2002-01-17 10:07:52.000000000 +0000
+++ source/drivers/md/Makefile 2003-12-09 10:43:27.000000000 +0000
@@ -4,24 +4,32 @@
O_TARGET := mddev.o
-export-objs := md.o xor.o
-list-multi := lvm-mod.o
+export-objs := md.o xor.o dm-table.o dm-target.o dm.o
+
+list-multi := lvm-mod.o dm-mod.o dm-mirror-mod.o
lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
+dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o
# Note: link order is important. All raid personalities
# and xor.o must come before md.o, as they each initialise
# themselves, and md.o may use the personalities when it
# auto-initialised.
-obj-$(CONFIG_MD_LINEAR) += linear.o
-obj-$(CONFIG_MD_RAID0) += raid0.o
-obj-$(CONFIG_MD_RAID1) += raid1.o
-obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
-obj-$(CONFIG_MD_MULTIPATH) += multipath.o
-obj-$(CONFIG_BLK_DEV_MD) += md.o
-obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
+obj-$(CONFIG_MD_LINEAR) += linear.o
+obj-$(CONFIG_MD_RAID0) += raid0.o
+obj-$(CONFIG_MD_RAID1) += raid1.o
+obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
+obj-$(CONFIG_MD_MULTIPATH) += multipath.o
+obj-$(CONFIG_BLK_DEV_MD) += md.o
+
+obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
+
+obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
include $(TOPDIR)/Rules.make
lvm-mod.o: $(lvm-mod-objs)
$(LD) -r -o $@ $(lvm-mod-objs)
+
+dm-mod.o: $(dm-mod-objs)
+ $(LD) -r -o $@ $(dm-mod-objs)
--- diff/drivers/md/dm-linear.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-linear.c 2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+/*
+ * Linear: maps a linear range of a device.
+ */
+struct linear_c {
+ struct dm_dev *dev;
+ sector_t start;
+};
+
+/*
+ * Construct a linear mapping: <dev_path> <offset>
+ */
+static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ struct linear_c *lc;
+
+ if (argc != 2) {
+ ti->error = "dm-linear: Invalid argument count";
+ return -EINVAL;
+ }
+
+ lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+ if (lc == NULL) {
+ ti->error = "dm-linear: Cannot allocate linear context";
+ return -ENOMEM;
+ }
+
+ if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
+ ti->error = "dm-linear: Invalid device sector";
+ goto bad;
+ }
+
+ if (dm_get_device(ti, argv[0], lc->start, ti->len,
+ dm_table_get_mode(ti->table), &lc->dev)) {
+ ti->error = "dm-linear: Device lookup failed";
+ goto bad;
+ }
+
+ ti->private = lc;
+ return 0;
+
+ bad:
+ kfree(lc);
+ return -EINVAL;
+}
+
+static void linear_dtr(struct dm_target *ti)
+{
+ struct linear_c *lc = (struct linear_c *) ti->private;
+
+ dm_put_device(ti, lc->dev);
+ kfree(lc);
+}
+
+static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+ union map_info *map_context)
+{
+ struct linear_c *lc = (struct linear_c *) ti->private;
+
+ bh->b_rdev = lc->dev->dev;
+ bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
+
+ return 1;
+}
+
+static int linear_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned int maxlen)
+{
+ struct linear_c *lc = (struct linear_c *) ti->private;
+ kdev_t kdev;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ kdev = to_kdev_t(lc->dev->bdev->bd_dev);
+ snprintf(result, maxlen, "%s " SECTOR_FORMAT,
+ dm_kdevname(kdev), lc->start);
+ break;
+ }
+ return 0;
+}
+
+static struct target_type linear_target = {
+ .name = "linear",
+ .module = THIS_MODULE,
+ .ctr = linear_ctr,
+ .dtr = linear_dtr,
+ .map = linear_map,
+ .status = linear_status,
+};
+
+int __init dm_linear_init(void)
+{
+ int r = dm_register_target(&linear_target);
+
+ if (r < 0)
+ DMERR("linear: register failed %d", r);
+
+ return r;
+}
+
+void dm_linear_exit(void)
+{
+ int r = dm_unregister_target(&linear_target);
+
+ if (r < 0)
+ DMERR("linear: unregister failed %d", r);
+}
--- diff/drivers/md/dm-stripe.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-stripe.c 2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+struct stripe {
+ struct dm_dev *dev;
+ sector_t physical_start;
+};
+
+struct stripe_c {
+ uint32_t stripes;
+
+ /* The size of this target / num. stripes */
+ uint32_t stripe_width;
+
+ /* stripe chunk size */
+ uint32_t chunk_shift;
+ sector_t chunk_mask;
+
+ struct stripe stripe[0];
+};
+
+static inline struct stripe_c *alloc_context(unsigned int stripes)
+{
+ size_t len;
+
+ if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
+ stripes))
+ return NULL;
+
+ len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
+
+ return kmalloc(len, GFP_KERNEL);
+}
+
+/*
+ * Parse a single <dev> <sector> pair
+ */
+static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
+ unsigned int stripe, char **argv)
+{
+ sector_t start;
+
+ if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
+ return -EINVAL;
+
+ if (dm_get_device(ti, argv[0], start, sc->stripe_width,
+ dm_table_get_mode(ti->table),
+ &sc->stripe[stripe].dev))
+ return -ENXIO;
+
+ sc->stripe[stripe].physical_start = start;
+ return 0;
+}
+
+/*
+ * FIXME: Nasty function, only present because we can't link
+ * against __moddi3 and __divdi3.
+ *
+ * returns a == b * n
+ */
+static int multiple(sector_t a, sector_t b, sector_t *n)
+{
+ sector_t acc, prev, i;
+
+ *n = 0;
+ while (a >= b) {
+ for (acc = b, prev = 0, i = 1;
+ acc <= a;
+ prev = acc, acc <<= 1, i <<= 1)
+ ;
+
+ a -= prev;
+ *n += i >> 1;
+ }
+
+ return a == 0;
+}
+
+/*
+ * Construct a striped mapping.
+ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
+ */
+static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ struct stripe_c *sc;
+ sector_t width;
+ uint32_t stripes;
+ uint32_t chunk_size;
+ char *end;
+ int r;
+ unsigned int i;
+
+ if (argc < 2) {
+ ti->error = "dm-stripe: Not enough arguments";
+ return -EINVAL;
+ }
+
+ stripes = simple_strtoul(argv[0], &end, 10);
+ if (*end) {
+ ti->error = "dm-stripe: Invalid stripe count";
+ return -EINVAL;
+ }
+
+ chunk_size = simple_strtoul(argv[1], &end, 10);
+ if (*end) {
+ ti->error = "dm-stripe: Invalid chunk_size";
+ return -EINVAL;
+ }
+
+ /*
+ * chunk_size is a power of two
+ */
+ if (!chunk_size || (chunk_size & (chunk_size - 1))) {
+ ti->error = "dm-stripe: Invalid chunk size";
+ return -EINVAL;
+ }
+
+ if (!multiple(ti->len, stripes, &width)) {
+ ti->error = "dm-stripe: Target length not divisable by "
+ "number of stripes";
+ return -EINVAL;
+ }
+
+ /*
+ * Do we have enough arguments for that many stripes ?
+ */
+ if (argc != (2 + 2 * stripes)) {
+ ti->error = "dm-stripe: Not enough destinations specified";
+ return -EINVAL;
+ }
+
+ sc = alloc_context(stripes);
+ if (!sc) {
+ ti->error = "dm-stripe: Memory allocation for striped context "
+ "failed";
+ return -ENOMEM;
+ }
+
+ sc->stripes = stripes;
+ sc->stripe_width = width;
+
+ sc->chunk_mask = ((sector_t) chunk_size) - 1;
+ for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
+ chunk_size >>= 1;
+ sc->chunk_shift--;
+
+ /*
+ * Get the stripe destinations.
+ */
+ for (i = 0; i < stripes; i++) {
+ argv += 2;
+
+ r = get_stripe(ti, sc, i, argv);
+ if (r < 0) {
+ ti->error = "dm-stripe: Couldn't parse stripe "
+ "destination";
+ while (i--)
+ dm_put_device(ti, sc->stripe[i].dev);
+ kfree(sc);
+ return r;
+ }
+ }
+
+ ti->private = sc;
+ return 0;
+}
+
+static void stripe_dtr(struct dm_target *ti)
+{
+ unsigned int i;
+ struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+ for (i = 0; i < sc->stripes; i++)
+ dm_put_device(ti, sc->stripe[i].dev);
+
+ kfree(sc);
+}
+
+static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+ union map_info *context)
+{
+ struct stripe_c *sc = (struct stripe_c *) ti->private;
+
+ sector_t offset = bh->b_rsector - ti->begin;
+ uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
+ uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */
+ chunk = chunk / sc->stripes;
+
+ bh->b_rdev = sc->stripe[stripe].dev->dev;
+ bh->b_rsector = sc->stripe[stripe].physical_start +
+ (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
+ return 1;
+}
+
+static int stripe_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned int maxlen)
+{
+ struct stripe_c *sc = (struct stripe_c *) ti->private;
+ int offset;
+ unsigned int i;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
+ sc->stripes, sc->chunk_mask + 1);
+ for (i = 0; i < sc->stripes; i++) {
+ offset +=
+ snprintf(result + offset, maxlen - offset,
+ " %s " SECTOR_FORMAT,
+ dm_kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)),
+ sc->stripe[i].physical_start);
+ }
+ break;
+ }
+ return 0;
+}
+
+static struct target_type stripe_target = {
+ .name = "striped",
+ .module = THIS_MODULE,
+ .ctr = stripe_ctr,
+ .dtr = stripe_dtr,
+ .map = stripe_map,
+ .status = stripe_status,
+};
+
+int __init dm_stripe_init(void)
+{
+ int r;
+
+ r = dm_register_target(&stripe_target);
+ if (r < 0)
+ DMWARN("striped target registration failed");
+
+ return r;
+}
+
+void dm_stripe_exit(void)
+{
+ if (dm_unregister_target(&stripe_target))
+ DMWARN("striped target unregistration failed");
+
+ return;
+}
--- diff/drivers/md/dm-table.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-table.c 2003-12-09 10:57:20.000000000 +0000
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <asm/atomic.h>
+
+#define MAX_DEPTH 16
+#define NODE_SIZE L1_CACHE_BYTES
+#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
+#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
+
+struct dm_table {
+ atomic_t holders;
+
+ /* btree table */
+ unsigned int depth;
+ unsigned int counts[MAX_DEPTH]; /* in nodes */
+ sector_t *index[MAX_DEPTH];
+
+ unsigned int num_targets;
+ unsigned int num_allocated;
+ sector_t *highs;
+ struct dm_target *targets;
+
+ /*
+ * Indicates the rw permissions for the new logical
+ * device. This should be a combination of FMODE_READ
+ * and FMODE_WRITE.
+ */
+ int mode;
+
+ /* a list of devices used by this table */
+ struct list_head devices;
+
+ /* events get handed up using this callback */
+ void (*event_fn)(void *);
+ void *event_context;
+};
+
+/*
+ * Similar to ceiling(log_size(n))
+ */
+static unsigned int int_log(unsigned long n, unsigned long base)
+{
+ int result = 0;
+
+ while (n > 1) {
+ n = dm_div_up(n, base);
+ result++;
+ }
+
+ return result;
+}
+
+/*
+ * Calculate the index of the child node of the n'th node k'th key.
+ */
+static inline unsigned int get_child(unsigned int n, unsigned int k)
+{
+ return (n * CHILDREN_PER_NODE) + k;
+}
+
+/*
+ * Return the n'th node of level l from table t.
+ */
+static inline sector_t *get_node(struct dm_table *t, unsigned int l,
+ unsigned int n)
+{
+ return t->index[l] + (n * KEYS_PER_NODE);
+}
+
+/*
+ * Return the highest key that you could lookup from the n'th
+ * node on level l of the btree.
+ */
+static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
+{
+ for (; l < t->depth - 1; l++)
+ n = get_child(n, CHILDREN_PER_NODE - 1);
+
+ if (n >= t->counts[l])
+ return (sector_t) - 1;
+
+ return get_node(t, l, n)[KEYS_PER_NODE - 1];
+}
+
+/*
+ * Fills in a level of the btree based on the highs of the level
+ * below it.
+ */
+static int setup_btree_index(unsigned int l, struct dm_table *t)
+{
+ unsigned int n, k;
+ sector_t *node;
+
+ for (n = 0U; n < t->counts[l]; n++) {
+ node = get_node(t, l, n);
+
+ for (k = 0U; k < KEYS_PER_NODE; k++)
+ node[k] = high(t, l + 1, get_child(n, k));
+ }
+
+ return 0;
+}
+
+void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
+{
+ unsigned long size;
+ void *addr;
+
+ /*
+ * Check that we're not going to overflow.
+ */
+ if (nmemb > (ULONG_MAX / elem_size))
+ return NULL;
+
+ size = nmemb * elem_size;
+ addr = vmalloc(size);
+ if (addr)
+ memset(addr, 0, size);
+
+ return addr;
+}
+
+int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
+{
+ struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+
+ if (!t)
+ return -ENOMEM;
+
+ memset(t, 0, sizeof(*t));
+ INIT_LIST_HEAD(&t->devices);
+ atomic_set(&t->holders, 1);
+
+ num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
+
+ /* Allocate both the target array and offset array at once. */
+ t->highs = (sector_t *) dm_vcalloc(sizeof(struct dm_target) +
+ sizeof(sector_t), num_targets);
+ if (!t->highs) {
+ kfree(t);
+ return -ENOMEM;
+ }
+
+ memset(t->highs, -1, sizeof(*t->highs) * num_targets);
+
+ t->targets = (struct dm_target *) (t->highs + num_targets);
+ t->num_allocated = num_targets;
+ t->mode = mode;
+ *result = t;
+ return 0;
+}
+
+static void free_devices(struct list_head *devices)
+{
+ struct list_head *tmp, *next;
+
+ for (tmp = devices->next; tmp != devices; tmp = next) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ next = tmp->next;
+ kfree(dd);
+ }
+}
+
+void table_destroy(struct dm_table *t)
+{
+ unsigned int i;
+
+ /* free the indexes (see dm_table_complete) */
+ if (t->depth >= 2)
+ vfree(t->index[t->depth - 2]);
+
+ /* free the targets */
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *tgt = t->targets + i;
+
+ if (tgt->type->dtr)
+ tgt->type->dtr(tgt);
+
+ dm_put_target_type(tgt->type);
+ }
+
+ vfree(t->highs);
+
+ /* free the device list */
+ if (t->devices.next != &t->devices) {
+ DMWARN("devices still present during destroy: "
+ "dm_table_remove_device calls missing");
+
+ free_devices(&t->devices);
+ }
+
+ kfree(t);
+}
+
+void dm_table_get(struct dm_table *t)
+{
+ atomic_inc(&t->holders);
+}
+
+void dm_table_put(struct dm_table *t)
+{
+ if (atomic_dec_and_test(&t->holders))
+ table_destroy(t);
+}
+
+/*
+ * Convert a device path to a dev_t.
+ */
+static int lookup_device(const char *path, kdev_t *dev)
+{
+ int r;
+ struct nameidata nd;
+ struct inode *inode;
+
+ if (!path_init(path, LOOKUP_FOLLOW, &nd))
+ return 0;
+
+ if ((r = path_walk(path, &nd)))
+ goto out;
+
+ inode = nd.dentry->d_inode;
+ if (!inode) {
+ r = -ENOENT;
+ goto out;
+ }
+
+ if (!S_ISBLK(inode->i_mode)) {
+ r = -ENOTBLK;
+ goto out;
+ }
+
+ *dev = inode->i_rdev;
+
+ out:
+ path_release(&nd);
+ return r;
+}
+
+/*
+ * See if we've already got a device in the list.
+ */
+static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
+{
+ struct list_head *tmp;
+
+ list_for_each(tmp, l) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ if (kdev_same(dd->dev, dev))
+ return dd;
+ }
+
+ return NULL;
+}
+
+/*
+ * Open a device so we can use it as a map destination.
+ */
+static int open_dev(struct dm_dev *dd)
+{
+ if (dd->bdev)
+ BUG();
+
+ dd->bdev = bdget(kdev_t_to_nr(dd->dev));
+ if (!dd->bdev)
+ return -ENOMEM;
+
+ return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW);
+}
+
+/*
+ * Close a device that we've been using.
+ */
+static void close_dev(struct dm_dev *dd)
+{
+ if (!dd->bdev)
+ return;
+
+ blkdev_put(dd->bdev, BDEV_RAW);
+ dd->bdev = NULL;
+}
+
+/*
+ * If possible (ie. blk_size[major] is set), this checks an area
+ * of a destination device is valid.
+ */
+static int check_device_area(kdev_t dev, sector_t start, sector_t len)
+{
+ int *sizes;
+ sector_t dev_size;
+
+ if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)]))
+ /* we don't know the device details,
+ * so give the benefit of the doubt */
+ return 1;
+
+ /* convert to 512-byte sectors */
+ dev_size <<= 1;
+
+ return ((start < dev_size) && (len <= (dev_size - start)));
+}
+
+/*
+ * This upgrades the mode on an already open dm_dev. Being
+ * careful to leave things as they were if we fail to reopen the
+ * device.
+ */
+static int upgrade_mode(struct dm_dev *dd, int new_mode)
+{
+ int r;
+ struct dm_dev dd_copy;
+
+ memcpy(&dd_copy, dd, sizeof(dd_copy));
+
+ dd->mode |= new_mode;
+ dd->bdev = NULL;
+ r = open_dev(dd);
+ if (!r)
+ close_dev(&dd_copy);
+ else
+ memcpy(dd, &dd_copy, sizeof(dd_copy));
+
+ return r;
+}
+
+/*
+ * Add a device to the list, or just increment the usage count if
+ * it's already present.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+ sector_t len, int mode, struct dm_dev **result)
+{
+ int r;
+ kdev_t dev;
+ struct dm_dev *dd;
+ unsigned major, minor;
+ struct dm_table *t = ti->table;
+
+ if (!t)
+ BUG();
+
+ if (sscanf(path, "%u:%u", &major, &minor) == 2) {
+ /* Extract the major/minor numbers */
+ dev = mk_kdev(major, minor);
+ } else {
+ /* convert the path to a device */
+ if ((r = lookup_device(path, &dev)))
+ return r;
+ }
+
+ dd = find_device(&t->devices, dev);
+ if (!dd) {
+ dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+ if (!dd)
+ return -ENOMEM;
+
+ dd->dev = dev;
+ dd->mode = mode;
+ dd->bdev = NULL;
+
+ if ((r = open_dev(dd))) {
+ kfree(dd);
+ return r;
+ }
+
+ atomic_set(&dd->count, 0);
+ list_add(&dd->list, &t->devices);
+
+ } else if (dd->mode != (mode | dd->mode)) {
+ r = upgrade_mode(dd, mode);
+ if (r)
+ return r;
+ }
+ atomic_inc(&dd->count);
+
+ if (!check_device_area(dd->dev, start, len)) {
+ DMWARN("device %s too small for target", path);
+ dm_put_device(ti, dd);
+ return -EINVAL;
+ }
+
+ *result = dd;
+
+ return 0;
+}
+
+/*
+ * Decrement a devices use count and remove it if neccessary.
+ */
+void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
+{
+ if (atomic_dec_and_test(&dd->count)) {
+ close_dev(dd);
+ list_del(&dd->list);
+ kfree(dd);
+ }
+}
+
+/*
+ * Checks to see if the target joins onto the end of the table.
+ */
+static int adjoin(struct dm_table *table, struct dm_target *ti)
+{
+ struct dm_target *prev;
+
+ if (!table->num_targets)
+ return !ti->begin;
+
+ prev = &table->targets[table->num_targets - 1];
+ return (ti->begin == (prev->begin + prev->len));
+}
+
+/*
+ * Used to dynamically allocate the arg array.
+ */
+static char **realloc_argv(unsigned *array_size, char **old_argv)
+{
+ char **argv;
+ unsigned new_size;
+
+ new_size = *array_size ? *array_size * 2 : 64;
+ argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
+ if (argv) {
+ memcpy(argv, old_argv, *array_size * sizeof(*argv));
+ *array_size = new_size;
+ }
+
+ kfree(old_argv);
+ return argv;
+}
+
+/*
+ * Destructively splits up the argument list to pass to ctr.
+ */
+static int split_args(int *argc, char ***argvp, char *input)
+{
+ char *start, *end = input, *out, **argv = NULL;
+ unsigned array_size = 0;
+
+ *argc = 0;
+ argv = realloc_argv(&array_size, argv);
+ if (!argv)
+ return -ENOMEM;
+
+ while (1) {
+ start = end;
+
+ /* Skip whitespace */
+ while (*start && isspace(*start))
+ start++;
+
+ if (!*start)
+ break; /* success, we hit the end */
+
+ /* 'out' is used to remove any back-quotes */
+ end = out = start;
+ while (*end) {
+ /* Everything apart from '\0' can be quoted */
+ if (*end == '\\' && *(end + 1)) {
+ *out++ = *(end + 1);
+ end += 2;
+ continue;
+ }
+
+ if (isspace(*end))
+ break; /* end of token */
+
+ *out++ = *end++;
+ }
+
+ /* have we already filled the array ? */
+ if ((*argc + 1) > array_size) {
+ argv = realloc_argv(&array_size, argv);
+ if (!argv)
+ return -ENOMEM;
+ }
+
+ /* we know this is whitespace */
+ if (*end)
+ end++;
+
+ /* terminate the string and put it in the array */
+ *out = '\0';
+ argv[*argc] = start;
+ (*argc)++;
+ }
+
+ *argvp = argv;
+ return 0;
+}
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+ sector_t start, sector_t len, char *params)
+{
+ int r = -EINVAL, argc;
+ char **argv;
+ struct dm_target *tgt;
+
+ if (t->num_targets >= t->num_allocated)
+ return -ENOMEM;
+
+ tgt = t->targets + t->num_targets;
+ memset(tgt, 0, sizeof(*tgt));
+
+ tgt->type = dm_get_target_type(type);
+ if (!tgt->type) {
+ tgt->error = "unknown target type";
+ return -EINVAL;
+ }
+
+ tgt->table = t;
+ tgt->begin = start;
+ tgt->len = len;
+ tgt->error = "Unknown error";
+
+ /*
+ * Does this target adjoin the previous one ?
+ */
+ if (!adjoin(t, tgt)) {
+ tgt->error = "Gap in table";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ r = split_args(&argc, &argv, params);
+ if (r) {
+ tgt->error = "couldn't split parameters (insufficient memory)";
+ goto bad;
+ }
+
+ r = tgt->type->ctr(tgt, argc, argv);
+ kfree(argv);
+ if (r)
+ goto bad;
+
+ t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
+ return 0;
+
+ bad:
+ printk(KERN_ERR DM_NAME ": %s\n", tgt->error);
+ dm_put_target_type(tgt->type);
+ return r;
+}
+
+static int setup_indexes(struct dm_table *t)
+{
+ int i;
+ unsigned int total = 0;
+ sector_t *indexes;
+
+ /* allocate the space for *all* the indexes */
+ for (i = t->depth - 2; i >= 0; i--) {
+ t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
+ total += t->counts[i];
+ }
+
+ indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
+ if (!indexes)
+ return -ENOMEM;
+
+ /* set up internal nodes, bottom-up */
+ for (i = t->depth - 2, total = 0; i >= 0; i--) {
+ t->index[i] = indexes;
+ indexes += (KEYS_PER_NODE * t->counts[i]);
+ setup_btree_index(i, t);
+ }
+
+ return 0;
+}
+
+/*
+ * Builds the btree to index the map.
+ */
+int dm_table_complete(struct dm_table *t)
+{
+ int r = 0;
+ unsigned int leaf_nodes;
+
+ /* how many indexes will the btree have ? */
+ leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
+ t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
+
+ /* leaf layer has already been set up */
+ t->counts[t->depth - 1] = leaf_nodes;
+ t->index[t->depth - 1] = t->highs;
+
+ if (t->depth >= 2)
+ r = setup_indexes(t);
+
+ return r;
+}
+
+static spinlock_t _event_lock = SPIN_LOCK_UNLOCKED;
+void dm_table_event_callback(struct dm_table *t,
+ void (*fn)(void *), void *context)
+{
+ spin_lock_irq(&_event_lock);
+ t->event_fn = fn;
+ t->event_context = context;
+ spin_unlock_irq(&_event_lock);
+}
+
+void dm_table_event(struct dm_table *t)
+{
+ spin_lock(&_event_lock);
+ if (t->event_fn)
+ t->event_fn(t->event_context);
+ spin_unlock(&_event_lock);
+}
+
+sector_t dm_table_get_size(struct dm_table *t)
+{
+ return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
+}
+
+struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
+{
+ if (index > t->num_targets)
+ return NULL;
+
+ return t->targets + index;
+}
+
+/*
+ * Search the btree for the correct target.
+ */
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
+{
+ unsigned int l, n = 0, k = 0;
+ sector_t *node;
+
+ for (l = 0; l < t->depth; l++) {
+ n = get_child(n, k);
+ node = get_node(t, l, n);
+
+ for (k = 0; k < KEYS_PER_NODE; k++)
+ if (node[k] >= sector)
+ break;
+ }
+
+ return &t->targets[(KEYS_PER_NODE * n) + k];
+}
+
+unsigned int dm_table_get_num_targets(struct dm_table *t)
+{
+ return t->num_targets;
+}
+
+struct list_head *dm_table_get_devices(struct dm_table *t)
+{
+ return &t->devices;
+}
+
+int dm_table_get_mode(struct dm_table *t)
+{
+ return t->mode;
+}
+
+void dm_table_suspend_targets(struct dm_table *t)
+{
+ int i;
+
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = t->targets + i;
+
+ if (ti->type->suspend)
+ ti->type->suspend(ti);
+ }
+}
+
+void dm_table_resume_targets(struct dm_table *t)
+{
+ int i;
+
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = t->targets + i;
+
+ if (ti->type->resume)
+ ti->type->resume(ti);
+ }
+}
+
+EXPORT_SYMBOL(dm_get_device);
+EXPORT_SYMBOL(dm_put_device);
+EXPORT_SYMBOL(dm_table_event);
+EXPORT_SYMBOL(dm_table_get_mode);
--- diff/drivers/md/dm-target.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-target.c 2003-12-09 10:39:55.000000000 +0000
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/slab.h>
+
+struct tt_internal {
+ struct target_type tt;
+
+ struct list_head list;
+ long use;
+};
+
+static LIST_HEAD(_targets);
+static DECLARE_RWSEM(_lock);
+
+#define DM_MOD_NAME_SIZE 32
+
+static inline struct tt_internal *__find_target_type(const char *name)
+{
+ struct list_head *tih;
+ struct tt_internal *ti;
+
+ list_for_each(tih, &_targets) {
+ ti = list_entry(tih, struct tt_internal, list);
+
+ if (!strcmp(name, ti->tt.name))
+ return ti;
+ }
+
+ return NULL;
+}
+
+static struct tt_internal *get_target_type(const char *name)
+{
+ struct tt_internal *ti;
+
+ down_read(&_lock);
+ ti = __find_target_type(name);
+
+ if (ti) {
+ if (ti->use == 0 && ti->tt.module)
+ __MOD_INC_USE_COUNT(ti->tt.module);
+ ti->use++;
+ }
+ up_read(&_lock);
+
+ return ti;
+}
+
+static void load_module(const char *name)
+{
+ char module_name[DM_MOD_NAME_SIZE] = "dm-";
+
+ /* Length check for strcat() below */
+ if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
+ return;
+
+ strcat(module_name, name);
+ request_module(module_name);
+}
+
+struct target_type *dm_get_target_type(const char *name)
+{
+ struct tt_internal *ti = get_target_type(name);
+
+ if (!ti) {
+ load_module(name);
+ ti = get_target_type(name);
+ }
+
+ return ti ? &ti->tt : NULL;
+}
+
+void dm_put_target_type(struct target_type *t)
+{
+ struct tt_internal *ti = (struct tt_internal *) t;
+
+ down_read(&_lock);
+ if (--ti->use == 0 && ti->tt.module)
+ __MOD_DEC_USE_COUNT(ti->tt.module);
+
+ if (ti->use < 0)
+ BUG();
+ up_read(&_lock);
+
+ return;
+}
+
+static struct tt_internal *alloc_target(struct target_type *t)
+{
+ struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+ if (ti) {
+ memset(ti, 0, sizeof(*ti));
+ ti->tt = *t;
+ }
+
+ return ti;
+}
+
+int dm_register_target(struct target_type *t)
+{
+ int rv = 0;
+ struct tt_internal *ti = alloc_target(t);
+
+ if (!ti)
+ return -ENOMEM;
+
+ down_write(&_lock);
+ if (__find_target_type(t->name)) {
+ kfree(ti);
+ rv = -EEXIST;
+ } else
+ list_add(&ti->list, &_targets);
+
+ up_write(&_lock);
+ return rv;
+}
+
+int dm_unregister_target(struct target_type *t)
+{
+ struct tt_internal *ti;
+
+ down_write(&_lock);
+ if (!(ti = __find_target_type(t->name))) {
+ up_write(&_lock);
+ return -EINVAL;
+ }
+
+ if (ti->use) {
+ up_write(&_lock);
+ return -ETXTBSY;
+ }
+
+ list_del(&ti->list);
+ kfree(ti);
+
+ up_write(&_lock);
+ return 0;
+}
+
+/*
+ * io-err: always fails an io, useful for bringing
+ * up LVs that have holes in them.
+ */
+static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
+{
+ return 0;
+}
+
+static void io_err_dtr(struct dm_target *ti)
+{
+ /* empty */
+}
+
+static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw,
+ union map_info *map_context)
+{
+ return -EIO;
+}
+
+static struct target_type error_target = {
+ .name = "error",
+ .ctr = io_err_ctr,
+ .dtr = io_err_dtr,
+ .map = io_err_map,
+};
+
+int dm_target_init(void)
+{
+ return dm_register_target(&error_target);
+}
+
+void dm_target_exit(void)
+{
+ if (dm_unregister_target(&error_target))
+ DMWARN("error target unregistration failed");
+}
+
+EXPORT_SYMBOL(dm_register_target);
+EXPORT_SYMBOL(dm_unregister_target);
--- diff/drivers/md/dm.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm.c 2003-12-09 10:46:32.000000000 +0000
@@ -0,0 +1,1112 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/lvm.h>
+
+#include <asm/uaccess.h>
+
+static const char *_name = DM_NAME;
+#define DEFAULT_READ_AHEAD 64
+
+struct dm_io {
+ struct mapped_device *md;
+
+ struct dm_target *ti;
+ int rw;
+ union map_info map_context;
+ void (*end_io) (struct buffer_head * bh, int uptodate);
+ void *context;
+};
+
+struct deferred_io {
+ int rw;
+ struct buffer_head *bh;
+ struct deferred_io *next;
+};
+
+/*
+ * Bits for the md->flags field.
+ */
+#define DMF_BLOCK_IO 0
+#define DMF_SUSPENDED 1
+
+struct mapped_device {
+ struct rw_semaphore lock;
+ atomic_t holders;
+
+ kdev_t dev;
+ unsigned long flags;
+
+ /*
+ * A list of ios that arrived while we were suspended.
+ */
+ atomic_t pending;
+ wait_queue_head_t wait;
+ struct deferred_io *deferred;
+
+ /*
+ * The current mapping.
+ */
+ struct dm_table *map;
+
+ /*
+ * io objects are allocated from here.
+ */
+ mempool_t *io_pool;
+
+ /*
+ * Event handling.
+ */
+ uint32_t event_nr;
+ wait_queue_head_t eventq;
+};
+
+#define MIN_IOS 256
+static kmem_cache_t *_io_cache;
+
+static struct mapped_device *get_kdev(kdev_t dev);
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*-----------------------------------------------------------------
+ * In order to avoid the 256 minor number limit we are going to
+ * register more major numbers as neccessary.
+ *---------------------------------------------------------------*/
+#define MAX_MINORS (1 << MINORBITS)
+
+struct major_details {
+ unsigned int major;
+
+ int transient;
+ struct list_head transient_list;
+
+ unsigned int first_free_minor;
+ int nr_free_minors;
+
+ struct mapped_device *mds[MAX_MINORS];
+ int blk_size[MAX_MINORS];
+ int blksize_size[MAX_MINORS];
+ int hardsect_size[MAX_MINORS];
+};
+
+static struct rw_semaphore _dev_lock;
+static struct major_details *_majors[MAX_BLKDEV];
+
+/*
+ * This holds a list of majors that non-specified device numbers
+ * may be allocated from. Only majors with free minors appear on
+ * this list.
+ */
+static LIST_HEAD(_transients_free);
+
+static int __alloc_major(unsigned int major, struct major_details **result)
+{
+ int r;
+ unsigned int transient = !major;
+ struct major_details *maj;
+
+ /* Major already allocated? */
+ if (major && _majors[major])
+ return 0;
+
+ maj = kmalloc(sizeof(*maj), GFP_KERNEL);
+ if (!maj)
+ return -ENOMEM;
+
+ memset(maj, 0, sizeof(*maj));
+ INIT_LIST_HEAD(&maj->transient_list);
+
+ maj->nr_free_minors = MAX_MINORS;
+
+ r = register_blkdev(major, _name, &dm_blk_dops);
+ if (r < 0) {
+ DMERR("register_blkdev failed for %d", major);
+ kfree(maj);
+ return r;
+ }
+ if (r > 0)
+ major = r;
+
+ maj->major = major;
+
+ if (transient) {
+ maj->transient = transient;
+ list_add_tail(&maj->transient_list, &_transients_free);
+ }
+
+ _majors[major] = maj;
+
+ blk_size[major] = maj->blk_size;
+ blksize_size[major] = maj->blksize_size;
+ hardsect_size[major] = maj->hardsect_size;
+ read_ahead[major] = DEFAULT_READ_AHEAD;
+
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request);
+
+ *result = maj;
+ return 0;
+}
+
+static void __free_major(struct major_details *maj)
+{
+ unsigned int major = maj->major;
+
+ list_del(&maj->transient_list);
+
+ read_ahead[major] = 0;
+ blk_size[major] = NULL;
+ blksize_size[major] = NULL;
+ hardsect_size[major] = NULL;
+
+ _majors[major] = NULL;
+ kfree(maj);
+
+ if (unregister_blkdev(major, _name) < 0)
+ DMERR("devfs_unregister_blkdev failed");
+}
+
+static void free_all_majors(void)
+{
+ unsigned int major = ARRAY_SIZE(_majors);
+
+ down_write(&_dev_lock);
+
+ while (major--)
+ if (_majors[major])
+ __free_major(_majors[major]);
+
+ up_write(&_dev_lock);
+}
+
+static void free_dev(kdev_t dev)
+{
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+ struct major_details *maj;
+
+ down_write(&_dev_lock);
+
+ maj = _majors[major];
+ if (!maj)
+ goto out;
+
+ maj->mds[minor] = NULL;
+ maj->nr_free_minors++;
+
+ if (maj->nr_free_minors == MAX_MINORS) {
+ __free_major(maj);
+ goto out;
+ }
+
+ if (!maj->transient)
+ goto out;
+
+ if (maj->nr_free_minors == 1)
+ list_add_tail(&maj->transient_list, &_transients_free);
+
+ if (minor < maj->first_free_minor)
+ maj->first_free_minor = minor;
+
+ out:
+ up_write(&_dev_lock);
+}
+
+static void __alloc_minor(struct major_details *maj, unsigned int minor,
+ struct mapped_device *md)
+{
+ maj->mds[minor] = md;
+ md->dev = mk_kdev(maj->major, minor);
+ maj->nr_free_minors--;
+
+ if (maj->transient && !maj->nr_free_minors)
+ list_del_init(&maj->transient_list);
+}
+
+/*
+ * See if requested kdev_t is available.
+ */
+static int specific_dev(kdev_t dev, struct mapped_device *md)
+{
+ int r = 0;
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+ struct major_details *maj;
+
+ if (!major || (major > MAX_BLKDEV) || (minor >= MAX_MINORS)) {
+ DMWARN("device number requested out of range (%d, %d)",
+ major, minor);
+ return -EINVAL;
+ }
+
+ down_write(&_dev_lock);
+ maj = _majors[major];
+
+ /* Register requested major? */
+ if (!maj) {
+ r = __alloc_major(major, &maj);
+ if (r)
+ goto out;
+
+ major = maj->major;
+ }
+
+ if (maj->mds[minor]) {
+ r = -EBUSY;
+ goto out;
+ }
+
+ __alloc_minor(maj, minor, md);
+
+ out:
+ up_write(&_dev_lock);
+
+ return r;
+}
+
+/*
+ * Find first unused device number, requesting a new major number if required.
+ */
+static int first_free_dev(struct mapped_device *md)
+{
+ int r = 0;
+ struct major_details *maj;
+
+ down_write(&_dev_lock);
+
+ if (list_empty(&_transients_free)) {
+ r = __alloc_major(0, &maj);
+ if (r)
+ goto out;
+ } else
+ maj = list_entry(_transients_free.next, struct major_details,
+ transient_list);
+
+ while (maj->mds[maj->first_free_minor++])
+ ;
+
+ __alloc_minor(maj, maj->first_free_minor - 1, md);
+
+ out:
+ up_write(&_dev_lock);
+
+ return r;
+}
+
+static struct mapped_device *get_kdev(kdev_t dev)
+{
+ struct mapped_device *md;
+ struct major_details *maj;
+
+ down_read(&_dev_lock);
+ maj = _majors[major(dev)];
+ if (!maj) {
+ md = NULL;
+ goto out;
+ }
+ md = maj->mds[minor(dev)];
+ if (md)
+ dm_get(md);
+ out:
+ up_read(&_dev_lock);
+
+ return md;
+}
+
+/*-----------------------------------------------------------------
+ * init/exit code
+ *---------------------------------------------------------------*/
+
+static __init int local_init(void)
+{
+ init_rwsem(&_dev_lock);
+
+ /* allocate a slab for the dm_ios */
+ _io_cache = kmem_cache_create("dm io",
+ sizeof(struct dm_io), 0, 0, NULL, NULL);
+
+ if (!_io_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void local_exit(void)
+{
+ kmem_cache_destroy(_io_cache);
+ free_all_majors();
+
+ DMINFO("cleaned up");
+}
+
+/*
+ * We have a lot of init/exit functions, so it seems easier to
+ * store them in an array. The disposable macro 'xx'
+ * expands a prefix into a pair of function names.
+ */
+static struct {
+ int (*init) (void);
+ void (*exit) (void);
+
+} _inits[] = {
+#define xx(n) {n ## _init, n ## _exit},
+ xx(local)
+ xx(dm_target)
+ xx(dm_linear)
+ xx(dm_stripe)
+ xx(dm_interface)
+#undef xx
+};
+
+static int __init dm_init(void)
+{
+ const int count = ARRAY_SIZE(_inits);
+
+ int r, i;
+
+ for (i = 0; i < count; i++) {
+ r = _inits[i].init();
+ if (r)
+ goto bad;
+ }
+
+ return 0;
+
+ bad:
+ while (i--)
+ _inits[i].exit();
+
+ return r;
+}
+
+static void __exit dm_exit(void)
+{
+ int i = ARRAY_SIZE(_inits);
+
+ while (i--)
+ _inits[i].exit();
+}
+
+/*
+ * Block device functions
+ */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+ struct mapped_device *md;
+
+ md = get_kdev(inode->i_rdev);
+ if (!md)
+ return -ENXIO;
+
+ return 0;
+}
+
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+ struct mapped_device *md;
+
+ md = get_kdev(inode->i_rdev);
+ dm_put(md); /* put the reference gained by dm_blk_open */
+ dm_put(md);
+ return 0;
+}
+
+static inline struct dm_io *alloc_io(struct mapped_device *md)
+{
+ return mempool_alloc(md->io_pool, GFP_NOIO);
+}
+
+static inline void free_io(struct mapped_device *md, struct dm_io *io)
+{
+ mempool_free(io, md->io_pool);
+}
+
+static inline struct deferred_io *alloc_deferred(void)
+{
+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+static inline void free_deferred(struct deferred_io *di)
+{
+ kfree(di);
+}
+
+static inline sector_t volume_size(kdev_t dev)
+{
+ return blk_size[major(dev)][minor(dev)] << 1;
+}
+
+/* FIXME: check this */
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+ unsigned int command, unsigned long a)
+{
+ kdev_t dev = inode->i_rdev;
+ long size;
+
+ switch (command) {
+ case BLKROSET:
+ case BLKROGET:
+ case BLKRASET:
+ case BLKRAGET:
+ case BLKFLSBUF:
+ case BLKSSZGET:
+ //case BLKRRPART: /* Re-read partition tables */
+ //case BLKPG:
+ case BLKELVGET:
+ case BLKELVSET:
+ case BLKBSZGET:
+ case BLKBSZSET:
+ return blk_ioctl(dev, command, a);
+ break;
+
+ case BLKGETSIZE:
+ size = volume_size(dev);
+ if (copy_to_user((void *) a, &size, sizeof(long)))
+ return -EFAULT;
+ break;
+
+ case BLKGETSIZE64:
+ size = volume_size(dev);
+ if (put_user((u64) ((u64) size) << 9, (u64 *) a))
+ return -EFAULT;
+ break;
+
+ case BLKRRPART:
+ return -ENOTTY;
+
+ case LV_BMAP:
+ return dm_user_bmap(inode, (struct lv_bmap *) a);
+
+ default:
+ DMWARN("unknown block ioctl 0x%x", command);
+ return -ENOTTY;
+ }
+
+ return 0;
+}
+
+/*
+ * Add the buffer to the list of deferred io.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+ struct deferred_io *di;
+
+ di = alloc_deferred();
+ if (!di)
+ return -ENOMEM;
+
+ down_write(&md->lock);
+
+ if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_write(&md->lock);
+ free_deferred(di);
+ return 1;
+ }
+
+ di->bh = bh;
+ di->rw = rw;
+ di->next = md->deferred;
+ md->deferred = di;
+
+ up_write(&md->lock);
+ return 0; /* deferred successfully */
+}
+
+/*
+ * bh->b_end_io routine that decrements the pending count
+ * and then calls the original bh->b_end_io fn.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+ int r;
+ struct dm_io *io = bh->b_private;
+ dm_endio_fn endio = io->ti->type->end_io;
+
+ if (endio) {
+ r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO,
+ &io->map_context);
+ if (r < 0)
+ uptodate = 0;
+
+ else if (r > 0)
+ /* the target wants another shot at the io */
+ return;
+ }
+
+ if (atomic_dec_and_test(&io->md->pending))
+ /* nudge anyone waiting on suspend queue */
+ wake_up(&io->md->wait);
+
+ bh->b_end_io = io->end_io;
+ bh->b_private = io->context;
+ free_io(io->md, io);
+
+ bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Do the bh mapping for a given leaf
+ */
+static inline int __map_buffer(struct mapped_device *md, int rw,
+ struct buffer_head *bh, struct dm_io *io)
+{
+ struct dm_target *ti;
+
+ if (!md->map)
+ return -EINVAL;
+
+ ti = dm_table_find_target(md->map, bh->b_rsector);
+ if (!ti->type)
+ return -EINVAL;
+
+ /* hook the end io request fn */
+ atomic_inc(&md->pending);
+ io->md = md;
+ io->ti = ti;
+ io->rw = rw;
+ io->end_io = bh->b_end_io;
+ io->context = bh->b_private;
+ bh->b_end_io = dec_pending;
+ bh->b_private = io;
+
+ return ti->type->map(ti, bh, rw, &io->map_context);
+}
+
+/*
+ * Checks to see if we should be deferring io, if so it queues it
+ * and returns 1.
+ */
+static inline int __deferring(struct mapped_device *md, int rw,
+ struct buffer_head *bh)
+{
+ int r;
+
+ /*
+ * If we're suspended we have to queue this io for later.
+ */
+ while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_read(&md->lock);
+
+ /*
+ * There's no point deferring a read ahead
+ * request, just drop it.
+ */
+ if (rw == READA) {
+ down_read(&md->lock);
+ return -EIO;
+ }
+
+ r = queue_io(md, bh, rw);
+ down_read(&md->lock);
+
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return 1; /* deferred successfully */
+
+ }
+
+ return 0;
+}
+
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+ int r;
+ struct dm_io *io;
+ struct mapped_device *md;
+
+ md = get_kdev(bh->b_rdev);
+ if (!md) {
+ buffer_IO_error(bh);
+ return 0;
+ }
+
+ io = alloc_io(md);
+ down_read(&md->lock);
+
+ r = __deferring(md, rw, bh);
+ if (r < 0)
+ goto bad;
+
+ else if (!r) {
+ /* not deferring */
+ r = __map_buffer(md, rw, bh, io);
+ if (r < 0)
+ goto bad;
+ } else
+ r = 0;
+
+ up_read(&md->lock);
+ dm_put(md);
+ return r;
+
+ bad:
+ buffer_IO_error(bh);
+ up_read(&md->lock);
+ dm_put(md);
+ return 0;
+}
+
+static int check_dev_size(kdev_t dev, unsigned long block)
+{
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+
+ /* FIXME: check this */
+ unsigned long max_sector = (blk_size[major][minor] << 1) + 1;
+ unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9);
+
+ return (sector > max_sector) ? 0 : 1;
+}
+
+/*
+ * Creates a dummy buffer head and maps it (for lilo).
+ */
+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block,
+ kdev_t *r_dev, unsigned long *r_block)
+{
+ struct buffer_head bh;
+ struct dm_target *ti;
+ union map_info map_context;
+ int r;
+
+ if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ return -EPERM;
+ }
+
+ if (!check_dev_size(dev, block)) {
+ return -EINVAL;
+ }
+
+ if (!md->map)
+ return -EINVAL;
+
+ /* setup dummy bh */
+ memset(&bh, 0, sizeof(bh));
+ bh.b_blocknr = block;
+ bh.b_dev = bh.b_rdev = dev;
+ bh.b_size = blksize_size[major(dev)][minor(dev)];
+ bh.b_rsector = block * (bh.b_size >> 9);
+
+ /* find target */
+ ti = dm_table_find_target(md->map, bh.b_rsector);
+
+ /* do the mapping */
+ r = ti->type->map(ti, &bh, READ, &map_context);
+ ti->type->end_io(ti, &bh, READ, 0, &map_context);
+
+ if (!r) {
+ *r_dev = bh.b_rdev;
+ *r_block = bh.b_rsector / (bh.b_size >> 9);
+ }
+
+ return r;
+}
+
+/*
+ * Marshals arguments and results between user and kernel space.
+ */
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+{
+ struct mapped_device *md;
+ unsigned long block, r_block;
+ kdev_t r_dev;
+ int r;
+
+ if (get_user(block, &lvb->lv_block))
+ return -EFAULT;
+
+ md = get_kdev(inode->i_rdev);
+ if (!md)
+ return -ENXIO;
+
+ down_read(&md->lock);
+ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block);
+ up_read(&md->lock);
+ dm_put(md);
+
+ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
+ put_user(r_block, &lvb->lv_block)))
+ r = -EFAULT;
+
+ return r;
+}
+
+static void free_md(struct mapped_device *md)
+{
+ free_dev(md->dev);
+ mempool_destroy(md->io_pool);
+ kfree(md);
+}
+
+/*
+ * Allocate and initialise a blank device with a given minor.
+ */
+static struct mapped_device *alloc_md(kdev_t dev)
+{
+ int r;
+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+ if (!md) {
+ DMWARN("unable to allocate device, out of memory.");
+ return NULL;
+ }
+
+ memset(md, 0, sizeof(*md));
+
+ /* Allocate suitable device number */
+ if (!dev)
+ r = first_free_dev(md);
+ else
+ r = specific_dev(dev, md);
+
+ if (r) {
+ kfree(md);
+ return NULL;
+ }
+
+ md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+ mempool_free_slab, _io_cache);
+ if (!md->io_pool) {
+ free_md(md);
+ kfree(md);
+ return NULL;
+ }
+
+ init_rwsem(&md->lock);
+ atomic_set(&md->holders, 1);
+ atomic_set(&md->pending, 0);
+ init_waitqueue_head(&md->wait);
+ init_waitqueue_head(&md->eventq);
+
+ return md;
+}
+
+/*
+ * The hardsect size for a mapped device is the largest hardsect size
+ * from the devices it maps onto.
+ */
+static int __find_hardsect_size(struct list_head *devices)
+{
+ int result = 512, size;
+ struct list_head *tmp;
+
+ list_for_each (tmp, devices) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ size = get_hardsect_size(dd->dev);
+ if (size > result)
+ result = size;
+ }
+
+ return result;
+}
+
+/*
+ * Bind a table to the device.
+ */
+static void event_callback(void *context)
+{
+ struct mapped_device *md = (struct mapped_device *) context;
+
+ down_write(&md->lock);
+ md->event_nr++;
+ wake_up_interruptible(&md->eventq);
+ up_write(&md->lock);
+}
+
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+ unsigned int minor = minor(md->dev);
+ unsigned int major = major(md->dev);
+ md->map = t;
+
+ /* in k */
+ blk_size[major][minor] = dm_table_get_size(t) >> 1;
+ blksize_size[major][minor] = BLOCK_SIZE;
+ hardsect_size[major][minor] =
+ __find_hardsect_size(dm_table_get_devices(t));
+ register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]);
+
+ dm_table_event_callback(md->map, event_callback, md);
+ dm_table_get(t);
+ return 0;
+}
+
+static void __unbind(struct mapped_device *md)
+{
+ unsigned int minor = minor(md->dev);
+ unsigned int major = major(md->dev);
+
+ if (md->map) {
+ dm_table_event_callback(md->map, NULL, NULL);
+ dm_table_put(md->map);
+ md->map = NULL;
+
+ }
+
+ blk_size[major][minor] = 0;
+ blksize_size[major][minor] = 0;
+ hardsect_size[major][minor] = 0;
+}
+
+/*
+ * Constructor for a new device.
+ */
+int dm_create(kdev_t dev, struct mapped_device **result)
+{
+ struct mapped_device *md;
+
+ md = alloc_md(dev);
+ if (!md)
+ return -ENXIO;
+
+ __unbind(md); /* Ensure zero device size */
+
+ *result = md;
+ return 0;
+}
+
+void dm_get(struct mapped_device *md)
+{
+ atomic_inc(&md->holders);
+}
+
+void dm_put(struct mapped_device *md)
+{
+ if (atomic_dec_and_test(&md->holders)) {
+ if (md->map)
+ dm_table_suspend_targets(md->map);
+ __unbind(md);
+ free_md(md);
+ }
+}
+
+/*
+ * Requeue the deferred io by calling generic_make_request.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+ struct deferred_io *n;
+
+ while (c) {
+ n = c->next;
+ generic_make_request(c->rw, c->bh);
+ free_deferred(c);
+ c = n;
+ }
+}
+
+/*
+ * Swap in a new table (destroying old one).
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+ int r;
+
+ down_write(&md->lock);
+
+ /*
+ * The device must be suspended, or have no table bound yet.
+ */
+ if (md->map && !test_bit(DMF_SUSPENDED, &md->flags)) {
+ up_write(&md->lock);
+ return -EPERM;
+ }
+
+ __unbind(md);
+ r = __bind(md, table);
+ if (r)
+ return r;
+
+ up_write(&md->lock);
+ return 0;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight io and ensure that any further io gets deferred.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+ int r = 0;
+ DECLARE_WAITQUEUE(wait, current);
+
+ down_write(&md->lock);
+
+ /*
+ * First we set the BLOCK_IO flag so no more ios will be
+ * mapped.
+ */
+ if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_write(&md->lock);
+ return -EINVAL;
+ }
+
+ set_bit(DMF_BLOCK_IO, &md->flags);
+ add_wait_queue(&md->wait, &wait);
+ up_write(&md->lock);
+
+ /*
+ * Then we wait for the already mapped ios to
+ * complete.
+ */
+ run_task_queue(&tq_disk);
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (!atomic_read(&md->pending) || signal_pending(current))
+ break;
+
+ schedule();
+ }
+ set_current_state(TASK_RUNNING);
+
+ down_write(&md->lock);
+ remove_wait_queue(&md->wait, &wait);
+
+ /* did we flush everything ? */
+ if (atomic_read(&md->pending)) {
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+ r = -EINTR;
+ } else {
+ set_bit(DMF_SUSPENDED, &md->flags);
+ if (md->map)
+ dm_table_suspend_targets(md->map);
+ }
+ up_write(&md->lock);
+
+ return r;
+}
+
+int dm_resume(struct mapped_device *md)
+{
+ struct deferred_io *def;
+
+ down_write(&md->lock);
+ if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+ up_write(&md->lock);
+ return -EINVAL;
+ }
+
+ if (md->map)
+ dm_table_resume_targets(md->map);
+
+ clear_bit(DMF_SUSPENDED, &md->flags);
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+ def = md->deferred;
+ md->deferred = NULL;
+ up_write(&md->lock);
+
+ flush_deferred_io(def);
+ run_task_queue(&tq_disk);
+
+ return 0;
+}
+
+struct dm_table *dm_get_table(struct mapped_device *md)
+{
+ struct dm_table *t;
+
+ down_read(&md->lock);
+ t = md->map;
+ if (t)
+ dm_table_get(t);
+ up_read(&md->lock);
+
+ return t;
+}
+
+/*-----------------------------------------------------------------
+ * Event notification.
+ *---------------------------------------------------------------*/
+uint32_t dm_get_event_nr(struct mapped_device *md)
+{
+ uint32_t r;
+
+ down_read(&md->lock);
+ r = md->event_nr;
+ up_read(&md->lock);
+
+ return r;
+}
+
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+ uint32_t event_nr)
+{
+ down_write(&md->lock);
+ if (event_nr != md->event_nr) {
+ up_write(&md->lock);
+ return 1;
+ }
+
+ add_wait_queue(&md->eventq, wq);
+ up_write(&md->lock);
+
+ return 0;
+}
+
+const char *dm_kdevname(kdev_t dev)
+{
+ static char buffer[32];
+ sprintf(buffer, "%03d:%03d", MAJOR(dev), MINOR(dev));
+ return buffer;
+}
+
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
+{
+ down_write(&md->lock);
+ remove_wait_queue(&md->eventq, wq);
+ up_write(&md->lock);
+}
+
+kdev_t dm_kdev(struct mapped_device *md)
+{
+ kdev_t dev;
+
+ down_read(&md->lock);
+ dev = md->dev;
+ up_read(&md->lock);
+
+ return dev;
+}
+
+int dm_suspended(struct mapped_device *md)
+{
+ return test_bit(DMF_SUSPENDED, &md->flags);
+}
+
+struct block_device_operations dm_blk_dops = {
+ .open = dm_blk_open,
+ .release = dm_blk_close,
+ .ioctl = dm_blk_ioctl,
+ .owner = THIS_MODULE
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+MODULE_DESCRIPTION(DM_NAME " driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(dm_kdevname);
--- diff/drivers/md/dm.h 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm.h 2003-12-09 10:56:38.000000000 +0000
@@ -0,0 +1,172 @@
+/*
+ * Internal header file for device mapper
+ *
+ * Copyright (C) 2001, 2002 Sistina Software
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+
+#define DM_NAME "device-mapper"
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
+
+/*
+ * FIXME: I think this should be with the definition of sector_t
+ * in types.h.
+ */
+#ifdef CONFIG_LBD
+#define SECTOR_FORMAT "%Lu"
+#else
+#define SECTOR_FORMAT "%lu"
+#endif
+
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+
+extern struct block_device_operations dm_blk_dops;
+
+/*
+ * List of devices that a metadevice uses and should open/close.
+ */
+struct dm_dev {
+ struct list_head list;
+
+ atomic_t count;
+ int mode;
+ kdev_t dev;
+ struct block_device *bdev;
+};
+
+struct dm_table;
+struct mapped_device;
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a struct mapped_device.
+ * Drop the reference with dm_put when you finish with the object.
+ *---------------------------------------------------------------*/
+int dm_create(kdev_t dev, struct mapped_device **md);
+
+/*
+ * Reference counting for md.
+ */
+void dm_get(struct mapped_device *md);
+void dm_put(struct mapped_device *md);
+
+/*
+ * A device can still be used while suspended, but I/O is deferred.
+ */
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+
+/*
+ * The device must be suspended before calling this method.
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+
+/*
+ * Drop a reference on the table when you've finished with the
+ * result.
+ */
+struct dm_table *dm_get_table(struct mapped_device *md);
+
+/*
+ * Event functions.
+ */
+uint32_t dm_get_event_nr(struct mapped_device *md);
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+ uint32_t event_nr);
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq);
+
+/*
+ * Info functions.
+ */
+kdev_t dm_kdev(struct mapped_device *md);
+int dm_suspended(struct mapped_device *md);
+
+/*-----------------------------------------------------------------
+ * Functions for manipulating a table. Tables are also reference
+ * counted.
+ *---------------------------------------------------------------*/
+int dm_table_create(struct dm_table **result, int mode, unsigned num_targets);
+
+void dm_table_get(struct dm_table *t);
+void dm_table_put(struct dm_table *t);
+
+int dm_table_add_target(struct dm_table *t, const char *type,
+ sector_t start, sector_t len, char *params);
+int dm_table_complete(struct dm_table *t);
+void dm_table_event_callback(struct dm_table *t,
+ void (*fn)(void *), void *context);
+void dm_table_event(struct dm_table *t);
+sector_t dm_table_get_size(struct dm_table *t);
+struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+unsigned int dm_table_get_num_targets(struct dm_table *t);
+struct list_head *dm_table_get_devices(struct dm_table *t);
+int dm_table_get_mode(struct dm_table *t);
+void dm_table_suspend_targets(struct dm_table *t);
+void dm_table_resume_targets(struct dm_table *t);
+
+/*-----------------------------------------------------------------
+ * A registry of target types.
+ *---------------------------------------------------------------*/
+int dm_target_init(void);
+void dm_target_exit(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+
+
+/*-----------------------------------------------------------------
+ * Useful inlines.
+ *---------------------------------------------------------------*/
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+ unsigned long num)
+{
+ return (num > (ULONG_MAX - fixed) / obj);
+}
+
+/*
+ * ceiling(n / size) * size
+ */
+static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
+{
+ unsigned long r = n % size;
+ return n + (r ? (size - r) : 0);
+}
+
+/*
+ * Ceiling(n / size)
+ */
+static inline unsigned long dm_div_up(unsigned long n, unsigned long size)
+{
+ return dm_round_up(n, size) / size;
+}
+
+const char *dm_kdevname(kdev_t dev);
+
+/*
+ * The device-mapper can be driven through one of two interfaces;
+ * ioctl or filesystem, depending which patch you have applied.
+ */
+int dm_interface_init(void);
+void dm_interface_exit(void);
+
+/*
+ * Targets for linear and striped mappings
+ */
+int dm_linear_init(void);
+void dm_linear_exit(void);
+
+int dm_stripe_init(void);
+void dm_stripe_exit(void);
+
+#endif
--- diff/include/linux/device-mapper.h 1970-01-01 01:00:00.000000000 +0100
+++ source/include/linux/device-mapper.h 2003-12-09 10:39:56.000000000 +0000
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2001 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DEVICE_MAPPER_H
+#define _LINUX_DEVICE_MAPPER_H
+
+typedef unsigned long sector_t;
+
+struct dm_target;
+struct dm_table;
+struct dm_dev;
+
+typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
+
+union map_info {
+ void *ptr;
+ unsigned long long ll;
+};
+
+/*
+ * In the constructor the target parameter will already have the
+ * table, type, begin and len fields filled in.
+ */
+typedef int (*dm_ctr_fn) (struct dm_target * target, unsigned int argc,
+ char **argv);
+
+/*
+ * The destructor doesn't need to free the dm_target, just
+ * anything hidden ti->private.
+ */
+typedef void (*dm_dtr_fn) (struct dm_target * ti);
+
+/*
+ * The map function must return:
+ * < 0: error
+ * = 0: The target will handle the io by resubmitting it later
+ * > 0: simple remap complete
+ */
+typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh,
+ int rw, union map_info *map_context);
+
+/*
+ * Returns:
+ * < 0 : error (currently ignored)
+ * 0 : ended successfully
+ * 1 : for some reason the io has still not completed (eg,
+ * multipath target might want to requeue a failed io).
+ */
+typedef int (*dm_endio_fn) (struct dm_target * ti,
+ struct buffer_head * bh, int rw, int error,
+ union map_info *map_context);
+typedef void (*dm_suspend_fn) (struct dm_target *ti);
+typedef void (*dm_resume_fn) (struct dm_target *ti);
+typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type,
+ char *result, unsigned int maxlen);
+
+void dm_error(const char *message);
+
+/*
+ * Constructors should call these functions to ensure destination devices
+ * are opened/closed correctly.
+ * FIXME: too many arguments.
+ */
+int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
+ sector_t len, int mode, struct dm_dev **result);
+void dm_put_device(struct dm_target *ti, struct dm_dev *d);
+
+/*
+ * Information about a target type
+ */
+struct target_type {
+ const char *name;
+ struct module *module;
+ dm_ctr_fn ctr;
+ dm_dtr_fn dtr;
+ dm_map_fn map;
+ dm_endio_fn end_io;
+ dm_suspend_fn suspend;
+ dm_resume_fn resume;
+ dm_status_fn status;
+};
+
+struct dm_target {
+ struct dm_table *table;
+ struct target_type *type;
+
+ /* target limits */
+ sector_t begin;
+ sector_t len;
+
+ /* target specific data */
+ void *private;
+
+ /* Used to provide an error string from the ctr */
+ char *error;
+};
+
+int dm_register_target(struct target_type *t);
+int dm_unregister_target(struct target_type *t);
+
+#endif /* _LINUX_DEVICE_MAPPER_H */
^ permalink raw reply [flat|nested] 55+ messages in thread
* [Patch 4/4] dm: ioctl interface
2003-12-09 11:58 Device-mapper submission for 2.4 Joe Thornber
` (2 preceding siblings ...)
2003-12-09 12:26 ` [Patch 3/4] dm: core files Joe Thornber
@ 2003-12-09 12:26 ` Joe Thornber
2003-12-09 13:15 ` Device-mapper submission for 2.4 Marcelo Tosatti
2003-12-09 19:50 ` William Lee Irwin III
5 siblings, 0 replies; 55+ messages in thread
From: Joe Thornber @ 2003-12-09 12:26 UTC (permalink / raw)
To: Joe Thornber; +Cc: Marcelo Tosatti, Linux Mailing List
ioctl interface for dm
--- diff/arch/mips64/kernel/ioctl32.c 2003-08-26 13:50:03.000000000 +0100
+++ source/arch/mips64/kernel/ioctl32.c 2003-12-09 11:04:13.000000000 +0000
@@ -37,6 +37,7 @@
#include <linux/ext2_fs.h>
#include <linux/raid/md_u.h>
#include <linux/serial.h>
+#include <linux/dm-ioctl.h>
#include <scsi/scsi.h>
#undef __KERNEL__ /* This file was born to be ugly ... */
@@ -1222,6 +1223,22 @@
IOCTL32_DEFAULT(RESTART_ARRAY_RW),
#endif /* CONFIG_MD */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+ IOCTL32_DEFAULT(DM_VERSION),
+ IOCTL32_DEFAULT(DM_REMOVE_ALL),
+ IOCTL32_DEFAULT(DM_DEV_CREATE),
+ IOCTL32_DEFAULT(DM_DEV_REMOVE),
+ IOCTL32_DEFAULT(DM_TABLE_LOAD),
+ IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+ IOCTL32_DEFAULT(DM_DEV_RENAME),
+ IOCTL32_DEFAULT(DM_TABLE_DEPS),
+ IOCTL32_DEFAULT(DM_DEV_STATUS),
+ IOCTL32_DEFAULT(DM_TABLE_STATUS),
+ IOCTL32_DEFAULT(DM_DEV_WAIT),
+ IOCTL32_DEFAULT(DM_LIST_DEVICES),
+ IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
+
#ifdef CONFIG_SIBYTE_TBPROF
IOCTL32_DEFAULT(SBPROF_ZBSTART),
IOCTL32_DEFAULT(SBPROF_ZBSTOP),
--- diff/arch/parisc/kernel/ioctl32.c 2003-08-26 13:50:03.000000000 +0100
+++ source/arch/parisc/kernel/ioctl32.c 2003-12-09 11:04:13.000000000 +0000
@@ -55,6 +55,7 @@
#define max max */
#include <linux/lvm.h>
#endif /* LVM */
+#include <linux/dm-ioctl.h>
#include <scsi/scsi.h>
/* Ugly hack. */
@@ -3423,6 +3424,22 @@
COMPATIBLE_IOCTL(LV_BMAP)
COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
#endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
--- diff/arch/ppc64/kernel/ioctl32.c 2003-08-26 13:50:04.000000000 +0100
+++ source/arch/ppc64/kernel/ioctl32.c 2003-12-09 11:04:13.000000000 +0000
@@ -66,6 +66,7 @@
#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
#include <linux/lvm.h>
#endif /* LVM */
+#include <linux/dm-ioctl.h>
#include <scsi/scsi.h>
/* Ugly hack. */
@@ -4435,6 +4436,22 @@
COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
COMPATIBLE_IOCTL(NBD_DISCONNECT),
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION),
+COMPATIBLE_IOCTL(DM_REMOVE_ALL),
+COMPATIBLE_IOCTL(DM_DEV_CREATE),
+COMPATIBLE_IOCTL(DM_DEV_REMOVE),
+COMPATIBLE_IOCTL(DM_TABLE_LOAD),
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
+COMPATIBLE_IOCTL(DM_DEV_RENAME),
+COMPATIBLE_IOCTL(DM_TABLE_DEPS),
+COMPATIBLE_IOCTL(DM_DEV_STATUS),
+COMPATIBLE_IOCTL(DM_TABLE_STATUS),
+COMPATIBLE_IOCTL(DM_DEV_WAIT),
+COMPATIBLE_IOCTL(DM_LIST_DEVICES),
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
/* Remove *PRIVATE in 2.5 */
COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
--- diff/arch/s390x/kernel/ioctl32.c 2003-08-26 13:50:04.000000000 +0100
+++ source/arch/s390x/kernel/ioctl32.c 2003-12-09 11:04:13.000000000 +0000
@@ -30,6 +30,7 @@
#include <linux/blk.h>
#include <linux/elevator.h>
#include <linux/raw.h>
+#include <linux/dm-ioctl.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <asm/dasd.h>
@@ -627,6 +628,20 @@
IOCTL32_DEFAULT(SIOCGSTAMP),
+ IOCTL32_DEFAULT(DM_VERSION),
+ IOCTL32_DEFAULT(DM_REMOVE_ALL),
+ IOCTL32_DEFAULT(DM_DEV_CREATE),
+ IOCTL32_DEFAULT(DM_DEV_REMOVE),
+ IOCTL32_DEFAULT(DM_TABLE_LOAD),
+ IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+ IOCTL32_DEFAULT(DM_DEV_RENAME),
+ IOCTL32_DEFAULT(DM_TABLE_DEPS),
+ IOCTL32_DEFAULT(DM_DEV_STATUS),
+ IOCTL32_DEFAULT(DM_TABLE_STATUS),
+ IOCTL32_DEFAULT(DM_DEV_WAIT),
+ IOCTL32_DEFAULT(DM_LIST_DEVICES),
+ IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+
IOCTL32_DEFAULT(LOOP_SET_FD),
IOCTL32_DEFAULT(LOOP_CLR_FD),
--- diff/arch/sparc64/kernel/ioctl32.c 2003-12-09 10:25:25.000000000 +0000
+++ source/arch/sparc64/kernel/ioctl32.c 2003-12-09 11:04:13.000000000 +0000
@@ -56,6 +56,7 @@
#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
#include <linux/lvm.h>
#endif /* LVM */
+#include <linux/dm-ioctl.h>
#include <scsi/scsi.h>
/* Ugly hack. */
@@ -5086,6 +5087,22 @@
COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
COMPATIBLE_IOCTL(NBD_DISCONNECT)
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
/* Linux-1394 */
#if defined(CONFIG_IEEE1394) || defined(CONFIG_IEEE1394_MODULE)
COMPATIBLE_IOCTL(AMDTP_IOC_CHANNEL)
--- diff/arch/x86_64/ia32/ia32_ioctl.c 2003-12-09 10:25:25.000000000 +0000
+++ source/arch/x86_64/ia32/ia32_ioctl.c 2003-12-09 11:04:14.000000000 +0000
@@ -67,6 +67,7 @@
#define max max
#include <linux/lvm.h>
#endif /* LVM */
+#include <linux/dm-ioctl.h>
#include <scsi/scsi.h>
/* Ugly hack. */
@@ -4051,6 +4052,22 @@
COMPATIBLE_IOCTL(LV_BMAP)
COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
#endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
#ifdef CONFIG_AUTOFS_FS
COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
--- diff/drivers/md/Makefile 2003-12-09 11:03:47.000000000 +0000
+++ source/drivers/md/Makefile 2003-12-09 11:02:02.000000000 +0000
@@ -8,7 +8,8 @@
list-multi := lvm-mod.o dm-mod.o dm-mirror-mod.o
lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
-dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o
+dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+ dm-ioctl.o
# Note: link order is important. All raid personalities
# and xor.o must come before md.o, as they each initialise
--- diff/drivers/md/dm-ioctl.c 1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-ioctl.c 2003-12-09 11:01:41.000000000 +0000
@@ -0,0 +1,1284 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/dm-ioctl.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/blk.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
+
+/*-----------------------------------------------------------------
+ * The ioctl interface needs to be able to look up devices by
+ * name or uuid.
+ *---------------------------------------------------------------*/
+struct hash_cell {
+ struct list_head name_list;
+ struct list_head uuid_list;
+
+ char *name;
+ char *uuid;
+ struct mapped_device *md;
+ struct dm_table *new_map;
+
+ /* I hate devfs */
+ devfs_handle_t devfs_entry;
+};
+
+#define NUM_BUCKETS 64
+#define MASK_BUCKETS (NUM_BUCKETS - 1)
+static struct list_head _name_buckets[NUM_BUCKETS];
+static struct list_head _uuid_buckets[NUM_BUCKETS];
+
+static devfs_handle_t _dev_dir;
+void dm_hash_remove_all(void);
+
+/*
+ * Guards access to both hash tables.
+ */
+static DECLARE_RWSEM(_hash_lock);
+
+static void init_buckets(struct list_head *buckets)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_BUCKETS; i++)
+ INIT_LIST_HEAD(buckets + i);
+}
+
+int dm_hash_init(void)
+{
+ init_buckets(_name_buckets);
+ init_buckets(_uuid_buckets);
+ _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
+ return 0;
+}
+
+void dm_hash_exit(void)
+{
+ dm_hash_remove_all();
+ devfs_unregister(_dev_dir);
+}
+
+/*-----------------------------------------------------------------
+ * Hash function:
+ * We're not really concerned with the str hash function being
+ * fast since it's only used by the ioctl interface.
+ *---------------------------------------------------------------*/
+static unsigned int hash_str(const char *str)
+{
+ const unsigned int hash_mult = 2654435387U;
+ unsigned int h = 0;
+
+ while (*str)
+ h = (h + (unsigned int) *str++) * hash_mult;
+
+ return h & MASK_BUCKETS;
+}
+
+/*-----------------------------------------------------------------
+ * Code for looking up a device by name
+ *---------------------------------------------------------------*/
+static struct hash_cell *__get_name_cell(const char *str)
+{
+ struct list_head *tmp;
+ struct hash_cell *hc;
+ unsigned int h = hash_str(str);
+
+ list_for_each (tmp, _name_buckets + h) {
+ hc = list_entry(tmp, struct hash_cell, name_list);
+ if (!strcmp(hc->name, str))
+ return hc;
+ }
+
+ return NULL;
+}
+
+static struct hash_cell *__get_uuid_cell(const char *str)
+{
+ struct list_head *tmp;
+ struct hash_cell *hc;
+ unsigned int h = hash_str(str);
+
+ list_for_each (tmp, _uuid_buckets + h) {
+ hc = list_entry(tmp, struct hash_cell, uuid_list);
+ if (!strcmp(hc->uuid, str))
+ return hc;
+ }
+
+ return NULL;
+}
+
+/*-----------------------------------------------------------------
+ * Inserting, removing and renaming a device.
+ *---------------------------------------------------------------*/
+static inline char *kstrdup(const char *str)
+{
+ char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
+ if (r)
+ strcpy(r, str);
+ return r;
+}
+
+static struct hash_cell *alloc_cell(const char *name, const char *uuid,
+ struct mapped_device *md)
+{
+ struct hash_cell *hc;
+
+ hc = kmalloc(sizeof(*hc), GFP_KERNEL);
+ if (!hc)
+ return NULL;
+
+ hc->name = kstrdup(name);
+ if (!hc->name) {
+ kfree(hc);
+ return NULL;
+ }
+
+ if (!uuid)
+ hc->uuid = NULL;
+
+ else {
+ hc->uuid = kstrdup(uuid);
+ if (!hc->uuid) {
+ kfree(hc->name);
+ kfree(hc);
+ return NULL;
+ }
+ }
+
+ INIT_LIST_HEAD(&hc->name_list);
+ INIT_LIST_HEAD(&hc->uuid_list);
+ hc->md = md;
+ hc->new_map = NULL;
+ return hc;
+}
+
+static void free_cell(struct hash_cell *hc)
+{
+ if (hc) {
+ kfree(hc->name);
+ kfree(hc->uuid);
+ kfree(hc);
+ }
+}
+
+/*
+ * devfs stuff.
+ */
+static int register_with_devfs(struct hash_cell *hc)
+{
+ kdev_t dev = dm_kdev(hc->md);
+
+ hc->devfs_entry =
+ devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER,
+ major(dev), minor(dev),
+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+ &dm_blk_dops, NULL);
+
+ return 0;
+}
+
+static int unregister_with_devfs(struct hash_cell *hc)
+{
+ devfs_unregister(hc->devfs_entry);
+ return 0;
+}
+
+/*
+ * The kdev_t and uuid of a device can never change once it is
+ * initially inserted.
+ */
+int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
+{
+ struct hash_cell *cell;
+
+ /*
+ * Allocate the new cells.
+ */
+ cell = alloc_cell(name, uuid, md);
+ if (!cell)
+ return -ENOMEM;
+
+ /*
+ * Insert the cell into both hash tables.
+ */
+ down_write(&_hash_lock);
+ if (__get_name_cell(name))
+ goto bad;
+
+ list_add(&cell->name_list, _name_buckets + hash_str(name));
+
+ if (uuid) {
+ if (__get_uuid_cell(uuid)) {
+ list_del(&cell->name_list);
+ goto bad;
+ }
+ list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
+ }
+ register_with_devfs(cell);
+ dm_get(md);
+ up_write(&_hash_lock);
+
+ return 0;
+
+ bad:
+ up_write(&_hash_lock);
+ free_cell(cell);
+ return -EBUSY;
+}
+
+void __hash_remove(struct hash_cell *hc)
+{
+ /* remove from the dev hash */
+ list_del(&hc->uuid_list);
+ list_del(&hc->name_list);
+ unregister_with_devfs(hc);
+ dm_put(hc->md);
+ if (hc->new_map)
+ dm_table_put(hc->new_map);
+ free_cell(hc);
+}
+
+void dm_hash_remove_all(void)
+{
+ int i;
+ struct hash_cell *hc;
+ struct list_head *tmp, *n;
+
+ down_write(&_hash_lock);
+ for (i = 0; i < NUM_BUCKETS; i++) {
+ list_for_each_safe (tmp, n, _name_buckets + i) {
+ hc = list_entry(tmp, struct hash_cell, name_list);
+ __hash_remove(hc);
+ }
+ }
+ up_write(&_hash_lock);
+}
+
+int dm_hash_rename(const char *old, const char *new)
+{
+ char *new_name, *old_name;
+ struct hash_cell *hc;
+
+ /*
+ * duplicate new.
+ */
+ new_name = kstrdup(new);
+ if (!new_name)
+ return -ENOMEM;
+
+ down_write(&_hash_lock);
+
+ /*
+ * Is new free ?
+ */
+ hc = __get_name_cell(new);
+ if (hc) {
+ DMWARN("asked to rename to an already existing name %s -> %s",
+ old, new);
+ up_write(&_hash_lock);
+ kfree(new_name);
+ return -EBUSY;
+ }
+
+ /*
+ * Is there such a device as 'old' ?
+ */
+ hc = __get_name_cell(old);
+ if (!hc) {
+ DMWARN("asked to rename a non existent device %s -> %s",
+ old, new);
+ up_write(&_hash_lock);
+ kfree(new_name);
+ return -ENXIO;
+ }
+
+ /*
+ * rename and move the name cell.
+ */
+ list_del(&hc->name_list);
+ old_name = hc->name;
+ hc->name = new_name;
+ list_add(&hc->name_list, _name_buckets + hash_str(new_name));
+
+ /* rename the device node in devfs */
+ unregister_with_devfs(hc);
+ register_with_devfs(hc);
+
+ up_write(&_hash_lock);
+ kfree(old_name);
+ return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of the ioctl commands
+ *---------------------------------------------------------------*/
+/*
+ * All the ioctl commands get dispatched to functions with this
+ * prototype.
+ */
+typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
+
+static int remove_all(struct dm_ioctl *param, size_t param_size)
+{
+ dm_hash_remove_all();
+ param->data_size = 0;
+ return 0;
+}
+
+/*
+ * Round up the ptr to an 8-byte boundary.
+ */
+#define ALIGN_MASK 7
+static inline void *align_ptr(void *ptr)
+{
+ return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
+}
+
+/*
+ * Retrieves the data payload buffer from an already allocated
+ * struct dm_ioctl.
+ */
+static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
+ size_t *len)
+{
+ param->data_start = align_ptr(param + 1) - (void *) param;
+
+ if (param->data_start < param_size)
+ *len = param_size - param->data_start;
+ else
+ *len = 0;
+
+ return ((void *) param) + param->data_start;
+}
+
+static int list_devices(struct dm_ioctl *param, size_t param_size)
+{
+ unsigned int i;
+ struct hash_cell *hc;
+ size_t len, needed = 0;
+ struct dm_name_list *nl, *old_nl = NULL;
+
+ down_write(&_hash_lock);
+
+ /*
+ * Loop through all the devices working out how much
+ * space we need.
+ */
+ for (i = 0; i < NUM_BUCKETS; i++) {
+ list_for_each_entry (hc, _name_buckets + i, name_list) {
+ needed += sizeof(struct dm_name_list);
+ needed += strlen(hc->name);
+ needed += ALIGN_MASK;
+ }
+ }
+
+ /*
+ * Grab our output buffer.
+ */
+ nl = get_result_buffer(param, param_size, &len);
+ if (len < needed) {
+ param->flags |= DM_BUFFER_FULL_FLAG;
+ goto out;
+ }
+ param->data_size = param->data_start + needed;
+
+ nl->dev = 0; /* Flags no data */
+
+ /*
+ * Now loop through filling out the names.
+ */
+ for (i = 0; i < NUM_BUCKETS; i++) {
+ list_for_each_entry (hc, _name_buckets + i, name_list) {
+ if (old_nl)
+ old_nl->next = (uint32_t) ((void *) nl -
+ (void *) old_nl);
+
+ nl->dev = dm_kdev(hc->md);
+ nl->next = 0;
+ strcpy(nl->name, hc->name);
+
+ old_nl = nl;
+ nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
+ }
+ }
+
+ out:
+ up_write(&_hash_lock);
+ return 0;
+}
+
+static int check_name(const char *name)
+{
+ if (strchr(name, '/')) {
+ DMWARN("invalid device name");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Fills in a dm_ioctl structure, ready for sending back to
+ * userland.
+ */
+static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
+{
+ kdev_t dev = dm_kdev(md);
+ struct dm_table *table;
+ struct block_device *bdev;
+
+ param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
+ DM_ACTIVE_PRESENT_FLAG);
+
+ if (dm_suspended(md))
+ param->flags |= DM_SUSPEND_FLAG;
+
+ param->dev = kdev_t_to_nr(dev);
+
+ if (is_read_only(dev))
+ param->flags |= DM_READONLY_FLAG;
+
+ param->event_nr = dm_get_event_nr(md);
+
+ table = dm_get_table(md);
+ if (table) {
+ param->flags |= DM_ACTIVE_PRESENT_FLAG;
+ param->target_count = dm_table_get_num_targets(table);
+ dm_table_put(table);
+ } else
+ param->target_count = 0;
+
+ bdev = bdget(param->dev);
+ if (!bdev)
+ return -ENXIO;
+ param->open_count = bdev->bd_openers;
+ bdput(bdev);
+
+ return 0;
+}
+
+static int dev_create(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ kdev_t dev = 0;
+ struct mapped_device *md;
+
+ r = check_name(param->name);
+ if (r)
+ return r;
+
+ if (param->flags & DM_PERSISTENT_DEV_FLAG)
+ dev = to_kdev_t(param->dev);
+
+ r = dm_create(dev, &md);
+ if (r)
+ return r;
+
+ r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
+ if (r) {
+ dm_put(md);
+ return r;
+ }
+
+ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+ r = __dev_status(md, param);
+ dm_put(md);
+
+ return r;
+}
+
+/*
+ * Always use UUID for lookups if it's present, otherwise use name.
+ */
+static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
+{
+ return *param->uuid ?
+ __get_uuid_cell(param->uuid) : __get_name_cell(param->name);
+}
+
+static inline struct mapped_device *find_device(struct dm_ioctl *param)
+{
+ struct hash_cell *hc;
+ struct mapped_device *md = NULL;
+
+ down_read(&_hash_lock);
+ hc = __find_device_hash_cell(param);
+ if (hc) {
+ md = hc->md;
+
+ /*
+ * Sneakily write in both the name and the uuid
+ * while we have the cell.
+ */
+ strncpy(param->name, hc->name, sizeof(param->name));
+ if (hc->uuid)
+ strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1);
+ else
+ param->uuid[0] = '\0';
+
+ if (hc->new_map)
+ param->flags |= DM_INACTIVE_PRESENT_FLAG;
+ else
+ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+ dm_get(md);
+ }
+ up_read(&_hash_lock);
+
+ return md;
+}
+
+static int dev_remove(struct dm_ioctl *param, size_t param_size)
+{
+ struct hash_cell *hc;
+
+ down_write(&_hash_lock);
+ hc = __find_device_hash_cell(param);
+
+ if (!hc) {
+ DMWARN("device doesn't appear to be in the dev hash table.");
+ up_write(&_hash_lock);
+ return -ENXIO;
+ }
+
+ __hash_remove(hc);
+ up_write(&_hash_lock);
+ param->data_size = 0;
+ return 0;
+}
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from userland.
+ */
+static int invalid_str(char *str, void *end)
+{
+ while ((void *) str < end)
+ if (!*str++)
+ return 0;
+
+ return -EINVAL;
+}
+
+static int dev_rename(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ char *new_name = (char *) param + param->data_start;
+
+ if (new_name < (char *) (param + 1) ||
+ invalid_str(new_name, (void *) param + param_size)) {
+ DMWARN("Invalid new logical volume name supplied.");
+ return -EINVAL;
+ }
+
+ r = check_name(new_name);
+ if (r)
+ return r;
+
+ param->data_size = 0;
+ return dm_hash_rename(param->name, new_name);
+}
+
+static int do_suspend(struct dm_ioctl *param)
+{
+ int r = 0;
+ struct mapped_device *md;
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ if (!dm_suspended(md))
+ r = dm_suspend(md);
+
+ if (!r)
+ r = __dev_status(md, param);
+
+ dm_put(md);
+ return r;
+}
+
+static int do_resume(struct dm_ioctl *param)
+{
+ int r = 0;
+ struct hash_cell *hc;
+ struct mapped_device *md;
+ struct dm_table *new_map;
+
+ down_write(&_hash_lock);
+
+ hc = __find_device_hash_cell(param);
+ if (!hc) {
+ DMWARN("device doesn't appear to be in the dev hash table.");
+ up_write(&_hash_lock);
+ return -ENXIO;
+ }
+
+ md = hc->md;
+ dm_get(md);
+
+ new_map = hc->new_map;
+ hc->new_map = NULL;
+ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+ up_write(&_hash_lock);
+
+ /* Do we need to load a new map ? */
+ if (new_map) {
+ /* Suspend if it isn't already suspended */
+ if (!dm_suspended(md))
+ dm_suspend(md);
+
+ r = dm_swap_table(md, new_map);
+ if (r) {
+ dm_put(md);
+ dm_table_put(new_map);
+ return r;
+ }
+
+ if (dm_table_get_mode(new_map) & FMODE_WRITE)
+ set_device_ro(dm_kdev(md), 0);
+ else
+ set_device_ro(dm_kdev(md), 1);
+
+ dm_table_put(new_map);
+ }
+
+ if (dm_suspended(md))
+ r = dm_resume(md);
+
+ if (!r)
+ r = __dev_status(md, param);
+
+ dm_put(md);
+ return r;
+}
+
+/*
+ * Set or unset the suspension state of a device.
+ * If the device already is in the requested state we just return its status.
+ */
+static int dev_suspend(struct dm_ioctl *param, size_t param_size)
+{
+ if (param->flags & DM_SUSPEND_FLAG)
+ return do_suspend(param);
+
+ return do_resume(param);
+}
+
+/*
+ * Copies device info back to user space, used by
+ * the create and info ioctls.
+ */
+static int dev_status(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct mapped_device *md;
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ r = __dev_status(md, param);
+ dm_put(md);
+ return r;
+}
+
+/*
+ * Build up the status struct for each target
+ */
+static void retrieve_status(struct dm_table *table, struct dm_ioctl *param,
+ size_t param_size)
+{
+ unsigned int i, num_targets;
+ struct dm_target_spec *spec;
+ char *outbuf, *outptr;
+ status_type_t type;
+ size_t remaining, len, used = 0;
+
+ outptr = outbuf = get_result_buffer(param, param_size, &len);
+
+ if (param->flags & DM_STATUS_TABLE_FLAG)
+ type = STATUSTYPE_TABLE;
+ else
+ type = STATUSTYPE_INFO;
+
+ /* Get all the target info */
+ num_targets = dm_table_get_num_targets(table);
+ for (i = 0; i < num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(table, i);
+
+ remaining = len - (outptr - outbuf);
+ if (remaining < sizeof(struct dm_target_spec)) {
+ param->flags |= DM_BUFFER_FULL_FLAG;
+ break;
+ }
+
+ spec = (struct dm_target_spec *) outptr;
+
+ spec->status = 0;
+ spec->sector_start = ti->begin;
+ spec->length = ti->len;
+ strncpy(spec->target_type, ti->type->name,
+ sizeof(spec->target_type));
+
+ outptr += sizeof(struct dm_target_spec);
+ remaining = len - (outptr - outbuf);
+
+ /* Get the status/table string from the target driver */
+ if (ti->type->status) {
+ if (ti->type->status(ti, type, outptr, remaining)) {
+ param->flags |= DM_BUFFER_FULL_FLAG;
+ break;
+ }
+ } else
+ outptr[0] = '\0';
+
+ outptr += strlen(outptr) + 1;
+ used = param->data_start + (outptr - outbuf);
+
+ align_ptr(outptr);
+ spec->next = outptr - outbuf;
+ }
+
+ if (used)
+ param->data_size = used;
+
+ param->target_count = num_targets;
+}
+
+/*
+ * Wait for a device to report an event
+ */
+static int dev_wait(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct mapped_device *md;
+ struct dm_table *table;
+ DECLARE_WAITQUEUE(wq, current);
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ /*
+ * Wait for a notification event
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
+ schedule();
+ dm_remove_wait_queue(md, &wq);
+ }
+ set_current_state(TASK_RUNNING);
+
+ /*
+ * The userland program is going to want to know what
+ * changed to trigger the event, so we may as well tell
+ * him and save an ioctl.
+ */
+ r = __dev_status(md, param);
+ if (r)
+ goto out;
+
+ table = dm_get_table(md);
+ if (table) {
+ retrieve_status(table, param, param_size);
+ dm_table_put(table);
+ }
+
+ out:
+ dm_put(md);
+ return r;
+}
+
+static inline int get_mode(struct dm_ioctl *param)
+{
+ int mode = FMODE_READ | FMODE_WRITE;
+
+ if (param->flags & DM_READONLY_FLAG)
+ mode = FMODE_READ;
+
+ return mode;
+}
+
+static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
+ struct dm_target_spec **spec, char **target_params)
+{
+ *spec = (struct dm_target_spec *) ((unsigned char *) last + next);
+ *target_params = (char *) (*spec + 1);
+
+ if (*spec < (last + 1))
+ return -EINVAL;
+
+ return invalid_str(*target_params, end);
+}
+
+static int populate_table(struct dm_table *table, struct dm_ioctl *param,
+ size_t param_size)
+{
+ int r;
+ unsigned int i = 0;
+ struct dm_target_spec *spec = (struct dm_target_spec *) param;
+ uint32_t next = param->data_start;
+ void *end = (void *) param + param_size;
+ char *target_params;
+
+ if (!param->target_count) {
+ DMWARN("populate_table: no targets specified");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < param->target_count; i++) {
+
+ r = next_target(spec, next, end, &spec, &target_params);
+ if (r) {
+ DMWARN("unable to find target");
+ return r;
+ }
+
+ r = dm_table_add_target(table, spec->target_type,
+ (sector_t) spec->sector_start,
+ (sector_t) spec->length,
+ target_params);
+ if (r) {
+ DMWARN("error adding target to table");
+ return r;
+ }
+
+ next = spec->next;
+ }
+
+ return dm_table_complete(table);
+}
+
+static int table_load(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct hash_cell *hc;
+ struct dm_table *t;
+
+ r = dm_table_create(&t, get_mode(param), param->target_count);
+ if (r)
+ return r;
+
+ r = populate_table(t, param, param_size);
+ if (r) {
+ dm_table_put(t);
+ return r;
+ }
+
+ down_write(&_hash_lock);
+ hc = __find_device_hash_cell(param);
+ if (!hc) {
+ DMWARN("device doesn't appear to be in the dev hash table.");
+ up_write(&_hash_lock);
+ return -ENXIO;
+ }
+
+ if (hc->new_map)
+ dm_table_put(hc->new_map);
+ hc->new_map = t;
+ param->flags |= DM_INACTIVE_PRESENT_FLAG;
+
+ r = __dev_status(hc->md, param);
+ up_write(&_hash_lock);
+ return r;
+}
+
+static int table_clear(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct hash_cell *hc;
+
+ down_write(&_hash_lock);
+
+ hc = __find_device_hash_cell(param);
+ if (!hc) {
+ DMWARN("device doesn't appear to be in the dev hash table.");
+ up_write(&_hash_lock);
+ return -ENXIO;
+ }
+
+ if (hc->new_map) {
+ dm_table_put(hc->new_map);
+ hc->new_map = NULL;
+ }
+
+ param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+ r = __dev_status(hc->md, param);
+ up_write(&_hash_lock);
+ return r;
+}
+
+/*
+ * Retrieves a list of devices used by a particular dm device.
+ */
+static void retrieve_deps(struct dm_table *table, struct dm_ioctl *param,
+ size_t param_size)
+{
+ unsigned int count = 0;
+ struct list_head *tmp;
+ size_t len, needed;
+ struct dm_target_deps *deps;
+
+ deps = get_result_buffer(param, param_size, &len);
+
+ /*
+ * Count the devices.
+ */
+ list_for_each(tmp, dm_table_get_devices(table))
+ count++;
+
+ /*
+ * Check we have enough space.
+ */
+ needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
+ if (len < needed) {
+ param->flags |= DM_BUFFER_FULL_FLAG;
+ return;
+ }
+
+ /*
+ * Fill in the devices.
+ */
+ deps->count = count;
+ count = 0;
+ list_for_each(tmp, dm_table_get_devices(table)) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ deps->dev[count++] = dd->bdev->bd_dev;
+ }
+
+ param->data_size = param->data_start + needed;
+}
+
+static int table_deps(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct mapped_device *md;
+ struct dm_table *table;
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ r = __dev_status(md, param);
+ if (r)
+ goto out;
+
+ table = dm_get_table(md);
+ if (table) {
+ retrieve_deps(table, param, param_size);
+ dm_table_put(table);
+ }
+
+ out:
+ dm_put(md);
+ return r;
+}
+
+/*
+ * Return the status of a device as a text string for each
+ * target.
+ */
+static int table_status(struct dm_ioctl *param, size_t param_size)
+{
+ int r;
+ struct mapped_device *md;
+ struct dm_table *table;
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ r = __dev_status(md, param);
+ if (r)
+ goto out;
+
+ table = dm_get_table(md);
+ if (table) {
+ retrieve_status(table, param, param_size);
+ dm_table_put(table);
+ }
+
+ out:
+ dm_put(md);
+ return r;
+}
+
+/*-----------------------------------------------------------------
+ * Implementation of open/close/ioctl on the special char
+ * device.
+ *---------------------------------------------------------------*/
+static ioctl_fn lookup_ioctl(unsigned int cmd)
+{
+ static struct {
+ int cmd;
+ ioctl_fn fn;
+ } _ioctls[] = {
+ {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
+ {DM_REMOVE_ALL_CMD, remove_all},
+ {DM_LIST_DEVICES_CMD, list_devices},
+
+ {DM_DEV_CREATE_CMD, dev_create},
+ {DM_DEV_REMOVE_CMD, dev_remove},
+ {DM_DEV_RENAME_CMD, dev_rename},
+ {DM_DEV_SUSPEND_CMD, dev_suspend},
+ {DM_DEV_STATUS_CMD, dev_status},
+ {DM_DEV_WAIT_CMD, dev_wait},
+
+ {DM_TABLE_LOAD_CMD, table_load},
+ {DM_TABLE_CLEAR_CMD, table_clear},
+ {DM_TABLE_DEPS_CMD, table_deps},
+ {DM_TABLE_STATUS_CMD, table_status}
+ };
+
+ return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
+}
+
+/*
+ * As well as checking the version compatibility this always
+ * copies the kernel interface version out.
+ */
+static int check_version(unsigned int cmd, struct dm_ioctl *user)
+{
+ uint32_t version[3];
+ int r = 0;
+
+ if (copy_from_user(version, user->version, sizeof(version)))
+ return -EFAULT;
+
+ if ((DM_VERSION_MAJOR != version[0]) ||
+ (DM_VERSION_MINOR < version[1])) {
+ DMWARN("ioctl interface mismatch: "
+ "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
+ DM_VERSION_MAJOR, DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL,
+ version[0], version[1], version[2], cmd);
+ r = -EINVAL;
+ }
+
+ /*
+ * Fill in the kernel version.
+ */
+ version[0] = DM_VERSION_MAJOR;
+ version[1] = DM_VERSION_MINOR;
+ version[2] = DM_VERSION_PATCHLEVEL;
+ if (copy_to_user(user->version, version, sizeof(version)))
+ return -EFAULT;
+
+ return r;
+}
+
+static void free_params(struct dm_ioctl *param)
+{
+ vfree(param);
+}
+
+static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
+{
+ struct dm_ioctl tmp, *dmi;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)))
+ return -EFAULT;
+
+ if (tmp.data_size < sizeof(tmp))
+ return -EINVAL;
+
+ dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+ if (!dmi)
+ return -ENOMEM;
+
+ if (copy_from_user(dmi, user, tmp.data_size)) {
+ vfree(dmi);
+ return -EFAULT;
+ }
+
+ *param = dmi;
+ return 0;
+}
+
+static int validate_params(uint cmd, struct dm_ioctl *param)
+{
+ /* Always clear this flag */
+ param->flags &= ~DM_BUFFER_FULL_FLAG;
+
+ /* Ignores parameters */
+ if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
+ return 0;
+
+ /* Unless creating, either name or uuid but not both */
+ if (cmd != DM_DEV_CREATE_CMD) {
+ if ((!*param->uuid && !*param->name) ||
+ (*param->uuid && *param->name)) {
+ DMWARN("one of name or uuid must be supplied, cmd(%u)",
+ cmd);
+ return -EINVAL;
+ }
+ }
+
+ /* Ensure strings are terminated */
+ param->name[DM_NAME_LEN - 1] = '\0';
+ param->uuid[DM_UUID_LEN - 1] = '\0';
+
+ return 0;
+}
+
+static int ctl_ioctl(struct inode *inode, struct file *file,
+ uint command, ulong u)
+{
+ int r = 0;
+ unsigned int cmd;
+ struct dm_ioctl *param;
+ struct dm_ioctl *user = (struct dm_ioctl *) u;
+ ioctl_fn fn = NULL;
+ size_t param_size;
+
+ /* only root can play with this */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (_IOC_TYPE(command) != DM_IOCTL)
+ return -ENOTTY;
+
+ cmd = _IOC_NR(command);
+
+ /*
+ * Check the interface version passed in. This also
+ * writes out the kernel's interface version.
+ */
+ r = check_version(cmd, user);
+ if (r)
+ return r;
+
+ /*
+ * Nothing more to do for the version command.
+ */
+ if (cmd == DM_VERSION_CMD)
+ return 0;
+
+ fn = lookup_ioctl(cmd);
+ if (!fn) {
+ DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
+ return -ENOTTY;
+ }
+
+ /*
+ * FIXME: I don't like this, we're trying to avoid low
+ * memory issues when a device is suspended.
+ */
+ current->flags |= PF_MEMALLOC;
+
+ /*
+ * Copy the parameters into kernel space.
+ */
+ r = copy_params(user, ¶m);
+ if (r) {
+ current->flags &= ~PF_MEMALLOC;
+ return r;
+ }
+
+ r = validate_params(cmd, param);
+ if (r)
+ goto out;
+
+ param_size = param->data_size;
+ param->data_size = sizeof(*param);
+ r = fn(param, param_size);
+
+ /*
+ * Copy the results back to userland.
+ */
+ if (!r && copy_to_user(user, param, param->data_size))
+ r = -EFAULT;
+
+ out:
+ free_params(param);
+ current->flags &= ~PF_MEMALLOC;
+ return r;
+}
+
+static struct file_operations _ctl_fops = {
+ .ioctl = ctl_ioctl,
+ .owner = THIS_MODULE,
+};
+
+static devfs_handle_t _ctl_handle;
+
+static struct miscdevice _dm_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = DM_NAME,
+ .fops = &_ctl_fops
+};
+
+/*
+ * Create misc character device and link to DM_DIR/control.
+ */
+int __init dm_interface_init(void)
+{
+ int r;
+ char rname[64];
+
+ r = dm_hash_init();
+ if (r)
+ return r;
+
+ r = misc_register(&_dm_misc);
+ if (r) {
+ DMERR("misc_register failed for control device");
+ dm_hash_exit();
+ return r;
+ }
+
+ r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
+ sizeof rname - 3);
+ if (r == -ENOSYS)
+ goto done; /* devfs not present */
+
+ if (r < 0) {
+ DMERR("devfs_generate_path failed for control device");
+ goto failed;
+ }
+
+ strncpy(rname + r, "../", 3);
+ r = devfs_mk_symlink(NULL, DM_DIR "/control",
+ DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
+ if (r) {
+ DMERR("devfs_mk_symlink failed for control device");
+ goto failed;
+ }
+ devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
+
+ done:
+ DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
+ DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
+ DM_DRIVER_EMAIL);
+ return 0;
+
+ failed:
+ misc_deregister(&_dm_misc);
+ dm_hash_exit();
+ return r;
+}
+
+void dm_interface_exit(void)
+{
+ if (misc_deregister(&_dm_misc) < 0)
+ DMERR("misc_deregister failed for control device");
+
+ dm_hash_exit();
+}
--- diff/include/linux/dm-ioctl.h 1970-01-01 01:00:00.000000000 +0100
+++ source/include/linux/dm-ioctl.h 2003-12-09 11:01:41.000000000 +0000
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_H
+#define _LINUX_DM_IOCTL_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ uint32_t version[3]; /* in/out */
+ uint32_t data_size; /* total size of data passed in
+ * including this struct */
+
+ uint32_t data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ uint32_t target_count; /* in/out */
+ int32_t open_count; /* out */
+ uint32_t flags; /* in/out */
+ uint32_t event_nr; /* in/out */
+ uint32_t padding;
+
+ uint64_t dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ uint64_t sector_start;
+ uint64_t length;
+ int32_t status; /* used when reading from kernel only */
+
+ /*
+ * Offset in bytes (from the start of this struct) to
+ * next target_spec.
+ */
+ uint32_t next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ uint32_t count; /* Array size */
+ uint32_t padding; /* unused */
+ uint64_t dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ uint64_t dev;
+ uint32_t next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 0
+#define DM_VERSION_PATCHLEVEL 5
+#define DM_VERSION_EXTRA "-ioctl (2003-11-18)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+#endif /* _LINUX_DM_IOCTL_H */
^ permalink raw reply [flat|nested] 55+ messages in thread