From: Dan Smith <danms@us.ibm.com>
To: device-mapper development <dm-devel@redhat.com>
Subject: Re: [PATCH 1/2] Add userspace device-mapper target
Date: Wed, 31 Jan 2007 07:25:11 -0800
Message-ID: <m3d54vqkvc.fsf@guaranine.beaverton.ibm.com>
In-Reply-To: <20070131213946H.fujita.tomonori@lab.ntt.co.jp> (FUJITA Tomonori's message of "Wed, 31 Jan 2007 21:39:46 +0900")



FT> I can't apply both cleanly. 

Hmm, really?  The kernel patch is against 2.6.20-rc6 and the library
patch is against device-mapper CVS from January 29th.

FT> Can you resend them as an attachment (though I don't like
FT> attachments).

Attached.

Signed-off-by: Dan Smith <danms@us.ibm.com>
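
For anyone who wants to look at the interface before digging into the plumbing:
the kernel target takes a table line of the form "<key> <block_size_in_bytes>
<major:minor> [...]" (see dmu_ctr() in the kernel patch), and the library patch
adds a small dmu_* API to libdevmapper for the userspace side.  A rough sketch
of a minimal identity mapper against that API is below -- the control-device
path and device numbers are made up, error handling is omitted, and the
loop-return conventions are assumed; only the handler return values
(1 = map the IO, -1 = fail it, 0 = delay) come straight from the comments
in libdevmapper.h.

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysmacros.h>	/* makedev() */
#include <libdevmapper.h>

static int map_request(void *data, struct dmu_map_data *md)
{
	dev_t *dest = data;

	/* Identity mapping: keep the block number, point it at one device */
	dmu_map_set_block(md, dmu_map_get_block(md));
	dmu_map_set_dest_dev(md, *dest);
	return 1;			/* 1 = map, -1 = fail, 0 = delay */
}

int main(void)
{
	dev_t dest = makedev(8, 16);			/* made-up target device */
	struct dmu_context *ctx;

	ctx = dmu_ctl_open("/dev/dm-userspace0", 0);	/* path is illustrative */
	if (!ctx)
		return 1;

	dmu_register_map_handler(ctx, map_request, &dest);

	for (;;) {
		if (dmu_events_pending(ctx, 1000))	/* assumed: non-zero when work is queued */
			if (dmu_process_events(ctx) < 0)	/* assumed: negative on error */
				break;
		dmu_ctl_send_queue(ctx);		/* assumed: flushes queued replies */
	}

	dmu_ctl_close(ctx);
	return 0;
}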


[-- Attachment #1.1.2: dm-user_kernel.patch --]
[-- Type: text/x-patch, Size: 48366 bytes --]

diff -r 50f87a6ffd94 drivers/md/Kconfig
--- a/drivers/md/Kconfig	Thu Jan 25 17:50:37 2007 -0800
+++ b/drivers/md/Kconfig	Mon Jan 29 14:28:05 2007 -0800
@@ -236,6 +236,12 @@ config DM_SNAPSHOT
        ---help---
          Allow volume managers to take writable snapshots of a device.
 
+config DM_USERSPACE
+       tristate "Userspace target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       ---help---
+         A target that provides a userspace interface to device-mapper
+
 config DM_MIRROR
        tristate "Mirror target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
diff -r 50f87a6ffd94 drivers/md/Makefile
--- a/drivers/md/Makefile	Thu Jan 25 17:50:37 2007 -0800
+++ b/drivers/md/Makefile	Mon Jan 29 14:28:05 2007 -0800
@@ -14,6 +14,8 @@ raid456-objs	:= raid5.o raid6algos.o rai
 		   raid6altivec1.o raid6altivec2.o raid6altivec4.o \
 		   raid6altivec8.o \
 		   raid6mmx.o raid6sse1.o raid6sse2.o
+dm-user-objs    := dm-userspace.o dm-userspace-chardev.o \
+		   dm-userspace-cache.o
 hostprogs-y	:= mktables
 
 # Note: link order is important.  All raid personalities
@@ -36,6 +38,7 @@ obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
 obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
+obj-$(CONFIG_DM_USERSPACE)      += dm-user.o
 
 quiet_cmd_unroll = UNROLL  $@
       cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -r 50f87a6ffd94 drivers/md/dm-user.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/md/dm-user.h	Mon Jan 29 14:28:05 2007 -0800
@@ -0,0 +1,176 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __DM_USER_H
+#define __DM_USER_H
+
+#include <linux/dm-userspace.h>
+
+#include <linux/hardirq.h>
+#include <linux/slab.h>
+
+#define DMU_KEY_LEN 256
+
+extern struct target_type userspace_target;
+extern mempool_t *request_pool;
+extern dev_t dmu_dev;
+extern spinlock_t devices_lock;
+extern struct list_head devices;
+
+struct dmu_mappings;
+
+#define DMU_CP_HASH 1024
+
+/*
+ * A block device that we can send bios to
+ */
+struct target_device {
+	struct list_head list;        /* Our place in the targets list      */
+	struct block_device *bdev;    /* The target block_device            */
+	struct kref users;            /* Self-destructing reference count   */
+};
+
+/*
+ * A dm-userspace device, which consists of multiple targets sharing a
+ * common key
+ */
+struct dmu_device {
+	struct list_head list;        /* Our place in the devices list     */
+
+	spinlock_t lock;              /* Protects all the fields below     */
+
+	/* We need to protect the TX/RX lists with a separate lock that is
+	 * always used with IRQs disabled because it is locked from
+	 * inside the endio function
+	 */
+	spinlock_t xmit_lock;
+	struct list_head tx_requests; /* Requests to send to userspace     */
+	struct list_head *rx_requests; /* Requests waiting for reply        */
+
+	struct dmu_mappings *mappings;
+
+	/* Accounting */
+	atomic_t t_reqs;              /* Waiting to be sent to userspace   */
+	atomic_t r_reqs;              /* Waiting for a response from uspace*/
+	atomic_t f_reqs;              /* Submitted, waiting for endio      */
+	atomic_t total;               /* Total requests allocated          */
+
+	atomic_t idcounter;           /* Counter for making request IDs    */
+
+	struct list_head target_devs; /* List of devices we can target     */
+
+	void *transport_private;      /* Private data for userspace comms  */
+
+	char key[DMU_KEY_LEN];        /* Unique name string for device     */
+	struct kref users;            /* Self-destructing reference count  */
+
+	wait_queue_head_t lowmem;     /* To block while waiting for memory */
+
+	uint64_t block_size;          /* Block size for this device        */
+	uint64_t block_mask;          /* Mask for offset in block          */
+	unsigned int block_shift;     /* Shift to convert to/from block    */
+
+	struct kcopyd_client *kcopy;  /* Interface to kcopyd               */
+
+	unsigned int request_slots;   /* Max number of reqs we will queue  */
+};
+
+struct dmu_request {
+	struct list_head list;        /* Our place on the request queue    */
+	struct list_head copy;        /* Our place on the copy list        */
+	struct dmu_device *dev;       /* The DMU device that owns us       */
+
+	struct block_device *target_dev;
+
+	int type;                     /* Type of request                   */
+	uint32_t flags;               /* Attribute flags                   */
+	uint64_t id;                  /* Unique ID for sync with userspace */
+	union {
+		uint64_t block;       /* The block in question             */
+	} u;
+
+	struct list_head deps;        /* Requests depending on this one    */
+	struct bio *bio;              /* The bio this request represents   */
+
+	struct work_struct task;      /* Async task to run for this req    */
+
+	struct dmu_msg_map_response response; /* FIXME: Clean this up      */
+};
+
+
+extern void add_tx_request(struct dmu_device *dev, struct dmu_request *req);
+extern void endio_worker(struct work_struct *work);
+
+/* Find and grab a reference to a target device */
+struct target_device *find_target(struct dmu_device *dev,
+				  dev_t devno);
+/* Character device transport functions */
+int register_chardev_transport(struct dmu_device *dev);
+void unregister_chardev_transport(struct dmu_device *dev);
+int init_chardev_transport(void);
+void cleanup_chardev_transport(void);
+void write_chardev_transport_info(struct dmu_device *dev,
+				  char *buf, unsigned int maxlen);
+
+/* Return the block number for @sector */
+static inline u64 dmu_block(struct dmu_device *dev,
+			    sector_t sector)
+{
+	return sector >> dev->block_shift;
+}
+
+/* Return the sector offset in a block for @sector */
+static inline u64 dmu_sector_offset(struct dmu_device *dev,
+				    sector_t sector)
+{
+	return sector & dev->block_mask;
+}
+
+/* Return the starting sector for @block */
+static inline u64 dmu_sector(struct dmu_device *dev,
+			     uint64_t block)
+{
+	return block << dev->block_shift;
+}
+
+/* Increase the usage count for @dev */
+static inline void get_dev(struct dmu_device *dev)
+{
+	kref_get(&dev->users);
+}
+
+/* Decrease the usage count for @dev */
+void destroy_dmu_device(struct kref *ref);
+static inline void put_dev(struct dmu_device *dev)
+{
+	kref_put(&dev->users, destroy_dmu_device);
+}
+
+int dmu_init_mappings(void);
+void dmu_cleanup_mappings(void);
+int dmu_make_mapping(struct dmu_device *dev,
+		     uint64_t org, uint64_t new, int64_t offset,
+		     struct block_device *dest, int rw);
+int dmu_map_from_mappings(struct dmu_device *dev,
+			  struct bio *bio);
+int dmu_alloc_mappings(struct dmu_mappings **m, uint32_t size);
+int dmu_remove_mapping(struct dmu_device *dev, uint64_t org);
+unsigned int dmu_remove_all_mappings(struct dmu_device *dev);
+
+#endif
diff -r 50f87a6ffd94 drivers/md/dm-userspace-cache.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/md/dm-userspace-cache.c	Mon Jan 29 14:28:05 2007 -0800
@@ -0,0 +1,256 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/types.h>
+#include <linux/poll.h>
+
+#include "dm.h"
+
+#include <linux/dm-userspace.h>
+
+#include "dm-user.h"
+
+#define DM_MSG_PREFIX "dm-userspace-cache"
+
+static struct kmem_cache *map_cache;
+
+struct dmu_mappings {
+	struct list_head *table;
+	uint32_t size;
+	uint32_t count;
+	struct semaphore sem;
+};
+
+struct dmu_map {
+	struct list_head list;
+	uint64_t org_block;
+	uint64_t new_block;
+	int64_t offset;
+	struct block_device *dest_dev;
+	int rw;
+};
+
+int dmu_alloc_mappings(struct dmu_mappings **mp, uint32_t size)
+{
+	struct dmu_mappings *m;
+	int i;
+
+	(*mp) = kmalloc(sizeof(*m), GFP_KERNEL);
+	if (!(*mp)) {
+		DMERR("Failed to alloc mappings");
+		return 0;
+	}
+	
+	m = *mp;	   
+
+	m->table = kmalloc(sizeof(struct list_head) * size, GFP_KERNEL);
+	m->size = size;
+	m->count = 0;
+
+	for (i = 0; i < m->size; i++) {
+		INIT_LIST_HEAD(&m->table[i]);
+	}
+		
+	init_MUTEX(&m->sem);
+
+	return 1;
+}
+
+int dmu_destroy_mappings(struct dmu_mappings *m)
+{
+	if (m->table)
+		kfree(m->table);
+			
+	return 1;
+}
+
+static struct dmu_map *__dmu_find_mapping(struct dmu_mappings *m,
+					  uint64_t block)
+{
+	uint32_t bucket;
+	struct dmu_map *map;
+
+	bucket = ((uint32_t)block) % m->size;
+
+	list_for_each_entry(map, &m->table[bucket], list) {
+		if (map->org_block == block)
+			return map;
+	}
+
+	return NULL;
+}
+
+static void __dmu_delete_mapping(struct dmu_mappings *m,
+				 struct dmu_map *map)
+{
+	m->count--;
+	list_del(&map->list);
+	kmem_cache_free(map_cache, map);
+}
+
+static int dmu_add_mapping(struct dmu_mappings *m, 
+			   struct dmu_map *map)
+{
+	uint32_t bucket;
+	struct dmu_map *old;
+
+	down(&m->sem);
+
+	old = __dmu_find_mapping(m, map->org_block);
+	if (old)
+		__dmu_delete_mapping(m, old);
+
+	bucket = ((uint32_t)map->org_block) % m->size;
+	
+	list_add(&map->list, &m->table[bucket]);
+	m->count++;
+
+	up(&m->sem);
+
+	return 1;
+}
+
+int dmu_map_from_mappings(struct dmu_device *dev,
+			  struct bio *bio)
+{
+	struct dmu_map *map;
+	int ret = 0;
+
+	down(&dev->mappings->sem);
+
+	map = __dmu_find_mapping(dev->mappings,
+				 dmu_block(dev, bio->bi_sector));
+
+	if (map && (bio_rw(bio) == map->rw)) {
+		
+		bio->bi_sector = dmu_sector(dev, map->new_block) +
+			dmu_sector_offset(dev, bio->bi_sector) +
+			map->offset;
+		bio->bi_bdev = map->dest_dev;
+		ret = 1;
+	}
+
+	up(&dev->mappings->sem);
+
+	return ret;
+}
+
+int dmu_make_mapping(struct dmu_device *dev,
+		     uint64_t org, uint64_t new, int64_t offset,
+		     struct block_device *dest, int rw)
+{
+	struct dmu_map *map;
+
+	/* FIXME */
+	map = kmem_cache_alloc(map_cache, GFP_NOIO);
+	if (!map) {
+		DMERR("Failed to alloc mapping");
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&map->list);
+
+	map->org_block = org;
+	map->new_block = new;
+	map->dest_dev = dest;
+	map->offset = offset;
+	map->rw = rw;
+
+	return dmu_add_mapping(dev->mappings, map);
+}
+
+int dmu_remove_mapping(struct dmu_device *dev,
+		       uint64_t org)
+{
+	struct dmu_map *map;
+	int ret = 0;
+
+	down(&dev->mappings->sem);
+
+	map = __dmu_find_mapping(dev->mappings, org);
+	if (map) {
+		__dmu_delete_mapping(dev->mappings, map);
+		ret = 1;
+	}
+
+	up(&dev->mappings->sem);
+
+	return ret;
+}
+
+static unsigned int __destroy_bucket(struct dmu_mappings *m,
+				     unsigned int index)
+{
+	struct dmu_map *map, *next;
+	unsigned int count = 0;
+
+	list_for_each_entry_safe(map, next, &m->table[index], list) {
+		__dmu_delete_mapping(m, map);
+		count++;
+	}
+
+	return count;
+}
+
+unsigned int dmu_remove_all_mappings(struct dmu_device *dev)
+{
+	int i;
+	unsigned int count = 0;
+
+	down(&dev->mappings->sem);
+
+	for (i = 0; i < dev->mappings->size; i++) {
+		count += __destroy_bucket(dev->mappings, i);
+	}
+	
+	up(&dev->mappings->sem);
+
+	return count;
+}
+
+int dmu_init_mappings(void)
+{
+	map_cache =
+		kmem_cache_create("dm-userspace-mappings",
+				  sizeof(struct dmu_map),
+				  __alignof__ (struct dmu_map),
+				  0, NULL, NULL);
+	if (!map_cache) {
+		DMERR("Failed to allocate map cache");
+		return 0;
+	}
+
+	return 1;
+}
+
+void dmu_cleanup_mappings(void)
+{
+	kmem_cache_destroy(map_cache);
+}
+
+
diff -r 50f87a6ffd94 drivers/md/dm-userspace-chardev.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/md/dm-userspace-chardev.c	Mon Jan 29 14:28:05 2007 -0800
@@ -0,0 +1,765 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * (C) 2006 FUJITA Tomonori <tomof@acm.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/dm-userspace.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <asm/uaccess.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "kcopyd.h"
+#include "dm-user.h"
+
+#define DM_MSG_PREFIX "dm-userspace"
+
+/* This allows for a cleaner separation between the dm-userspace
+ * device-mapper target, and the userspace transport used.  Right now,
+ * only a chardev transport exists, but it's possible that there could
+ * be more in the future
+ */
+struct dmu_ring {
+	u32 r_idx;
+	unsigned long r_pages[DMU_RING_PAGES];
+	spinlock_t r_lock;
+};
+
+struct chardev_transport {
+	struct cdev cdev;
+	dev_t ctl_dev;
+	struct dmu_device *parent;
+
+	struct dmu_ring tx;
+	struct dmu_ring rx;
+
+	struct task_struct *tx_task;
+	struct task_struct *rx_task;
+
+	wait_queue_head_t tx_wqueue;
+	wait_queue_head_t rx_wqueue;
+	wait_queue_head_t poll_wait;
+};
+
+static inline void dmu_ring_idx_inc(struct dmu_ring *r)
+{
+	if (r->r_idx == DMU_MAX_EVENTS - 1)
+		r->r_idx = 0;
+	else
+		r->r_idx++;
+}
+
+static struct dmu_msg *dmu_head_msg(struct dmu_ring *r, u32 idx)
+{
+	u32 pidx, off;
+
+	pidx = idx / DMU_EVENT_PER_PAGE;
+	off = idx % DMU_EVENT_PER_PAGE;
+
+	return (struct dmu_msg *)
+		(r->r_pages[pidx] + sizeof(struct dmu_msg) * off);
+}
+
+static struct dmu_request *find_rx_request(struct dmu_device *dev,
+					   uint64_t id)
+{
+	struct dmu_request *req, *next, *match = NULL;
+	int count = 0;
+	struct list_head *list = &dev->rx_requests[id % DMU_CP_HASH];
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->xmit_lock, flags);
+	list_for_each_entry_safe(req, next, list, list) {
+		count++;
+		if (req->id == id) {
+			list_del_init(&req->list);
+			match = req;
+			atomic_dec(&dev->r_reqs);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&dev->xmit_lock, flags);
+
+	return match;
+}
+
+static int have_pending_requests(struct dmu_device *dev)
+{
+	return atomic_read(&dev->t_reqs) != 0;
+}
+
+static void send_userspace_message(struct dmu_msg *msg,
+				   struct dmu_request *req)
+{
+	memset(msg, 0, sizeof(*msg));
+
+	msg->hdr.id = req->id;
+
+	switch (req->type) {
+	case DM_USERSPACE_MAP_BLOCK_REQ:
+		msg->hdr.msg_type = req->type;
+		msg->payload.map_req.org_block = req->u.block;
+		dmu_cpy_flag(&msg->payload.map_req.flags,
+			     req->flags, DMU_FLAG_WR);
+		break;
+
+	case DM_USERSPACE_MAP_DONE:
+		msg->hdr.msg_type = DM_USERSPACE_MAP_DONE;
+		msg->payload.map_done.id_of_op = req->id;
+		msg->payload.map_done.org_block = req->u.block;
+		dmu_cpy_flag(&msg->payload.map_done.flags,
+			     req->flags, DMU_FLAG_WR);
+		break;
+
+	default:
+		DMWARN("Unknown outgoing message type %i", req->type);
+	}
+
+	/* If this request is not on a list (the rx_requests list),
+	 * then it needs to be freed after sending
+	 */
+	if (list_empty(&req->list)) {
+ 		INIT_WORK(&req->task, endio_worker);
+		schedule_work(&req->task);
+	}
+}
+
+static void add_rx_request(struct dmu_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&req->dev->xmit_lock, flags);
+	list_add_tail(&req->list, 
+		      &req->dev->rx_requests[req->id % DMU_CP_HASH]);
+	atomic_inc(&req->dev->r_reqs);
+	spin_unlock_irqrestore(&req->dev->xmit_lock, flags);
+}
+
+struct dmu_request *pluck_next_request(struct dmu_device *dev)
+{
+	struct dmu_request *req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->xmit_lock, flags);
+	if (!list_empty(&dev->tx_requests)) {
+		req = list_entry(dev->tx_requests.next,
+				 struct dmu_request, list);
+		list_del_init(&req->list);
+
+		atomic_dec(&dev->t_reqs);
+	}
+	spin_unlock_irqrestore(&dev->xmit_lock, flags);
+
+	if (req && ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) ||
+		    (req->type == DM_USERSPACE_MAP_DONE)))
+		add_rx_request(req);
+
+	return req;
+}
+
+static struct dmu_msg *get_tx_msg(struct dmu_ring *ring)
+{
+	struct dmu_msg *msg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ring->r_lock, flags);
+	msg = dmu_head_msg(ring, ring->r_idx);
+	if (msg->hdr.status)
+		msg = NULL;
+	else
+		dmu_ring_idx_inc(ring);
+	spin_unlock_irqrestore(&ring->r_lock, flags);
+
+	return msg;
+}
+
+static void send_tx_request(struct dmu_msg *msg, struct dmu_request *req)
+{
+	struct chardev_transport *t = req->dev->transport_private;
+
+	send_userspace_message(msg, req);
+	msg->hdr.status = 1;
+	mb();
+	flush_dcache_page(virt_to_page(msg));
+	wake_up_interruptible(&t->poll_wait);
+}
+
+/* Add a request to a device's request queue */
+void add_tx_request(struct dmu_device *dev, struct dmu_request *req)
+{
+	unsigned long flags;
+	struct chardev_transport *t = dev->transport_private;
+	struct dmu_ring *ring = &t->tx;
+	struct dmu_msg *msg;
+
+	BUG_ON(!list_empty(&req->list));
+
+	msg = get_tx_msg(ring);
+
+	if (msg) {
+		add_rx_request(req);
+		send_tx_request(msg, req);
+	} else {
+		spin_lock_irqsave(&dev->xmit_lock, flags);
+		list_add_tail(&req->list, &dev->tx_requests);
+		atomic_inc(&dev->t_reqs);
+		spin_unlock_irqrestore(&dev->xmit_lock, flags);
+
+		wake_up_interruptible(&t->tx_wqueue);
+	}
+}
+
+static int dmu_txd(void *data)
+{
+
+	struct dmu_device *dev = data;
+	struct chardev_transport *t = dev->transport_private;
+	struct dmu_ring *ring = &t->tx;
+	struct dmu_request *req = NULL;
+	struct dmu_msg *msg;
+
+	while (!kthread_should_stop()) {
+		msg = dmu_head_msg(ring, ring->r_idx);
+
+		wait_event_interruptible(t->tx_wqueue,
+					 (!msg->hdr.status &&
+					  have_pending_requests(dev)) ||
+					 kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		msg = get_tx_msg(ring);
+		if (!msg)
+			continue;
+
+		req = pluck_next_request(dev);
+		BUG_ON(!req);
+
+		send_tx_request(msg, req);
+	}
+
+	return 0;
+}
+
+static void flush_block(int read_err, unsigned int write_err, void *data)
+{
+	struct dmu_request *req = data;
+
+	if (read_err || write_err) {
+		DMERR("Failed to copy block!");
+		bio_io_error(req->bio, req->bio->bi_size);
+		return;
+	}
+
+	atomic_inc(&req->dev->f_reqs);
+	generic_make_request(req->bio);
+}
+
+static void copy_block(struct dmu_device *dev,
+		       struct block_device *src_dev,
+		       struct block_device *dst_dev,
+		       struct dmu_request *req,
+		       uint64_t org_block,
+		       uint64_t new_block,
+		       int64_t offset)
+{
+	struct io_region src, dst;
+
+	src.bdev = src_dev;
+	src.sector = dmu_sector(dev, org_block);
+	src.count = dev->block_size;
+
+	dst.bdev = dst_dev;
+	dst.sector = dmu_sector(dev, new_block);
+	dst.sector += offset;
+	dst.count = dev->block_size;
+
+	kcopyd_copy(dev->kcopy, &src, 1, &dst, 0, flush_block, req);
+}
+
+static void map_worker(struct work_struct *work)
+{
+	struct dmu_request *req;
+	struct dmu_msg_map_response *msg;
+	struct dmu_device *dev;
+	struct target_device *src_dev, *dst_dev;
+	
+	req = container_of(work, struct dmu_request, task);
+	msg = &req->response;
+	dev = req->dev;
+
+	if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) {
+		src_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min));
+		if (!src_dev) {
+			DMERR("Failed to find src device %i:%i\n",
+			      msg->src_maj, msg->src_min);
+			goto fail;
+		}
+	} else
+		src_dev = NULL;
+
+	dst_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min));
+	if (!dst_dev) {
+		DMERR("Failed to find dest device %i:%i\n",
+		      msg->dst_maj, msg->dst_min);
+		goto fail;
+	}
+
+	req->target_dev = dst_dev->bdev;
+
+	/* Remap the bio */
+	req->bio->bi_sector = dmu_sector(dev, msg->new_block) +
+		dmu_sector_offset(dev, req->bio->bi_sector) +
+		msg->offset;
+	req->bio->bi_bdev = dst_dev->bdev;
+
+	dmu_cpy_flag(&req->flags, msg->flags, DMU_FLAG_SYNC);
+
+	if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST))
+		copy_block(dev, src_dev->bdev, dst_dev->bdev, req,
+			   req->u.block, msg->new_block,
+			   msg->offset);
+	else
+		flush_block(0, 0, req);
+
+	return;
+
+ fail:
+	bio_io_error(req->bio, req->bio->bi_size);
+}
+
+static void do_make_mapping(struct dmu_device *dev,
+			    struct dmu_msg_make_mapping *msg)
+{
+	struct target_device *target;
+
+	target = find_target(dev, MKDEV(msg->dev_maj, msg->dev_min));
+	if (!target) {
+		DMERR("Failed to find target device %i:%i\n",
+		      msg->dev_maj, msg->dev_min);
+		return;
+	}
+
+	dmu_make_mapping(dev, 
+			 msg->org_block, msg->new_block, msg->offset,
+			 target->bdev, dmu_get_flag(&msg->flags, DMU_FLAG_WR));
+
+}
+
+static void do_kill_mapping(struct dmu_device *dev,
+			    struct dmu_msg_make_mapping *msg)
+{
+	if (!dmu_remove_mapping(dev, msg->org_block))
+		DMERR("Tried to remove non-existent mapping for %llu",
+		      msg->org_block);
+}
+
+static void do_map_bio(struct dmu_device *dev,
+		       struct dmu_msg_map_response *msg)
+{
+	struct dmu_request *req;
+
+	req = find_rx_request(dev, msg->id_of_req);
+	if (!req) {
+		DMERR("Unable to complete unknown map: %llu\n",
+		      (unsigned long long) msg->id_of_req);
+		return;
+	}
+
+	memcpy(&req->response, msg, sizeof(req->response));
+
+	INIT_WORK(&req->task, map_worker);
+	schedule_work(&req->task);
+}
+
+static void do_map_done(struct dmu_device *dev, uint64_t id_of_op, int fail)
+{
+	struct dmu_request *req;
+
+	req = find_rx_request(dev, id_of_op);
+	if (!req) {
+		DMERR("Unable to complete unknown request: %llu\n",
+		      (unsigned long long) id_of_op);
+		return;
+	}
+
+	dmu_clr_flag(&req->flags, DMU_FLAG_SYNC);
+
+	req->bio->bi_end_io(req->bio, req->bio->bi_size, fail);
+}
+
+static void do_map_failed(struct dmu_device *dev, uint64_t id_of_op)
+{
+	struct dmu_request *req;
+
+	req = find_rx_request(dev, id_of_op);
+	if (!req) {
+		DMERR("Unable to fail unknown request: %llu\n",
+		      (unsigned long long) id_of_op);
+		return;
+	}
+
+	DMERR("Userspace failed to map id %llu (sector %llu)",
+	      (unsigned long long) id_of_op,
+	      (unsigned long long) req->bio->bi_sector);
+
+	bio_io_error(req->bio, req->bio->bi_size);
+
+	mempool_free(req, request_pool);
+}
+
+static int dmu_rxd(void *data)
+{
+	struct dmu_device *dev = (struct dmu_device *) data;
+	struct chardev_transport *t = dev->transport_private;
+	struct dmu_ring *ring = &t->rx;
+	struct dmu_msg *msg;
+
+	while (!kthread_should_stop()) {
+		msg = dmu_head_msg(ring, ring->r_idx);
+		/* do we need this? */
+		flush_dcache_page(virt_to_page(msg));
+
+		wait_event_interruptible(t->rx_wqueue, msg->hdr.status ||
+					kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		switch (msg->hdr.msg_type) {
+		case DM_USERSPACE_MAP_BLOCK_RESP:
+			do_map_bio(dev, &msg->payload.map_rsp);
+			break;
+
+		case DM_USERSPACE_MAP_FAILED:
+			do_map_failed(dev, msg->payload.map_rsp.id_of_req);
+			break;
+
+		case DM_USERSPACE_MAP_DONE:
+			do_map_done(dev, msg->payload.map_done.id_of_op, 0);
+			break;
+
+		case DM_USERSPACE_MAP_DONE_FAILED:
+			do_map_done(dev, msg->payload.map_done.id_of_op, 1);
+			break;
+
+		case DM_USERSPACE_MAKE_MAPPING:
+			do_make_mapping(dev, &msg->payload.make_mapping);
+			break;
+
+		case DM_USERSPACE_KILL_MAPPING:
+			do_kill_mapping(dev, &msg->payload.make_mapping);
+			break;
+
+		default:
+			DMWARN("Unknown incoming request type: %i",
+			       msg->hdr.msg_type);
+		}
+
+		msg->hdr.status = 0;
+		dmu_ring_idx_inc(ring);
+	}
+
+	return 0;
+}
+
+ssize_t dmu_ctl_write(struct file *file, const char __user *buffer,
+		      size_t size, loff_t *offset)
+{
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct chardev_transport *t = dev->transport_private;
+
+	wake_up(&t->tx_wqueue);
+	wake_up(&t->rx_wqueue);
+	return size;
+}
+
+static void dmu_ring_free(struct dmu_ring *r)
+{
+	int i;
+	for (i = 0; i < DMU_RING_PAGES; i++) {
+		if (!r->r_pages[i])
+			break;
+		free_page(r->r_pages[i]);
+		r->r_pages[i] = 0;
+	}
+}
+
+static int dmu_ring_alloc(struct dmu_ring *r)
+{
+	int i;
+
+	r->r_idx = 0;
+	spin_lock_init(&r->r_lock);
+
+	for (i = 0; i < DMU_RING_PAGES; i++) {
+		r->r_pages[i] = get_zeroed_page(GFP_KERNEL);
+		if (!r->r_pages[i])
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+int dmu_ctl_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct chardev_transport *t;
+	struct dmu_device *dev;
+
+        if (!capable(CAP_SYS_ADMIN))
+                return -EACCES;
+
+	t = container_of(inode->i_cdev, struct chardev_transport, cdev);
+	dev = t->parent;
+
+	init_waitqueue_head(&t->poll_wait);
+	init_waitqueue_head(&t->tx_wqueue);
+	init_waitqueue_head(&t->rx_wqueue);
+
+	ret = dmu_ring_alloc(&t->tx);
+	if (ret)
+		return -ENOMEM;
+
+	ret = dmu_ring_alloc(&t->rx);
+	if (ret)
+		goto free_tx;
+
+	t->tx_task = kthread_run(dmu_txd, dev, "%s_tx", DM_MSG_PREFIX);
+	if (!t->tx_task)
+		goto free_rx;
+
+	t->rx_task = kthread_run(dmu_rxd, dev, "%s_rx", DM_MSG_PREFIX);
+	if (!t->rx_task) {
+		ret = -ENOMEM;
+		goto destroy_tx_task;
+	}
+
+	get_dev(dev);
+
+	file->private_data = dev;
+
+	return 0;
+destroy_tx_task:
+	kthread_stop(t->tx_task);
+free_rx:
+	dmu_ring_free(&t->rx);
+free_tx:
+	dmu_ring_free(&t->tx);
+	return ret;
+}
+
+int dmu_ctl_release(struct inode *inode, struct file *file)
+{
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct chardev_transport *t = dev->transport_private;
+
+	kthread_stop(t->rx_task);
+	kthread_stop(t->tx_task);
+
+	dmu_ring_free(&t->rx);
+	dmu_ring_free(&t->tx);
+
+	put_dev(dev);
+
+	/* Stop taking requests when there is no userspace to service them */
+	dev->request_slots = 0;
+
+	return 0;
+}
+
+unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
+{
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct chardev_transport *t = dev->transport_private;
+	struct dmu_ring *ring = &t->tx;
+	struct dmu_msg *msg;
+	unsigned mask = 0;
+	u32 idx;
+	unsigned long flags;
+
+	poll_wait(file, &t->poll_wait, wait);
+
+	spin_lock_irqsave(&ring->r_lock, flags);
+
+	idx = ring->r_idx ? ring->r_idx - 1 : DMU_MAX_EVENTS - 1;
+	msg = dmu_head_msg(ring, idx);
+	if (msg->hdr.status)
+		mask |= POLLIN | POLLRDNORM;
+
+	spin_unlock_irqrestore(&ring->r_lock, flags);
+
+	return mask;
+}
+
+static int dmu_ring_map(struct vm_area_struct *vma, unsigned long addr,
+			struct dmu_ring *ring)
+{
+	int i, err;
+
+	for (i = 0; i < DMU_RING_PAGES; i++) {
+		struct page *page = virt_to_page(ring->r_pages[i]);
+		err = vm_insert_page(vma, addr, page);
+		if (err)
+			return err;
+		addr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static int dmu_ctl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct chardev_transport *t = dev->transport_private;
+	unsigned long addr;
+	int err;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	if (vma->vm_end - vma->vm_start != DMU_RING_SIZE * 2) {
+		DMERR("mmap size must be %lu, not %lu \n",
+			DMU_RING_SIZE * 2, vma->vm_end - vma->vm_start);
+		return -EINVAL;
+	}
+
+	addr = vma->vm_start;
+	err = dmu_ring_map(vma, addr, &t->tx);
+	if (err)
+		return err;
+	err = dmu_ring_map(vma, addr + DMU_RING_SIZE, &t->rx);
+
+	/* Open the gates and wake anyone waiting */
+	/* FIXME: Magic number */
+	dev->request_slots = 20000;
+	wake_up_interruptible(&dev->lowmem);
+
+	return err;
+}
+
+static struct file_operations ctl_fops = {
+	.open    = dmu_ctl_open,
+	.release = dmu_ctl_release,
+	.write   = dmu_ctl_write,
+	.mmap    = dmu_ctl_mmap,
+	.poll    = dmu_ctl_poll,
+	.owner   = THIS_MODULE,
+};
+
+static int get_free_minor(void)
+{
+	struct dmu_device *dev;
+	int minor = 0;
+
+	spin_lock(&devices_lock);
+
+	while (1) {
+		list_for_each_entry(dev, &devices, list) {
+			struct chardev_transport *t = dev->transport_private;
+			if (MINOR(t->ctl_dev) == minor)
+				goto dupe;
+		}
+		break;
+	dupe:
+		minor++;
+	}
+
+	spin_unlock(&devices_lock);
+
+	return minor;
+}
+
+int register_chardev_transport(struct dmu_device *dev)
+{
+	struct chardev_transport *t;
+	int ret;
+
+	dev->transport_private = kmalloc(sizeof(struct chardev_transport),
+					 GFP_KERNEL);
+	t = dev->transport_private;
+
+	if (!t) {
+		DMERR("Failed to allocate chardev transport");
+		goto bad;
+	}
+
+	t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor());
+	t->parent = dev;
+
+	cdev_init(&t->cdev, &ctl_fops);
+	t->cdev.owner = THIS_MODULE;
+	t->cdev.ops = &ctl_fops;
+
+	ret = cdev_add(&t->cdev, t->ctl_dev, 1);
+	if (ret < 0) {
+		DMERR("Failed to register control device %d:%d",
+		       MAJOR(t->ctl_dev), MINOR(t->ctl_dev));
+		goto bad;
+	}
+
+	return 1;
+
+ bad:
+	kfree(t);
+	return 0;
+}
+
+void unregister_chardev_transport(struct dmu_device *dev)
+{
+	struct chardev_transport *t = dev->transport_private;
+
+	cdev_del(&t->cdev);
+	kfree(t);
+}
+
+int init_chardev_transport(void)
+{
+	int r;
+
+	r = alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace");
+	if (r) {
+		DMERR("Failed to allocate chardev region");
+		return 0;
+	} else
+		return 1;
+}
+
+void cleanup_chardev_transport(void)
+{
+	unregister_chrdev_region(dmu_dev, 10);
+}
+
+void write_chardev_transport_info(struct dmu_device *dev,
+			char *buf, unsigned int maxlen)
+{
+	struct chardev_transport *t = dev->transport_private;
+
+	snprintf(buf, maxlen, "%x:%x",
+		 MAJOR(t->ctl_dev), MINOR(t->ctl_dev));
+}
diff -r 50f87a6ffd94 drivers/md/dm-userspace.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/md/dm-userspace.c	Mon Jan 29 14:28:05 2007 -0800
@@ -0,0 +1,568 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/types.h>
+#include <linux/poll.h>
+
+#include <linux/dm-userspace.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "kcopyd.h"
+#include "dm-user.h"
+
+#define DMU_COPY_PAGES     256
+
+#define DM_MSG_PREFIX     "dm-userspace"
+
+struct kmem_cache *request_cache;
+mempool_t *request_pool;
+
+spinlock_t devices_lock;
+LIST_HEAD(devices);
+
+/* Device number for the control device */
+dev_t dmu_dev;
+
+void endio_worker(struct work_struct *work)
+{
+	struct dmu_request *req;
+	struct dmu_device *dev;
+
+	req = container_of(work, struct dmu_request, task);
+	dev  = req->dev;
+
+	spin_lock(&dev->lock);
+	if (list_empty(&req->list) && list_empty(&req->copy)) {
+		mempool_free(req, request_pool);
+		atomic_dec(&dev->f_reqs);
+		atomic_dec(&dev->total);
+		wake_up_interruptible(&dev->lowmem);
+	} else {
+		PREPARE_WORK(&req->task, endio_worker);
+		schedule_work(&req->task);
+	}
+	spin_unlock(&dev->lock);
+}
+
+/* Return an already-bound target device */
+struct target_device *find_target(struct dmu_device *dev,
+					 dev_t devno)
+{
+	struct target_device *target, *match = NULL;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(target, &dev->target_devs, list) {
+		if (target->bdev->bd_dev == devno) {
+			match = target;
+			break;
+		}
+	}
+	spin_unlock(&dev->lock);
+
+	return match;
+}
+
+/* Find a new target device and bind it to our device */
+static struct target_device *get_target(struct dmu_device *dev,
+					dev_t devno)
+{
+	struct target_device *target;
+	struct block_device *bdev;
+
+	target = find_target(dev, devno);
+	if (target)
+		return target;
+
+	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
+	if (IS_ERR(bdev)) {
+		DMERR("Unable to lookup device %x", devno);
+		return NULL;
+	}
+
+	target = kmalloc(sizeof(*target), GFP_KERNEL);
+	if (!target) {
+		DMERR("Unable to alloc new target device");
+		return NULL;
+	}
+
+	target->bdev = bdev;
+	INIT_LIST_HEAD(&target->list);
+
+	if (in_interrupt())
+		DMERR("%s in irq\n", __FUNCTION__);
+
+	spin_lock(&dev->lock);
+	list_add_tail(&target->list, &dev->target_devs);
+	spin_unlock(&dev->lock);
+
+	return target;
+}
+
+/* Caller must hold dev->lock */
+static void put_target(struct dmu_device *dev,
+		       struct target_device *target)
+{
+	list_del(&target->list);
+
+	bd_release(target->bdev);
+	blkdev_put(target->bdev);
+
+	kfree(target);
+}
+
+void destroy_dmu_device(struct kref *ref)
+{
+	struct dmu_device *dev;
+	struct list_head *cursor, *next;
+	int i;
+
+	dev = container_of(ref, struct dmu_device, users);
+
+	spin_lock(&devices_lock);
+	list_del(&dev->list);
+	spin_unlock(&devices_lock);
+
+	list_for_each_safe(cursor, next, &dev->target_devs) {
+		struct target_device *target;
+
+		target = list_entry(cursor,
+				    struct target_device,
+				    list);
+
+		put_target(dev, target);
+	}
+
+	list_for_each_safe(cursor, next, &dev->tx_requests) {
+		struct dmu_request *req;
+
+		req = list_entry(cursor,
+				 struct dmu_request,
+				 list);
+
+		DMERR("Failing unsent bio");
+		bio_io_error(req->bio, req->bio->bi_size);
+
+		list_del(&req->list);
+
+		mempool_free(req, request_pool);
+	}
+
+	for (i = 0; i < DMU_CP_HASH; i++) {
+		list_for_each_safe(cursor, next, &dev->rx_requests[i]) {
+			struct dmu_request *req;
+
+			req = list_entry(cursor,
+					 struct dmu_request,
+					 list);
+
+			DMERR("Failing bio");
+			req->flags = 0;
+			bio_io_error(req->bio, req->bio->bi_size);
+
+			list_del(&req->list);
+
+			mempool_free(req, request_pool);
+		}
+	}
+
+	dmu_remove_all_mappings(dev);
+
+	kcopyd_client_destroy(dev->kcopy);
+	unregister_chardev_transport(dev);
+
+	kfree(dev);
+}
+
+static int init_dmu_device(struct dmu_device *dev, u32 block_size)
+{
+	int ret, i;
+
+	init_waitqueue_head(&dev->lowmem);
+	INIT_LIST_HEAD(&dev->list);
+	INIT_LIST_HEAD(&dev->target_devs);
+	kref_init(&dev->users);
+	spin_lock_init(&dev->lock);
+	spin_lock_init(&dev->xmit_lock);
+
+	INIT_LIST_HEAD(&dev->tx_requests);
+
+	dev->rx_requests = kmalloc(sizeof(struct list_head) * DMU_CP_HASH,
+				   GFP_KERNEL);
+	if (!dev->rx_requests) {
+		DMERR("Failed to alloc RX hash\n");
+		return 0;
+	}
+
+	for (i = 0; i < DMU_CP_HASH; i++)
+		INIT_LIST_HEAD(&dev->rx_requests[i]);
+
+	dev->block_size  = block_size;
+	dev->block_mask  = block_size - 1;
+	dev->block_shift = ffs(block_size) - 1;
+
+	atomic_set(&dev->t_reqs, 0);
+	atomic_set(&dev->r_reqs, 0);
+	atomic_set(&dev->f_reqs, 0);
+	atomic_set(&dev->total, 0);
+	atomic_set(&dev->idcounter, 0);
+
+	dmu_alloc_mappings(&dev->mappings, 2048);
+
+	ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy);
+	if (ret) {
+		DMERR("Failed to initialize kcopyd client");
+		return 0;
+	}
+
+	dev->request_slots = 0; /* Unable to queue reqs right away */
+
+	return 1;
+}
+
+static struct dmu_device *new_dmu_device(char *key,
+					 struct dm_target *ti,
+					 u32 block_size)
+{
+	struct dmu_device *dev;
+	int                ret;
+
+	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		DMERR("Failed to allocate new userspace device");
+		return NULL;
+	}
+
+	if (!init_dmu_device(dev, block_size))
+		goto bad1;
+
+	snprintf(dev->key, DMU_KEY_LEN, "%s", key);
+
+	ret = register_chardev_transport(dev);
+	if (!ret)
+		goto bad2;
+
+	spin_lock(&devices_lock);
+	list_add(&dev->list, &devices);
+	spin_unlock(&devices_lock);
+
+	return dev;
+
+ bad2:
+	put_dev(dev);
+ bad1:
+	kfree(dev);
+	DMERR("Failed to create device");
+	return NULL;
+}
+
+static struct dmu_device *find_dmu_device(const char *key)
+{
+	struct dmu_device *dev;
+	struct dmu_device *match = NULL;
+
+	spin_lock(&devices_lock);
+
+	list_for_each_entry(dev, &devices, list) {
+		spin_lock(&dev->lock);
+		if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
+			match = dev;
+			spin_unlock(&dev->lock);
+			break;
+		}
+		spin_unlock(&dev->lock);
+	}
+
+	spin_unlock(&devices_lock);
+
+	return match;
+}
+
+static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	uint64_t block_size;
+	struct dmu_device *dev;
+	char *device_key;
+	char *block_size_param;
+	int target_idx = 2;
+
+	if (argc < 3) {
+		ti->error = "Invalid argument count";
+		return -EINVAL;
+	}
+
+	device_key = argv[0];
+	block_size_param = argv[1];
+
+	block_size = simple_strtoul(block_size_param, NULL, 10) / 512;
+
+	dev = find_dmu_device(device_key);
+	if (!dev) {
+		dev = new_dmu_device(device_key, ti, block_size);
+		if (!dev) {
+			ti->error = "Failed to create device";
+			goto bad;
+		}
+	} else
+		get_dev(dev);
+
+	spin_lock(&dev->lock);
+	if (dev->block_size != block_size) {
+		ti->error = "Invalid block size";
+		goto bad;
+	}
+	spin_unlock(&dev->lock);
+
+	/* Resolve target devices */
+	do {
+		int maj, min;
+		sscanf(argv[target_idx], "%i:%i", &maj, &min);
+		if (!get_target(dev, MKDEV(maj, min))) {
+			DMERR("Failed to find target device %i:%i (%s)",
+			      maj, min, argv[target_idx]);
+			goto out;
+		}
+	} while (++target_idx < argc);
+
+	ti->private  = dev;
+	ti->split_io = block_size;
+
+	return 0;
+
+ bad:
+	if (dev)
+		spin_unlock(&dev->lock);
+ out:
+	if (dev)
+		put_dev(dev);
+
+	return -EINVAL;
+}
+
+static void dmu_dtr(struct dm_target *ti)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+
+	put_dev(dev);
+}
+
+static void init_req(struct dmu_device *dev,
+		     struct bio *bio,
+		     struct dmu_request *req)
+{
+	req->id = (uint64_t) atomic_add_return(1, &dev->idcounter);
+
+	req->type = DM_USERSPACE_MAP_BLOCK_REQ;
+	req->dev = dev;
+	req->bio = bio;
+	req->u.block = dmu_block(dev, bio->bi_sector);
+	req->flags = 0;
+	INIT_LIST_HEAD(&req->deps);
+	INIT_LIST_HEAD(&req->list);
+	INIT_LIST_HEAD(&req->copy);
+
+	if (bio_rw(bio))
+		dmu_set_flag(&req->flags, DMU_FLAG_WR);
+}
+
+static int dmu_map(struct dm_target *ti, struct bio *bio,
+		   union map_info *map_context)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+	struct dmu_request *req;
+
+	if (unlikely(bio_barrier(bio))) {
+		DMINFO("Refusing bio barrier\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (dmu_map_from_mappings(dev, bio)) {
+		map_context->ptr = NULL;
+		return 1;
+	}
+
+	wait_event_interruptible(dev->lowmem,
+				 atomic_read(&dev->total) < 
+				 dev->request_slots);
+
+	req = mempool_alloc(request_pool, GFP_NOIO);
+	if (!req) {
+		DMERR("Failed to alloc request");
+		return -1;
+	}
+
+	atomic_inc(&dev->total);
+
+	map_context->ptr = req;
+
+	init_req(dev, bio, req);
+
+	add_tx_request(dev, req);
+
+	return 0;
+}
+
+static int dmu_status(struct dm_target *ti, status_type_t type,
+		      char *result, unsigned int maxlen)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		write_chardev_transport_info(dev, result, maxlen);
+		break;
+
+	case STATUSTYPE_TABLE:
+		snprintf(result, maxlen, "%s %llu",
+			 dev->key,
+			 (unsigned long long) dev->block_size * 512);
+		break;
+	}
+
+	return 0;
+}
+
+static int dmu_end_io(struct dm_target *ti, struct bio *bio,
+                        int error, union map_info *map_context)
+{
+	struct dmu_request *req = map_context->ptr;
+	int ret = 0;
+
+	if (error)
+		return -1;
+
+	if (!req)
+		return 0;
+
+	if (dmu_get_flag(&req->flags, DMU_FLAG_SYNC)) {
+		req->type = DM_USERSPACE_MAP_DONE;
+		add_tx_request(req->dev, req);
+		ret = 1;
+	} else {
+		INIT_WORK(&req->task, endio_worker);
+		schedule_work(&req->task);
+	}
+
+	return ret;
+}
+
+struct target_type userspace_target = {
+	.name    = "userspace",
+	.version = {0, 1, 0},
+	.module  = THIS_MODULE,
+	.ctr     = dmu_ctr,
+	.dtr     = dmu_dtr,
+	.map     = dmu_map,
+	.status  = dmu_status,
+	.end_io  = dmu_end_io
+};
+
+int __init dm_userspace_init(void)
+{
+	int r = dm_register_target(&userspace_target);
+	if (r < 0) {
+		DMERR("Register failed %d", r);
+		return 0;
+	}
+
+	spin_lock_init(&devices_lock);
+
+	request_cache =
+		kmem_cache_create("dm-userspace-requests",
+				  sizeof(struct dmu_request),
+				  __alignof__ (struct dmu_request),
+				  0, NULL, NULL);
+	if (!request_cache) {
+		DMERR("Failed to allocate request cache");
+		goto bad;
+	}
+
+	request_pool = mempool_create(64,
+				      mempool_alloc_slab, mempool_free_slab,
+				      request_cache);
+	if (!request_pool) {
+		DMERR("Failed to allocate request pool");
+		goto bad2;
+	}
+
+	r = dmu_init_mappings();
+	if (!r)
+		goto bad3;
+
+	r = init_chardev_transport();
+	if (!r)
+		goto bad4;
+
+	return 1;
+ bad4:
+	dmu_cleanup_mappings();
+ bad3:
+	mempool_destroy(request_pool);
+ bad2:
+	kmem_cache_destroy(request_cache);
+ bad:
+	dm_unregister_target(&userspace_target);
+
+	return 0;
+}
+
+void __exit dm_userspace_exit(void)
+{
+	int r;
+	struct list_head *cursor, *next;
+	struct dmu_device *dev;
+
+	spin_lock(&devices_lock);
+
+	list_for_each_safe(cursor, next, &devices) {
+		dev = list_entry(cursor, struct dmu_device, list);
+		list_del(cursor);
+		destroy_dmu_device(&dev->users);
+		DMERR("Destroying hanging device %s", dev->key);
+	}
+
+	spin_unlock(&devices_lock);
+
+	cleanup_chardev_transport();
+
+	mempool_destroy(request_pool);
+	kmem_cache_destroy(request_cache);
+
+	dmu_cleanup_mappings();
+
+	r = dm_unregister_target(&userspace_target);
+	if (r < 0)
+		DMERR("unregister failed %d", r);
+}
+
+module_init(dm_userspace_init);
+module_exit(dm_userspace_exit);
+
+MODULE_DESCRIPTION(DM_NAME " userspace target");
+MODULE_AUTHOR("Dan Smith");
+MODULE_LICENSE("GPL");
diff -r 50f87a6ffd94 include/linux/dm-userspace.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/linux/dm-userspace.h	Mon Jan 29 14:28:05 2007 -0800
@@ -0,0 +1,123 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is released under the LGPL
+ *
+ */
+
+#ifndef __DM_USERSPACE_H
+#define __DM_USERSPACE_H
+
+#include <linux/types.h>
+
+/*
+ * Message Types
+ */
+#define DM_USERSPACE_MAP_BLOCK_REQ    1
+#define DM_USERSPACE_MAP_BLOCK_RESP   2
+#define DM_USERSPACE_MAP_FAILED       3
+#define DM_USERSPACE_MAP_DONE         4
+#define DM_USERSPACE_MAP_DONE_FAILED  5
+#define DM_USERSPACE_MAKE_MAPPING     6
+#define DM_USERSPACE_KILL_MAPPING     7
+
+/*
+ * Flags and associated macros
+ */
+#define DMU_FLAG_VALID       1
+#define DMU_FLAG_WR          2
+#define DMU_FLAG_COPY_FIRST  4
+#define DMU_FLAG_SYNC        8
+
+static inline int dmu_get_flag(uint32_t *flags, uint32_t flag)
+{
+	return (*flags & flag) != 0;
+}
+
+static inline void dmu_set_flag(uint32_t *flags, uint32_t flag)
+{
+	*flags |= flag;
+}
+
+static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag)
+{
+	*flags &= (~flag);
+}
+
+static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
+{
+	*flags = (*flags & ~flag) | (src & flag);
+}
+
+/*
+ * This message header is sent in front of every message, in both
+ * directions
+ */
+struct dmu_msg_header {
+	uint64_t id;
+	uint32_t msg_type;
+	uint32_t payload_len;
+	uint32_t status;
+	uint32_t padding;
+};
+
+/* DM_USERSPACE_MAP_DONE
+ * DM_USERSPACE_MAP_DONE_FAILED
+ */
+struct dmu_msg_map_done {
+	uint64_t id_of_op;
+	uint64_t org_block;
+	uint32_t flags;
+};
+
+/* DM_USERSPACE_MAP_BLOCK_REQ */
+struct dmu_msg_map_request {
+	uint64_t org_block;
+
+	uint32_t flags;
+};
+
+struct dmu_msg_make_mapping {
+	uint64_t org_block;
+	uint64_t new_block;
+	int64_t offset;
+	uint32_t dev_maj;
+	uint32_t dev_min;
+	uint32_t flags;
+};
+
+/* DM_USERSPACE_MAP_BLOCK_RESP
+ * DM_USERSPACE_MAP_BLOCK_FAILED
+ */
+struct dmu_msg_map_response {
+	uint64_t new_block;
+	int64_t offset;
+
+	uint64_t id_of_req;
+	uint32_t flags;
+
+	uint32_t src_maj;
+	uint32_t src_min;
+
+	uint32_t dst_maj;
+	uint32_t dst_min;
+};
+
+/* A full message */
+struct dmu_msg {
+	struct dmu_msg_header hdr;
+	union {
+		struct dmu_msg_map_done map_done;
+		struct dmu_msg_map_request map_req;
+		struct dmu_msg_map_response map_rsp;
+		struct dmu_msg_make_mapping make_mapping;
+	} payload;
+};
+
+#define DMU_RING_SIZE (1UL << 16)
+#define DMU_RING_PAGES (DMU_RING_SIZE >> PAGE_SHIFT)
+#define DMU_EVENT_PER_PAGE (PAGE_SIZE / sizeof(struct dmu_msg))
+#define DMU_MAX_EVENTS (DMU_EVENT_PER_PAGE * DMU_RING_PAGES)
+
+#endif
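
(For the curious, the wire protocol behind that header is just two mmap'ed
rings of struct dmu_msg on the control device: the first DMU_RING_SIZE bytes
carry kernel-to-user requests, the second half carries user-to-kernel replies,
hdr.status marks a slot as ready, and any write() on the fd acts as a doorbell.
A rough userspace-side sketch follows -- the device path is made up and error
handling is omitted; this is essentially what lib/dmu.c in the second patch
wraps.)

#define PAGE_SHIFT 12			/* same assumption lib/dmu.c makes */
#define PAGE_SIZE (1UL << PAGE_SHIFT)

#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/dm-userspace.h>

static struct dmu_msg *ring_slot(char *base, uint32_t idx)
{
	/* Messages are packed per page, mirroring dmu_head_msg() above */
	return (struct dmu_msg *)(base + (idx / DMU_EVENT_PER_PAGE) * PAGE_SIZE +
				  (idx % DMU_EVENT_PER_PAGE) * sizeof(struct dmu_msg));
}

int main(void)
{
	int fd = open("/dev/dm-userspace0", O_RDWR);	/* path is made up */
	char *tx = mmap(NULL, DMU_RING_SIZE * 2, PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);		/* kernel->user ring */
	char *rx = tx + DMU_RING_SIZE;			/* user->kernel ring */
	uint32_t tx_idx = 0, rx_idx = 0;
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	for (;;) {
		poll(&pfd, 1, -1);		/* POLLIN: kernel posted a request */

		struct dmu_msg *req = ring_slot(tx, tx_idx);
		while (req->hdr.status) {
			struct dmu_msg *rsp = ring_slot(rx, rx_idx);

			/* ... fill rsp->payload.map_rsp (new_block, dst_maj/dst_min,
			 * flags) from req->payload.map_req here ... */
			rsp->hdr.msg_type = DM_USERSPACE_MAP_BLOCK_RESP;
			rsp->payload.map_rsp.id_of_req = req->hdr.id;
			rsp->hdr.status = 1;	/* hand the reply slot to the kernel */
			rx_idx = (rx_idx + 1) % DMU_MAX_EVENTS;

			req->hdr.status = 0;	/* give the request slot back */
			tx_idx = (tx_idx + 1) % DMU_MAX_EVENTS;
			req = ring_slot(tx, tx_idx);
		}

		write(fd, "", 1);		/* doorbell: wakes the kernel threads */
	}
	return 0;
}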

[-- Attachment #1.1.3: dm-user_lib.patch --]
[-- Type: text/x-patch, Size: 24673 bytes --]

diff -r 0200430c78db configure
--- a/configure	Thu Jan 25 23:36:05 2007 +0000
+++ b/configure	Mon Jan 29 14:32:56 2007 -0800
@@ -310,7 +310,7 @@ ac_includes_default="\
 #endif"
 
 ac_default_prefix=/usr
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB LIBOBJS MSGFMT usrlibdir JOBS STATIC_LINK OWNER GROUP interface kerneldir missingkernel kernelvsn tmpdir COPTIMISE_FLAG CLDFLAGS LDDEPS LIB_SUFFIX DEBUG DM_LIB_VERSION COMPAT DMIOCTLS LOCALEDIR INTL_PACKAGE INTL DEVICE_UID DEVICE_GID DEVICE_MODE DMEVENTD PKGCONFIG LTLIBOBJS'
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB LIBOBJS MSGFMT usrlibdir JOBS STATIC_LINK OWNER GROUP interface kerneldir missingkernel kernelvsn tmpdir COPTIMISE_FLAG CLDFLAGS LDDEPS LIB_SUFFIX DEBUG DM_LIB_VERSION COMPAT DMIOCTLS LOCALEDIR INTL_PACKAGE INTL DEVICE_UID DEVICE_GID DEVICE_MODE DMEVENTD PKGCONFIG DMU LTLIBOBJS'
 ac_subst_files=''
 
 # Initialize some variables set by options.
@@ -856,6 +856,7 @@ Optional Features:
                           statically.  Default is dynamic linking
   --disable-selinux       Disable selinux support
   --enable-nls            Enable Native Language Support
+  --disable-dmu        Disable dm-userspace support
 
 Optional Packages:
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@@ -1445,7 +1446,8 @@ case "$host_os" in
 		LDDEPS="$LDDEPS .export.sym"
 		LIB_SUFFIX="so"
 		DMIOCTLS="yes"
-		SELINUX="yes" ;;
+		SELINUX="yes"
+		DMU="yes" ;;
 	darwin*)
 		CFLAGS="$CFLAGS -no-cpp-precomp -fno-common"
 		COPTIMISE_FLAG="-O2"
@@ -1453,7 +1455,8 @@ case "$host_os" in
 		LDDEPS="$LDDEPS"
 		LIB_SUFFIX="dylib"
 		DMIOCTLS="no"
-		SELINUX="no" ;;
+		SELINUX="no"
+		DMU="no" ;;
 esac
 
 ################################################################################
@@ -5963,6 +5966,26 @@ fi
 fi
 
 ################################################################################
+echo "$as_me:$LINENO: checking whether to enable dm-userspace" >&5
+echo $ECHO_N "checking whether to enable dm-userspace... $ECHO_C" >&6
+# Check whether --enable-dmu or --disable-dmu was given.
+if test "${enable_dmu+set}" = set; then
+  enableval="$enable_dmu"
+  DMU=$enableval
+fi;
+echo "$as_me:$LINENO: result: $DMU" >&5
+echo "${ECHO_T}$DMU" >&6
+
+if test "x${DMU}" = "xyes"; then
+	if test "x${missingkernel}" = xyes; then
+		{ { echo "$as_me:$LINENO: error: \"Kernel source required to build dm-userspace tools\"" >&5
+echo "$as_me: error: \"Kernel source required to build dm-userspace tools\"" >&2;}
+   { (exit 1); exit 1; }; }
+	fi
+fi
+
+
+################################################################################
 echo "$as_me:$LINENO: checking for kernel version" >&5
 echo $ECHO_N "checking for kernel version... $ECHO_C" >&6
 
@@ -6044,6 +6067,7 @@ _ACEOF
 
 
 ################################################################################
+
 
 
 if test "$DMEVENTD" = yes; then
@@ -6799,6 +6823,7 @@ s,@DEVICE_MODE@,$DEVICE_MODE,;t t
 s,@DEVICE_MODE@,$DEVICE_MODE,;t t
 s,@DMEVENTD@,$DMEVENTD,;t t
 s,@PKGCONFIG@,$PKGCONFIG,;t t
+s,@DMU@,$DMU,;t t
 s,@LTLIBOBJS@,$LTLIBOBJS,;t t
 CEOF
 
diff -r 0200430c78db configure.in
--- a/configure.in	Thu Jan 25 23:36:05 2007 +0000
+++ b/configure.in	Mon Jan 29 14:32:56 2007 -0800
@@ -38,7 +38,8 @@ case "$host_os" in
 		LDDEPS="$LDDEPS .export.sym"
 		LIB_SUFFIX="so"
 		DMIOCTLS="yes"
-		SELINUX="yes" ;;
+		SELINUX="yes"
+		DMU="yes" ;;
 	darwin*)
 		CFLAGS="$CFLAGS -no-cpp-precomp -fno-common"
 		COPTIMISE_FLAG="-O2"
@@ -46,7 +47,8 @@ case "$host_os" in
 		LDDEPS="$LDDEPS"
 		LIB_SUFFIX="dylib"
 		DMIOCTLS="no"
-		SELINUX="no" ;;
+		SELINUX="no"
+		DMU="no" ;;
 esac
 
 ################################################################################
@@ -296,6 +298,20 @@ else
 else
   test -d "${kerneldir}" || { AC_MSG_WARN(kernel dir $kerneldir not found); missingkernel=yes ; }
 fi
+
+################################################################################
+dnl -- Disable dm-userspace
+AC_MSG_CHECKING(whether to enable dm-userspace)
+AC_ARG_ENABLE(dmu, [  --disable-dmu        Disable dm-userspace support],
+DMU=$enableval)
+AC_MSG_RESULT($DMU)
+
+if test "x${DMU}" = "xyes"; then
+	if test "x${missingkernel}" = xyes; then
+		AC_ERROR("Kernel source required to build dm-userspace tools")
+	fi
+fi
+	
 
 ################################################################################
 dnl -- Kernel version string
@@ -413,6 +429,7 @@ AC_SUBST(DEVICE_MODE)
 AC_SUBST(DEVICE_MODE)
 AC_SUBST(DMEVENTD)
 AC_SUBST(PKGCONFIG)
+AC_SUBST(DMU)
 
 ################################################################################
 dnl -- First and last lines should not contain files to generate in order to 
diff -r 0200430c78db lib/.exported_symbols
--- a/lib/.exported_symbols	Thu Jan 25 23:36:05 2007 +0000
+++ b/lib/.exported_symbols	Mon Jan 29 14:32:56 2007 -0800
@@ -127,3 +127,26 @@ dm_report_field_uint32
 dm_report_field_uint32
 dm_report_field_uint64
 dm_report_field_set_value
+dmu_async_map
+dmu_async_map_done
+dmu_ctl_close
+dmu_ctl_open
+dmu_ctl_send_queue
+dmu_events_pending
+dmu_get_ctl_fd
+dmu_kill_mapping
+dmu_make_mapping
+dmu_map_dup
+dmu_map_get_block
+dmu_map_get_id
+dmu_map_is_write
+dmu_map_set_block
+dmu_map_set_copy_src_dev
+dmu_map_set_dest_dev
+dmu_map_set_offset
+dmu_map_set_origin_block
+dmu_map_set_sync
+dmu_map_set_writable
+dmu_process_events
+dmu_register_map_done_handler
+dmu_register_map_handler
\ No newline at end of file
diff -r 0200430c78db lib/Makefile.in
--- a/lib/Makefile.in	Thu Jan 25 23:36:05 2007 +0000
+++ b/lib/Makefile.in	Mon Jan 29 14:32:56 2007 -0800
@@ -16,6 +16,7 @@ top_srcdir = @top_srcdir@
 top_srcdir = @top_srcdir@
 VPATH = @srcdir@
 interface = @interface@
+kerneldir = @kerneldir@
 
 SOURCES =\
 	datastruct/bitset.c \
@@ -30,6 +31,11 @@ SOURCES =\
 	$(interface)/libdm-iface.c
 
 INCLUDES = -I$(interface)
+
+ifeq ("@DMU@", "yes")
+  INCLUDES += -I$(kerneldir)/include
+  SOURCES += dmu.c
+endif
 
 LIB_STATIC = $(interface)/libdevmapper.a
 
diff -r 0200430c78db lib/libdevmapper.h
--- a/lib/libdevmapper.h	Thu Jan 25 23:36:05 2007 +0000
+++ b/lib/libdevmapper.h	Mon Jan 29 14:32:56 2007 -0800
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
  * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+ * Copyright IBM Corp., 2006
  *
  * This file is part of the device-mapper userspace tools.
  *
@@ -27,6 +28,7 @@
 #include <limits.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stdint.h>
 
 /*****************************************************************
  * The first section of this file provides direct access to the 
@@ -711,4 +713,58 @@ void dm_report_field_set_value(struct dm
 void dm_report_field_set_value(struct dm_report_field *field, const void *value,
 			       const void *sortvalue);
 
+
+/**************
+ * dm-userspace
+ **************/
+
+struct dmu_context;
+struct dmu_map_data;
+
+/* Returns 1 to allow IO to complete, 0 to delay */
+typedef int (*map_done_handler_t)(void *data, struct dmu_map_data *map_data);
+
+/* Returns 1 to map IO, -1 to fail IO, 0 to delay */
+typedef int (*map_req_handler_t)(void *data, struct dmu_map_data *map_data);
+
+/* High-level control operations */
+struct dmu_context *dmu_ctl_open(char *dev, int flags);
+int dmu_ctl_close(struct dmu_context *ctx);
+int dmu_ctl_send_queue(struct dmu_context *ctx);
+void dmu_register_map_done_handler(struct dmu_context *ctx,
+				   map_done_handler_t handler,
+				   void *data);
+void dmu_register_map_handler(struct dmu_context *ctx,
+                              map_req_handler_t handler,
+                              void *data);
+int dmu_invalidate_block(struct dmu_context *ctx, uint64_t block);
+int dmu_events_pending(struct dmu_context *ctx, unsigned int msec);
+int dmu_process_events(struct dmu_context *ctx);
+int dmu_get_ctl_fd(struct dmu_context *ctx);
+
+/* Map manipulation functions */
+void dmu_map_set_block(struct dmu_map_data *data, uint64_t block);
+void dmu_map_set_origin_block(struct dmu_map_data *data, uint64_t block);
+uint64_t dmu_map_get_block(struct dmu_map_data *data);
+void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset);
+uint32_t dmu_map_get_id(struct dmu_map_data *data);
+void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev);
+void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev);
+int dmu_map_is_write(struct dmu_map_data *data);
+void dmu_map_set_sync(struct dmu_map_data *data);
+void dmu_map_set_writable(struct dmu_map_data *data, int rw);
+struct dmu_map_data *dmu_map_dup(struct dmu_map_data *data);
+
+/* Functions for submitting out-of-order events */
+int dmu_async_map(struct dmu_context *ctx, 
+		  struct dmu_map_data *data, 
+		  int fail);
+int dmu_async_map_done(struct dmu_context *ctx, uint64_t id, int fail);
+
+/* Functions to manipulate the kernel map cache */
+int dmu_make_mapping(struct dmu_context *ctx,
+		     struct dmu_map_data *data);
+int dmu_kill_mapping(struct dmu_context *ctx,
+		     struct dmu_map_data *data);
+
 #endif				/* LIB_DEVICE_MAPPER_H */
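
For reference, a minimal consumer of this interface might look like the
following sketch.  It assumes an existing device-mapper device named
"dmu-test" built on the userspace target, a hypothetical destination
device at major:minor 8:17, and a trivial identity-mapping policy;
error handling and clean shutdown are omitted.

/*
 * Sketch only: "dmu-test" and the 8:17 destination device are
 * placeholders, and the identity-mapping policy is purely illustrative.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <libdevmapper.h>

static int map_handler(void *data, struct dmu_map_data *map_data)
{
	dev_t dest = *(dev_t *)data;

	/* Keep the origin block number (identity mapping) and send the
	 * I/O to the destination device. */
	dmu_map_set_block(map_data, dmu_map_get_block(map_data));
	dmu_map_set_offset(map_data, 0);
	dmu_map_set_dest_dev(map_data, dest);

	return 1;	/* 1 = map the I/O now */
}

int main(void)
{
	char dev_name[] = "dmu-test";	/* placeholder device name */
	dev_t dest = makedev(8, 17);	/* placeholder destination dev */
	struct dmu_context *ctx;

	ctx = dmu_ctl_open(dev_name, 0);
	if (!ctx) {
		fprintf(stderr, "failed to open dm-userspace control channel\n");
		return 1;
	}

	dmu_register_map_handler(ctx, map_handler, &dest);

	/* Service map requests forever (no clean shutdown shown) */
	for (;;) {
		if (dmu_events_pending(ctx, 1000))
			dmu_process_events(ctx);
	}

	return 0;
}

A handler that cannot answer immediately can instead return 0, keep a
copy of the request via dmu_map_dup() and dmu_map_get_id(), and finish
it out of order later with dmu_async_map() (or dmu_async_map_done() for
the map-done side).
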
diff -r 0200430c78db lib/dmu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/dmu.c	Mon Jan 29 14:32:56 2007 -0800
@@ -0,0 +1,638 @@
+/*
+ * Copyright IBM Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <libdevmapper.h>
+#include <linux/dm-userspace.h>
+#include <sys/mman.h>
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+
+#define MAX_MAJ_VER 0
+#define MAX_MIN_VER 1
+
+#define DMU_MSG_DEBUG 0
+
+struct uring {
+        uint32_t idx;
+        char *buf;
+        int size;
+};
+
+#if DMU_MSG_DEBUG
+#define DPRINTF( s, arg... ) fprintf(stderr, s, ##arg)
+#else
+#define DPRINTF( s, arg... )
+#endif
+
+struct dmu_events {
+	map_done_handler_t map_done_fn;
+	map_req_handler_t map_fn;
+};
+
+struct dmu_event_data {
+	void *map_done_user_data;
+	void *map_user_data;
+};
+
+struct dmu_context {
+	int fd;
+	uint32_t id_ctr;
+	struct dmu_events events;
+	struct dmu_event_data event_data;
+	
+	struct uring ukring;
+	struct uring kuring;
+
+	uint32_t pending;
+};
+
+struct dmu_map_data {
+	uint64_t org_block;
+	uint64_t block;
+	int64_t offset;
+	uint32_t id;
+	uint32_t flags;
+	dev_t dest_dev;
+	dev_t copy_src_dev;
+};
+
+void dmu_map_set_origin_block(struct dmu_map_data *data, uint64_t block)
+{
+	data->org_block = block;
+}
+
+void dmu_map_set_writable(struct dmu_map_data *data, int rw)
+{
+	if (rw)
+		dmu_set_flag(&data->flags, DMU_FLAG_WR);
+	else
+		dmu_clr_flag(&data->flags, DMU_FLAG_WR);
+}
+
+void dmu_map_set_block(struct dmu_map_data *data, uint64_t block)
+{
+	data->block = block;
+}
+
+uint64_t dmu_map_get_block(struct dmu_map_data *data)
+{
+	return data->block;
+}
+
+void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset)
+{
+	data->offset = offset;
+}
+
+uint32_t dmu_map_get_id(struct dmu_map_data *data)
+{
+	return data->id;
+}
+
+void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev)
+{
+	data->dest_dev = dev;
+}
+
+void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev)
+{
+	data->copy_src_dev = dev;
+	dmu_set_flag(&data->flags, DMU_FLAG_COPY_FIRST);
+}
+
+int dmu_map_is_write(struct dmu_map_data *data)
+{
+	return dmu_get_flag(&data->flags, DMU_FLAG_WR);
+}
+
+void dmu_map_set_sync(struct dmu_map_data *data)
+{
+	dmu_set_flag(&data->flags, DMU_FLAG_SYNC);
+}
+
+struct dmu_map_data *dmu_map_dup(struct dmu_map_data *data)
+{
+	struct dmu_map_data *dup;
+
+	dup = malloc(sizeof(*dup));
+	if (!dup)
+		return NULL;
+
+	if (data)
+		memcpy(dup, data, sizeof(*dup));
+
+	return dup;
+}
+
+/*
+ * Get the major/minor of the character control device that @dm_device
+ * has exported for us.  We do this by looking at the device status
+ * string, whose parameters are expected to start with the control
+ * device number in hexadecimal "major:minor" form.
+ */
+static int get_dm_control_dev(char *dm_device,
+			       unsigned *maj, unsigned *min)
+{
+	struct dm_task *task;
+	int ret;
+	void *next = NULL;
+	uint64_t start, length;
+	char *ttype = NULL, *params = NULL;
+
+	task = dm_task_create(DM_DEVICE_STATUS);
+
+	ret = dm_task_set_name(task, dm_device);
+	if (!ret) {
+		DPRINTF("Failed to set device-mapper target name\n");
+		dm_task_destroy(task);
+		return -1;
+	}
+
+	ret = dm_task_run(task);
+	if (!ret) {
+		DPRINTF("Failed to run device-mapper task\n");
+		dm_task_destroy(task);
+		return -1;
+	}
+
+	ret = 0;
+	do {
+		next = dm_get_next_target(task, next, &start, &length,
+					  &ttype, &params);
+
+		if (strcmp(ttype, "userspace") == 0) {
+			ret = sscanf(params, "%x:%x", maj, min);
+			if (ret == 2)
+				break;
+		}
+
+	} while (next);
+
+	dm_task_destroy(task);
+
+	return 0;
+}
+
+/*
+ * Create the character device node for our control channel
+ */
+static int make_device_node(unsigned major, unsigned minor)
+{
+	char path[256];
+
+	sprintf(path, "/dev/dmu%i", minor);
+
+	return mknod(path, S_IFCHR, makedev(major, minor));
+}
+
+static char *dmu_get_ctl_device(char *dm_device)
+{
+	unsigned ctl_major = 0, ctl_minor = 0;
+	static char path[256];
+
+	if (get_dm_control_dev(dm_device, &ctl_major, &ctl_minor) < 0)
+		return NULL;
+
+	if (ctl_major == 0) {
+		DPRINTF("Unable to get device number\n");
+		return NULL;
+	}
+
+	sprintf(path, "/dev/dmu%i", ctl_minor);
+
+	if (access(path, R_OK | W_OK)) {
+		if (make_device_node(ctl_major, ctl_minor)) {
+			DPRINTF("Failed to create device node: %s\n",
+				strerror(errno));
+			return NULL;
+		}
+	}
+
+	return path;
+}
+
+static void dmu_split_dev(dev_t dev, uint32_t *maj, uint32_t *min)
+{
+	*maj = major(dev);
+	*min = minor(dev);
+}
+
+static inline void ring_index_inc(struct uring *ring)
+{
+        ring->idx = (ring->idx == DMU_MAX_EVENTS - 1) ? 0 : ring->idx + 1;
+}
+
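+/*
+ * Both rings are mmap'd arrays of struct dmu_msg, laid out so that
+ * DMU_EVENT_PER_PAGE messages fit in each page and no message straddles
+ * a page boundary.  A slot's hdr.status is used as an ownership flag:
+ * it is set once a slot has been filled and cleared again when the
+ * message has been consumed.
+ */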
+static inline struct dmu_msg *head_ring_hdr(struct uring *ring)
+{
+        uint32_t pidx, off, pos;
+
+        pidx = ring->idx / DMU_EVENT_PER_PAGE;
+        off = ring->idx % DMU_EVENT_PER_PAGE;
+        pos = pidx * PAGE_SIZE + off * sizeof(struct dmu_msg);
+
+        return (struct dmu_msg *) (ring->buf + pos);
+}
+
+/* Queue a message for sending */
+static int dmu_ctl_queue_msg(struct dmu_context *ctx, int type, void *msgbuf)
+{
+	struct dmu_msg *msg;
+
+	msg = (struct dmu_msg *)head_ring_hdr(&ctx->ukring);
+	if (msg->hdr.status) {
+		DPRINTF("No room in ring, flushing...\n");
+		dmu_ctl_send_queue(ctx);
+
+		/* FIXME: Need a better way to wait for space to free up */
+		usleep(50000);
+
+		msg = (struct dmu_msg *)head_ring_hdr(&ctx->ukring);
+		if (msg->hdr.status) {
+			fprintf(stderr, "No room in outgoing ring\n");
+			return -ENOMEM;
+		}
+	}
+
+	msg->hdr.msg_type = type;
+	msg->hdr.id = ctx->id_ctr++;
+
+	memcpy(&msg->payload, msgbuf, sizeof(msg->payload));
+
+	ring_index_inc(&ctx->ukring);
+	msg->hdr.status = 1;
+	ctx->pending++;
+
+	return 1;
+}
+
+/* Flush queue of messages to the kernel */
+int dmu_ctl_send_queue(struct dmu_context *ctx)
+{
+	char c = 0;
+	int r;
+
+	DPRINTF("Flushing outgoing queue\n");
+
+	r = write(ctx->fd, &c, 1);
+
+	ctx->pending = 0;
+
+	return r;
+}
+
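+/*
+ * Query the kernel's "userspace" target version and make sure it is
+ * one this library knows how to talk to.
+ */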
+static int check_version(char *dev)
+{
+	struct dm_task *task;
+	struct dm_versions *target, *last;
+	int ret;
+
+	task = dm_task_create(DM_DEVICE_LIST_VERSIONS);
+
+	ret = dm_task_set_name(task, dev);
+	if (!ret) {
+		DPRINTF("Failed to set device-mapper target name\n");
+		dm_task_destroy(task);
+		return -1;
+	}
+
+	ret = dm_task_run(task);
+	if (!ret) {
+		DPRINTF("Failed to run device-mapper task\n");
+		dm_task_destroy(task);
+		return -1;
+	}
+
+	target = dm_task_get_versions(task);
+
+	do {
+		last = target;
+		
+		if (strcmp(target->name, "userspace") == 0) {
+			DPRINTF("%s version: %i.%i.%i\n",
+				target->name,
+				target->version[0],
+				target->version[1],
+				target->version[2]);
+			break;
+		}
+
+		target = (void *) target + target->next;
+	} while (last != target);
+
+	if (strcmp(target->name, "userspace") != 0) {
+		DPRINTF("userspace target not found\n");
+		return -1;
+	}
+	
+	if ((target->version[0] == MAX_MAJ_VER) && 
+	    (target->version[1] == MAX_MIN_VER))
+		return 1;
+	else
+		return 0; /* Unsupported */
+}
+
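+/*
+ * Open the dm-userspace control channel for @dev: verify the target
+ * version, find (or create) the character device node, open it, and
+ * map the two shared event rings.
+ */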
+struct dmu_context *dmu_ctl_open(char *dev, int flags)
+{
+	int fd, r;
+	struct dmu_context *ctx = NULL;
+	char *ctl_dev;
+	char *ringbuf;
+
+	r = check_version(dev);
+	if (r <= 0) {
+		return NULL;
+	}
+	
+	ctl_dev = dmu_get_ctl_device(dev);
+	if (ctl_dev == NULL)
+		return NULL;
+	else if (access(ctl_dev, R_OK | W_OK))
+		return NULL;
+
+	fd = open(ctl_dev, O_RDWR | flags);
+	if (fd < 0)
+		return NULL;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		goto out;
+
+	ctx->fd = fd;
+	ctx->id_ctr = 0;
+	memset(&ctx->events, 0, sizeof(ctx->events));
+	memset(&ctx->event_data, 0, sizeof(ctx->event_data));
+
+	ringbuf = mmap(NULL, DMU_RING_SIZE * 2, PROT_READ | PROT_WRITE,
+		       MAP_SHARED, fd, 0);
+	if (ringbuf == MAP_FAILED) {
+		fprintf(stderr, "Failed to mmap ring buffers: %s\n",
+			strerror(errno));
+		goto out;
+	}
+
+	ctx->kuring.idx = ctx->ukring.idx = 0;
+	ctx->kuring.buf = ringbuf;
+	ctx->ukring.buf = ringbuf + DMU_RING_SIZE;
+
+	return ctx;
+
+ out:
+	close(fd);
+	free(ctx);
+
+	return NULL;
+}
+
+int dmu_ctl_close(struct dmu_context *ctx)
+{
+	return close(ctx->fd);
+}
+
+void dmu_register_map_done_handler(struct dmu_context *ctx,
+				   map_done_handler_t handler,
+				   void *data)
+{
+	ctx->events.map_done_fn = handler;
+	ctx->event_data.map_done_user_data = data;
+}
+
+void dmu_register_map_handler(struct dmu_context *ctx,
+			      map_req_handler_t handler,
+			      void *data)
+{
+	ctx->events.map_fn = handler;
+	ctx->event_data.map_user_data = data;
+}
+
+int dmu_make_mapping(struct dmu_context *ctx,
+		     struct dmu_map_data *data)
+{
+	struct dmu_msg_make_mapping msg;
+	int r;
+
+	msg.org_block = data->org_block;
+	msg.new_block = data->block;
+	msg.offset = data->offset;
+	dmu_split_dev(data->dest_dev, &msg.dev_maj, &msg.dev_min);
+	msg.flags = 0;
+	dmu_cpy_flag(&msg.flags, data->flags, DMU_FLAG_WR);
+
+	r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAKE_MAPPING, &msg);
+
+	return r;
+}
+
+int dmu_kill_mapping(struct dmu_context *ctx,
+		     struct dmu_map_data *data)
+{
+	struct dmu_msg_make_mapping msg;
+	int r;
+
+	msg.org_block = data->org_block;
+
+	r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_KILL_MAPPING, &msg);
+
+	return r;
+}
+
+int dmu_async_map_done(struct dmu_context *ctx, uint64_t id, int fail)
+{
+	struct dmu_msg_map_done msg;
+	int r;
+
+	msg.org_block = 0;
+	msg.flags = 0;
+	msg.id_of_op = id;
+
+	if (fail)
+		r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_DONE_FAILED, &msg);
+	else
+		r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_DONE, &msg);
+
+	return r;
+}
+
+int dmu_async_map(struct dmu_context *ctx, 
+		  struct dmu_map_data *data,
+		  int fail)
+{
+	struct dmu_msg_map_response msg;
+	int r;
+
+	msg.new_block = data->block;
+	msg.offset = data->offset;
+	msg.flags = data->flags;
+	msg.id_of_req = data->id;
+
+	dmu_split_dev(data->copy_src_dev, &msg.src_maj, &msg.src_min);
+	dmu_split_dev(data->dest_dev, &msg.dst_maj, &msg.dst_min);
+
+	if (fail)
+		r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_FAILED, &msg);
+	else
+		r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_BLOCK_RESP, &msg);
+
+	return r;
+}
+
+int dmu_events_pending(struct dmu_context *ctx, unsigned int msec)
+{
+	fd_set fds;
+	struct timeval tv;
+
+	FD_ZERO(&fds);
+	FD_SET(ctx->fd, &fds);
+
+	tv.tv_sec = msec / 1000;
+	tv.tv_usec = (msec % 1000) * 1000;
+
+	if (select(ctx->fd + 1, &fds, NULL, NULL, &tv) < 0)
+		return 0;
+
+	if (FD_ISSET(ctx->fd, &fds))
+		return 1;
+	else
+		return 0;
+}
+
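+/*
+ * Dispatch a map request from the kernel to the registered handler and,
+ * unless the handler deferred it by returning 0, queue the response.
+ */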
+static int fire_map_req_event(struct dmu_context *ctx,
+			      struct dmu_msg_map_request *req,
+			      uint64_t id)
+{
+	struct dmu_map_data data;
+	int ret;
+
+	if (!ctx->events.map_fn)
+		return 1;
+
+	DPRINTF("Map event for %llu %c\n",
+		req->org_block,
+		dmu_get_flag(&req->flags, DMU_FLAG_WR) ? 'W':'R');
+
+	data.block = req->org_block;
+	data.offset = 0;
+	data.id = id;
+	data.flags = req->flags;
+	data.dest_dev = data.copy_src_dev = 0;
+
+	dmu_clr_flag(&data.flags, DMU_FLAG_COPY_FIRST);
+	dmu_clr_flag(&data.flags, DMU_FLAG_SYNC);
+
+	ret = ctx->events.map_fn(ctx->event_data.map_user_data, &data);
+
+	if (ret != 0) {
+		/* If the handler returns 0, we assume they will
+		 * complete the operation later 
+		 */
+		dmu_async_map(ctx, &data, ret < 0);
+		DPRINTF("Mapped %llu\n", data.block);
+	}
+
+	return ret != 0;
+}
+
+static int fire_map_done_event(struct dmu_context *ctx,
+			       struct dmu_msg_map_done *msg,
+			       uint64_t id)
+{
+	struct dmu_map_data data;
+	int ret = 1;
+
+	if (ctx->events.map_done_fn) {
+		data.block = msg->org_block;
+		data.offset = 0;
+		data.id = msg->id_of_op;
+		data.flags = msg->flags;
+		data.dest_dev = data.copy_src_dev = 0;
+		
+		ret = ctx->events.map_done_fn(ctx->event_data.map_done_user_data, 
+					      &data);
+	}
+
+	if (ret > 0) {
+		/* If the handler returns 0, we assume they will
+		 * complete the operation later 
+		 */
+		dmu_async_map_done(ctx, msg->id_of_op, ret < 0);
+		DPRINTF("Completed %llu (%llu)\n", 
+			msg->org_block, msg->id_of_op);
+	}
+
+	return ret != 0;
+}
+
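+/* Dispatch a single raw ring message to the handler for its type */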
+static int decode_message(struct dmu_context *ctx, int type, uint64_t id,
+			  uint8_t *msg)
+{
+	switch (type) {
+	case DM_USERSPACE_MAP_BLOCK_REQ:
+		DPRINTF("Request event: %llu\n", (unsigned long long)id);
+		return fire_map_req_event(ctx,
+					  (struct dmu_msg_map_request *)msg,
+					  id);
+	case DM_USERSPACE_MAP_DONE:
+		DPRINTF("Map Done event\n");
+		return fire_map_done_event(ctx,
+					   (struct dmu_msg_map_done *)msg,
+					   id);
+	default:
+		fprintf(stderr, "Unknown message type: %i\n", type);
+		return -1; /* Unknown message type */
+	}
+}
+
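+/*
+ * Consume one event from the kernel-to-user ring and dispatch it.
+ * Returns -1 when the current slot is empty (no more pending events)
+ * or the message type is unknown.
+ */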
+static int dmu_process_event(struct dmu_context *ctx)
+{
+	struct dmu_msg *msg;
+	int ret;
+
+	msg = head_ring_hdr(&ctx->kuring);
+	if (!msg->hdr.status)
+		return -1;
+
+	ret = decode_message(ctx, msg->hdr.msg_type, msg->hdr.id,
+			     (uint8_t *)&msg->payload);
+
+	msg->hdr.status = 0;
+	ring_index_inc(&ctx->kuring);
+
+	return ret;
+}
+
+int dmu_process_events(struct dmu_context *ctx)
+{
+	int ret;
+	uint32_t count;
+
+	for (count = 0; count < DMU_MAX_EVENTS; count++) {
+		ret = dmu_process_event(ctx);
+		if (ret < 0)
+			break;
+	}
+
+	DPRINTF("Pending events: %u\n", ctx->pending);
+	if (ctx->pending)
+		dmu_ctl_send_queue(ctx);
+
+	return 1;
+}
+
+int dmu_get_ctl_fd(struct dmu_context *ctx)
+{
+	return ctx->fd;
+}

[-- Attachment #1.1.4: Type: text/plain, Size: 96 bytes --]



-- 
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@us.ibm.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 188 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



Thread overview: 13+ messages
2007-01-29 22:40 [PATCH 1/2] Add userspace device-mapper target Dan Smith
2007-01-31 12:39 ` FUJITA Tomonori
2007-01-31 15:25   ` Dan Smith [this message]
2007-02-01 15:47     ` FUJITA Tomonori
2007-02-08 15:48 ` FUJITA Tomonori
2007-02-08 16:33   ` Dan Smith
2007-02-08 23:11     ` FUJITA Tomonori
2007-02-09 15:54       ` Dan Smith
2007-02-10  0:34         ` FUJITA Tomonori
2007-02-19 15:16         ` Dan Smith
2007-02-19 23:55           ` FUJITA Tomonori
2007-02-21 21:35             ` Dan Smith
2007-02-28 16:24               ` Dan Smith
