From: Jim Rees <rees@umich.edu>
To: Benny Halevy <bhalevy@panasas.com>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: [PATCH 14/88] pnfsblock: call and parse getdevicelist
Date: Tue, 7 Jun 2011 13:27:28 -0400 [thread overview]
Message-ID: <43efe43d446261a498acbb571f459ce25b171edf.1307464382.git.rees@umich.edu> (raw)
In-Reply-To: <cover.1307464381.git.rees@umich.edu>
From: Fred Isaman <iisaman@citi.umich.edu>
Call GETDEVICELIST during mount, then call and parse GETDEVICEINFO
for each device returned.
[pnfsblock: fix pnfs_deviceid references]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
[pnfsblock: fix print format warnings for sector_t and size_t]
[pnfs-block: #include <linux/vmalloc.h>]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[pnfsblock: fix bug determining size of striped volume]
[pnfsblock: fix oops when using multiple devices]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
fs/nfs/blocklayout/Makefile | 2 +-
fs/nfs/blocklayout/blocklayout.c | 163 +++++++++++++++++-
fs/nfs/blocklayout/blocklayout.h | 89 ++++++++++
fs/nfs/blocklayout/blocklayoutdev.c | 324 +++++++++++++++++++++++++++++++++++
fs/nfs/blocklayout/blocklayoutdm.c | 72 ++++++++
5 files changed, 646 insertions(+), 4 deletions(-)
create mode 100644 fs/nfs/blocklayout/blocklayoutdm.c
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile
index 36d959f..2c4c062 100644
--- a/fs/nfs/blocklayout/Makefile
+++ b/fs/nfs/blocklayout/Makefile
@@ -2,4 +2,4 @@
# Makefile for the pNFS block layout driver kernel module
#
obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
-blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o
+blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9889f27..ebaa48a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/vmalloc.h>
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -133,26 +134,182 @@ bl_cleanup_layoutcommit(struct pnfs_layout_type *lo,
dprintk("%s enter\n", __func__);
}
+/*
+ * Tear down a block_mount_id: unlink and free every pnfs_block_dev still
+ * queued on bm_devlist, then free the mount id itself.  A NULL @mid is
+ * accepted and ignored.
+ */
+static void free_blk_mountid(struct block_mount_id *mid)
+{
+	struct pnfs_block_dev *bdev, *next;
+
+	if (!mid)
+		return;
+
+	/* Drain the device list while holding the lock that protects it. */
+	spin_lock(&mid->bm_lock);
+	list_for_each_entry_safe(bdev, next, &mid->bm_devlist, bm_node) {
+		list_del(&bdev->bm_node);
+		free_block_dev(bdev);
+	}
+	spin_unlock(&mid->bm_lock);
+
+	kfree(mid);
+}
+
+/* This is mostly copied from the filelayout's get_device_info function.
+ * It seems much of this should be at the generic pnfs level.
+ *
+ * Issue GETDEVICEINFO for @d_id through pnfs_callback_ops and hand the
+ * reply to nfs4_blk_decode_device() together with @sdlist.  The reply
+ * buffer starts out as a single page; if the call fails with -ETOOSMALL
+ * it is retried once with a page count derived from dev->mincount,
+ * provided that count is more than one page and fits in maxpages.
+ *
+ * Returns the decoded pnfs_block_dev, or NULL on any failure.  All pages
+ * and the temporary pnfs_device are released before returning.  @fh is
+ * not referenced by this function.
+ */
+static struct pnfs_block_dev *
+nfs4_blk_get_deviceinfo(struct super_block *sb, struct nfs_fh *fh,
+ struct pnfs_deviceid *d_id,
+ struct list_head *sdlist)
+{
+ struct pnfs_device *dev;
+ struct pnfs_block_dev *rv = NULL;
+ int maxpages = NFS4_GETDEVINFO_MAXSIZE >> PAGE_SHIFT;
+ struct page *pages[maxpages];
+ int alloced_pages = 0, used_pages = 1;
+ int j, rc;
+
+ dprintk("%s enter\n", __func__);
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ dprintk("%s kmalloc failed\n", __func__);
+ return NULL;
+ }
+ retry_once:
+ dprintk("%s trying used_pages %d\n", __func__, used_pages);
+ /* Only allocate the pages not already obtained on a previous pass. */
+ for (; alloced_pages < used_pages; alloced_pages++) {
+ pages[alloced_pages] = alloc_page(GFP_KERNEL);
+ if (!pages[alloced_pages])
+ goto out_free;
+ }
+ /* set dev->area */
+ if (used_pages == 1)
+ dev->area = page_address(pages[0]);
+ else {
+ /* Several pages: map them so the decoder sees one buffer. */
+ dev->area = vmap(pages, used_pages, VM_MAP, PAGE_KERNEL);
+ if (!dev->area)
+ goto out_free;
+ }
+
+ memcpy(&dev->dev_id, d_id, sizeof(*d_id));
+ dev->layout_type = LAYOUT_BLOCK_VOLUME;
+ dev->dev_notify_types = 0;
+ dev->pages = pages;
+ dev->pgbase = 0;
+ dev->pglen = PAGE_SIZE * used_pages;
+ dev->mincount = 0;
+
+ rc = pnfs_callback_ops->nfs_getdeviceinfo(sb, dev);
+ dprintk("%s getdevice info returns %d used_pages %d\n", __func__, rc,
+ used_pages);
+ if (rc == -ETOOSMALL && used_pages == 1) {
+ /* The single-page area was not vmap'ed; clear it so that out_free
+ * (which vunmaps when used_pages > 1 and area is set) skips it.
+ */
+ dev->area = NULL;
+ used_pages = (dev->mincount + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (used_pages > 1 && used_pages <= maxpages)
+ goto retry_once;
+ }
+ if (rc)
+ goto out_free;
+
+ rv = nfs4_blk_decode_device(sb, dev, sdlist);
+ out_free:
+ /* Common exit: rv is still NULL unless decoding succeeded. */
+ if (used_pages > 1 && dev->area != NULL)
+ vunmap(dev->area);
+ for (j = 0; j < alloced_pages; j++)
+ __free_page(pages[j]);
+ kfree(dev);
+ return rv;
+}
+
+
/*
- * This is just a STUB to check the scsi scanning code
+ * Retrieve the list of available devices for the mountpoint.
+ *
+ * Returns a newly allocated pnfs_mount_type whose mountid points at a
+ * block_mount_id holding one pnfs_block_dev per device reported by
+ * GETDEVICELIST, or NULL if the server gave no block size or any
+ * allocation, RPC or device decode step fails.
*/
static struct pnfs_mount_type *
bl_initialize_mountpoint(struct super_block *sb, struct nfs_fh *fh)
{
+ struct block_mount_id *b_mt_id = NULL;
+ struct pnfs_mount_type *mtype = NULL;
+ struct pnfs_devicelist *dlist = NULL;
+ struct pnfs_block_dev *bdev;
LIST_HEAD(scsi_disklist);
+ int status, i;
dprintk("%s enter\n", __func__);
- nfs4_blk_create_scsi_disk_list(&scsi_disklist);
+ if (NFS_SB(sb)->pnfs_blksize == 0) {
+ dprintk("%s Server did not return blksize\n", __func__);
+ return NULL;
+ }
+ b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
+ if (!b_mt_id)
+ goto out_error;
+ /* Initialize nfs4 block layout mount id */
+ b_mt_id->bm_sb = sb; /* back pointer to retrieve nfs_server struct */
+ spin_lock_init(&b_mt_id->bm_lock);
+ INIT_LIST_HEAD(&b_mt_id->bm_devlist);
+ mtype = kzalloc(sizeof(struct pnfs_mount_type), GFP_KERNEL);
+ if (!mtype)
+ goto out_error;
+ mtype->mountid = (void *)b_mt_id;
+
+ /* Construct a list of all visible scsi disks that have not been
+ * claimed.
+ */
+ status = nfs4_blk_create_scsi_disk_list(&scsi_disklist);
+ if (status < 0)
+ goto out_error;
+
+ dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
+ if (!dlist)
+ goto out_error;
+ /* dlist comes from kmalloc (not zeroed); prime eof so the loop runs. */
+ dlist->eof = 0;
+ while (!dlist->eof) {
+ status = pnfs_callback_ops->nfs_getdevicelist(sb, fh, dlist);
+ if (status)
+ goto out_error;
+ dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
+ __func__, dlist->num_devs, dlist->eof);
+ /* For each device returned in dlist, call GETDEVICEINFO, and
+ * decode the opaque topology encoding to create a flat
+ * volume topology, matching VOLUME_SIMPLE disk signatures
+ * to disks in the visible scsi disk list.
+ * Construct an LVM meta device from the flat volume topology.
+ */
+ for (i = 0; i < dlist->num_devs; i++) {
+ bdev = nfs4_blk_get_deviceinfo(sb, fh,
+ &dlist->dev_id[i],
+ &scsi_disklist);
+ if (!bdev)
+ goto out_error;
+ spin_lock(&b_mt_id->bm_lock);
+ list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
+ spin_unlock(&b_mt_id->bm_lock);
+ }
+ }
+ dprintk("%s SUCCESS\n", __func__);
+
+ /* Shared exit for success and failure: the device list buffer and the
+ * scanned disk list are only needed during initialization.
+ */
+ out_return:
+ kfree(dlist);
nfs4_blk_destroy_disk_list(&scsi_disklist);
+ return mtype;
- return NULL;
+ /* Failure: drop the mount id (and any devices already queued on it)
+ * and the mount type, then leave through out_return with mtype NULL.
+ */
+ out_error:
+ free_blk_mountid(b_mt_id);
+ kfree(mtype);
+ mtype = NULL;
+ goto out_return;
}
static int
bl_uninitialize_mountpoint(struct pnfs_mount_type *mtype)
{
+ struct block_mount_id *b_mt_id = NULL;
+
dprintk("%s enter\n", __func__);
+ /* A NULL mtype is tolerated and treated as nothing to release. */
+ if (!mtype)
+ return 0;
+ /* Free the mount id (and its device list) before the wrapper that
+ * holds the only pointer to it.
+ */
+ b_mt_id = (struct block_mount_id *)mtype->mountid;
+ free_blk_mountid(b_mt_id);
+ kfree(mtype);
+ dprintk("%s RETURNS\n", __func__);
return 0;
}
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 5dbb8f2..4af6685 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -38,6 +38,19 @@
extern struct class shost_class; /* exported from drivers/scsi/hosts.c */
+/* Per-mount state of the block layout driver.  bl_initialize_mountpoint()
+ * allocates one and stores it in pnfs_mount_type->mountid.
+ */
+struct block_mount_id {
+ struct super_block *bm_sb; /* back pointer */
+ spinlock_t bm_lock; /* protects list */
+ struct list_head bm_devlist; /* holds pnfs_block_dev */
+};
+
+/* One device produced by decoding a GETDEVICEINFO reply; linked onto
+ * block_mount_id.bm_devlist through bm_node.
+ */
+struct pnfs_block_dev {
+ struct list_head bm_node;
+ char *bm_mdevname; /* meta device name */
+ struct pnfs_deviceid bm_mdevid; /* associated devid */
+ struct block_device *bm_mdev; /* meta device itself */
+};
+
/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
struct visible_block_device {
struct list_head vi_node;
@@ -46,8 +59,84 @@ struct visible_block_device {
int vi_put_done;
};
+/* Volume type tag; decode_blk_volume() reads it from the XDR stream into
+ * pnfs_blk_volume.bv_type and switches on it.
+ */
+enum blk_vol_type {
+ PNFS_BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */
+ PNFS_BLOCK_VOLUME_SLICE = 1, /* slice of another volume */
+ PNFS_BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */
+ PNFS_BLOCK_VOLUME_STRIPE = 3 /* striped across multiple volumes */
+};
+
+/* All disk offset/lengths are stored in 512-byte sectors */
+/* One node of the decoded volume topology.  bv_vols/bv_vol_n list the
+ * sub-volumes this node is built from (filled in by set_vol_array()).
+ * The anonymous union is interpreted according to bv_type: bv_dev is set
+ * for SIMPLE, bv_offset for SLICE and bv_stripe_unit for STRIPE volumes.
+ */
+struct pnfs_blk_volume {
+ uint32_t bv_type;
+ sector_t bv_size;
+ struct pnfs_blk_volume **bv_vols;
+ int bv_vol_n;
+ union {
+ dev_t bv_dev;
+ sector_t bv_stripe_unit;
+ sector_t bv_offset;
+ };
+};
+
+/* Since components need not be aligned, cannot use sector_t */
+/* One signature component: bs_length bytes expected at bs_offset on the
+ * disk.  get_sector() treats a negative bs_offset as relative to the end
+ * of the device.  bs_string is set by decode_blk_signature() to point into
+ * the XDR reply buffer; it is not a separately allocated copy.
+ */
+struct pnfs_blk_sig_comp {
+ int64_t bs_offset; /* In bytes */
+ uint32_t bs_length; /* In bytes */
+ char *bs_string;
+};
+
+/* Maximum number of signature components in a simple volume */
+# define PNFS_BLOCK_MAX_SIG_COMP 16
+
+/* Decoded signature of a simple volume: si_num_comps valid entries at the
+ * front of si_comps.
+ */
+struct pnfs_blk_sig {
+ int si_num_comps;
+ struct pnfs_blk_sig_comp si_comps[PNFS_BLOCK_MAX_SIG_COMP];
+};
+
+/* XDR decode helpers.
+ *
+ * These macros are not self-contained: READ32, READ64, COPYMEM, READ_DEVID
+ * and READ_SECTOR all read through and advance a local variable that must
+ * be named "p"; BLK_READBUF and READ_SECTOR jump to a label named
+ * "out_err" that the calling function must provide; READ_SECTOR also
+ * needs a local 64-bit scratch variable named "tmp".
+ */
+uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
+
+/* Check that nbytes are available before reading them; on a NULL result
+ * from blk_overflow() log a warning and bail out to out_err.
+ */
+#define BLK_READBUF(p, e, nbytes) do { \
+ p = blk_overflow(p, e, nbytes); \
+ if (!p) { \
+ printk(KERN_WARNING \
+ "%s: reply buffer overflowed in line %d.\n", \
+ __func__, __LINE__); \
+ goto out_err; \
+ } \
+} while (0)
+
+/* Read one / two big-endian 32-bit words at p into x and advance p. */
+#define READ32(x) (x) = ntohl(*p++)
+#define READ64(x) do { \
+ (x) = (uint64_t)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0)
+/* Copy nbytes from p into x, then advance p by XDR_QUADLEN(nbytes) words. */
+#define COPYMEM(x, nbytes) do { \
+ memcpy((x), p, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+#define READ_DEVID(x) COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
+/* Read a 64-bit byte count and store it in x as 512-byte sectors; values
+ * that are not a multiple of 512 are rejected via out_err.
+ */
+#define READ_SECTOR(x) do { \
+ READ64(tmp); \
+ if (tmp & 0x1ff) { \
+ printk(KERN_WARNING \
+ "%s Value not 512-byte aligned at line %d\n", \
+ __func__, __LINE__); \
+ goto out_err; \
+ } \
+ (x) = tmp >> 9; \
+} while (0)
+
/* blocklayoutdev.c */
+struct pnfs_block_dev *nfs4_blk_decode_device(struct super_block *sb,
+ struct pnfs_device *dev,
+ struct list_head *sdlist);
int nfs4_blk_create_scsi_disk_list(struct list_head *);
void nfs4_blk_destroy_disk_list(struct list_head *);
+/* blocklayoutdm.c */
+struct pnfs_block_dev *nfs4_blk_init_metadev(struct super_block *sb,
+ struct pnfs_device *dev);
+int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
+void free_block_dev(struct pnfs_block_dev *bdev);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index b4f52fb..f1689b9 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -229,3 +229,327 @@ int nfs4_blk_create_scsi_disk_list(struct list_head *dlist)
return class_for_each_device(&shost_class, NULL,
&lc, nfs4_blk_iter_scsi_disk_list);
}
+/* The stream holds vols[working].bv_vol_n XDR-encoded indices.  Every index
+ * must name a volume decoded earlier (0 <= index < working); each one is
+ * turned into a pointer and stored in vols[working].bv_vols.
+ *
+ * On success *pp is advanced past the indices and 0 is returned.  On a
+ * short buffer or an out-of-range index -EIO is returned and *pp is left
+ * untouched.
+ */
+static int set_vol_array(uint32_t **pp, uint32_t *end,
+			 struct pnfs_blk_volume *vols, int working)
+{
+	struct pnfs_blk_volume *self = &vols[working];
+	struct pnfs_blk_volume **dest = self->bv_vols;
+	uint32_t *p = *pp;	/* must be called "p" for BLK_READBUF/READ32 */
+	int slot, ref;
+
+	for (slot = 0; slot < self->bv_vol_n; slot++) {
+		BLK_READBUF(p, end, 4);
+		READ32(ref);
+		if (ref >= 0 && ref < working) {
+			dest[slot] = &vols[ref];
+			continue;
+		}
+		dprintk("%s Index %i out of expected range\n",
+			__func__, ref);
+		goto out_err;
+	}
+	*pp = p;
+	return 0;
+out_err:
+	return -EIO;
+}
+
+/* Total size, in sectors, of all sub-volumes referenced by @vol. */
+static uint64_t sum_subvolume_sizes(struct pnfs_blk_volume *vol)
+{
+	uint64_t total = 0;
+	int left = vol->bv_vol_n;
+
+	/* Walk the sub-volume array from the back; the sum is order-free. */
+	while (left-- > 0)
+		total += vol->bv_vols[left]->bv_size;
+	return total;
+}
+
+/* XDR decodes the signature of a simple volume into @sig.
+ *
+ * @pp:  in/out cursor into the reply buffer; advanced only on success.
+ * @end: end of the reply buffer, as passed to BLK_READBUF.
+ * @sig: receives the component count and, per component, its offset,
+ *       length and a pointer to its bytes.
+ *
+ * The component count and lengths come straight from the wire, so they are
+ * held in unsigned variables: a count with the top bit set used to be
+ * stored as a negative int, slip past both range checks and yield a
+ * signature with no components at all.
+ *
+ * Returns 0 on success, -EIO on a short buffer or a bad component count.
+ */
+static int decode_blk_signature(uint32_t **pp, uint32_t *end,
+				struct pnfs_blk_sig *sig)
+{
+	uint32_t *p = *pp;	/* must be called "p" for the READ macros */
+	uint32_t ncomps, len;
+	int i;
+
+	BLK_READBUF(p, end, 4);
+	READ32(ncomps);
+	if (ncomps == 0) {
+		dprintk("%s 0 components in sig\n", __func__);
+		goto out_err;
+	}
+	if (ncomps >= PNFS_BLOCK_MAX_SIG_COMP) {
+		dprintk("number of sig comps %u >= PNFS_BLOCK_MAX_SIG_COMP\n",
+			ncomps);
+		goto out_err;
+	}
+	sig->si_num_comps = ncomps;
+	for (i = 0; i < sig->si_num_comps; i++) {
+		BLK_READBUF(p, end, 12);
+		READ64(sig->si_comps[i].bs_offset);
+		READ32(len);
+		sig->si_comps[i].bs_length = len;
+		BLK_READBUF(p, end, len);
+		/* Note we rely here on fact that sig is used immediately
+		 * for mapping, then thrown away.
+		 */
+		sig->si_comps[i].bs_string = (char *)p;
+		p += XDR_QUADLEN(len);
+	}
+	*pp = p;
+	return 0;
+out_err:
+	return -EIO;
+}
+
+/* Convert the byte offset of signature component @comp into a block number
+ * on @bdev (*block) plus a byte offset inside that block
+ * (*offset_in_block).  A negative component offset is taken relative to
+ * the end of the device.
+ */
+static void get_sector(struct block_device *bdev,
+		       struct pnfs_blk_sig_comp *comp,
+		       sector_t *block,
+		       uint32_t *offset_in_block)
+{
+	int64_t byte_off = comp->bs_offset;
+	unsigned int shift = blksize_bits(block_size(bdev));
+	sector_t blk;
+
+	dprintk("%s enter\n", __func__);
+	if (byte_off < 0)
+		byte_off += (get_capacity(bdev->bd_disk) << 9);
+
+	blk = byte_off >> shift;
+	*block = blk;
+	*offset_in_block = byte_off - (blk << shift);
+
+	dprintk("%s block %llu offset_in_block %u\n",
+		__func__, (u64)*block, *offset_in_block);
+}
+
+/*
+ * All signatures in sig must be found on bdev for verification.
+ * Returns True if sig matches, False otherwise.
+ *
+ * Only one block is read per component, so a component that would extend
+ * past the end of that block cannot be compared; it is treated as a
+ * mismatch instead of letting memcmp() run off the end of the buffer.
+ */
+static int verify_sig(struct block_device *bdev, struct pnfs_blk_sig *sig)
+{
+	sector_t block = 0;
+	struct pnfs_blk_sig_comp *comp;
+	struct buffer_head *bh = NULL;
+	uint32_t offset_in_block = 0;
+	char *ptr;
+	int i;
+
+	dprintk("%s enter. bd_disk->capacity %ld, bd_block_size %d\n",
+		__func__, (unsigned long)get_capacity(bdev->bd_disk),
+		bdev->bd_block_size);
+	for (i = 0; i < sig->si_num_comps; i++) {
+		comp = &sig->si_comps[i];
+		dprintk("%s comp->bs_offset %lld, length=%d\n", __func__,
+			comp->bs_offset, comp->bs_length);
+		get_sector(bdev, comp, &block, &offset_in_block);
+		/* 64-bit sum so a huge bs_length cannot wrap the check. */
+		if ((uint64_t)offset_in_block + comp->bs_length >
+		    bdev->bd_block_size) {
+			dprintk("%s sig component crosses block boundary\n",
+				__func__);
+			goto out_err;
+		}
+		bh = __bread(bdev, block, bdev->bd_block_size);
+		if (!bh)
+			goto out_err;
+		ptr = (char *)bh->b_data + offset_in_block;
+		if (memcmp(ptr, comp->bs_string, comp->bs_length))
+			goto out_err;
+		brelse(bh);
+		/* Do not leave a released buffer for out_err to put again. */
+		bh = NULL;
+	}
+	dprintk("%s Complete Match Found\n", __func__);
+	return 1;
+
+out_err:
+	brelse(bh);
+	dprintk("%s No Match\n", __func__);
+	return 0;
+}
+
+/*
+ * map_sig_to_device()
+ * Search the visible disk list for the first not-yet-mapped disk whose
+ * contents match @sig.  On a match the disk's dev_t and capacity are
+ * recorded in @vol (bv_dev, bv_size) and the disk is marked as mapped.
+ *
+ * Returns the (nonzero) result of verify_sig() if a disk was mapped,
+ * 0 otherwise.
+ */
+/* XXX FRED - use normal 0=success status */
+static int map_sig_to_device(struct pnfs_blk_sig *sig,
+			     struct pnfs_blk_volume *vol,
+			     struct list_head *sdlist)
+{
+	struct visible_block_device *cand;
+	int found;
+
+	list_for_each_entry(cand, sdlist, vi_node) {
+		if (cand->vi_mapped)
+			continue;
+		found = verify_sig(cand->vi_bdev, sig);
+		if (!found)
+			continue;
+
+		vol->bv_dev = cand->vi_bdev->bd_dev;
+		vol->bv_size = get_capacity(cand->vi_bdev->bd_disk);
+		cand->vi_mapped = 1;
+		/* XXX FRED check this */
+		/* We no longer need to scan this device, and
+		 * we need to "put" it before creating metadevice.
+		 */
+		if (!cand->vi_put_done) {
+			cand->vi_put_done = 1;
+			nfs4_blkdev_put(cand->vi_bdev);
+		}
+		return found;
+	}
+	return 0;
+}
+
+/* XDR decodes pnfs_block_volume4 structure
+ *
+ * @pp:        in/out cursor into the opaque device address.
+ * @end:       end of the buffer, as passed to BLK_READBUF.
+ * @vols:      volume array being built; entry @i is filled in here and may
+ *             only reference earlier entries.
+ * @i:         index of the volume to decode.
+ * @sdlist:    visible disks that VOLUME_SIMPLE signatures are matched
+ *             against.
+ * @array_cnt: out; number of bv_vols slots this volume consumed.
+ *
+ * Counts and the stripe unit come from the server and are validated before
+ * use: a sub-volume count <= 0 would leave bv_vols[0] unset, and a stripe
+ * unit that is zero (or does not fit the 32-bit divisor of do_div()) would
+ * divide by zero or compute a wrong size.
+ *
+ * Returns 0 on success, -EIO on malformed input or when no disk matches a
+ * simple volume's signature.
+ */
+static int decode_blk_volume(uint32_t **pp, uint32_t *end,
+			     struct pnfs_blk_volume *vols, int i,
+			     struct list_head *sdlist, int *array_cnt)
+{
+	int status = 0;
+	struct pnfs_blk_sig sig;
+	uint32_t *p = *pp;
+	uint64_t tmp; /* Used by READ_SECTOR */
+	struct pnfs_blk_volume *vol = &vols[i];
+	int j;
+	u64 tmp_size;
+
+	BLK_READBUF(p, end, 4);
+	READ32(vol->bv_type);
+	dprintk("%s vol->bv_type = %i\n", __func__, vol->bv_type);
+	switch (vol->bv_type) {
+	case PNFS_BLOCK_VOLUME_SIMPLE:
+		*array_cnt = 0;
+		status = decode_blk_signature(&p, end, &sig);
+		if (status)
+			return status;
+		status = map_sig_to_device(&sig, vol, sdlist);
+		if (!status) {
+			dprintk("Could not find disk for device\n");
+			return -EIO;
+		}
+		status = 0;
+		dprintk("%s Set Simple vol to dev %d:%d, size %llu\n",
+			__func__,
+			MAJOR(vol->bv_dev),
+			MINOR(vol->bv_dev),
+			(u64)vol->bv_size);
+		break;
+	case PNFS_BLOCK_VOLUME_SLICE:
+		BLK_READBUF(p, end, 16);
+		READ_SECTOR(vol->bv_offset);
+		READ_SECTOR(vol->bv_size);
+		*array_cnt = vol->bv_vol_n = 1;
+		status = set_vol_array(&p, end, vols, i);
+		break;
+	case PNFS_BLOCK_VOLUME_STRIPE:
+		BLK_READBUF(p, end, 8);
+		READ_SECTOR(vol->bv_stripe_unit);
+		/* The divisor handed to do_div() below is 32 bits wide. */
+		if (!vol->bv_stripe_unit ||
+		    (u32)vol->bv_stripe_unit != vol->bv_stripe_unit) {
+			dprintk("%s bad stripe unit %llu\n",
+				__func__, (u64)vol->bv_stripe_unit);
+			return -EIO;
+		}
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		if (vol->bv_vol_n <= 0)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		/* Ensure all subvolumes are the same size */
+		for (j = 1; j < vol->bv_vol_n; j++) {
+			if (vol->bv_vols[j]->bv_size !=
+			    vol->bv_vols[0]->bv_size) {
+				dprintk("%s varying subvol size\n", __func__);
+				return -EIO;
+			}
+		}
+		/* Make sure total size only includes addressable areas */
+		tmp_size = vol->bv_vols[0]->bv_size;
+		do_div(tmp_size, (u32)vol->bv_stripe_unit);
+		vol->bv_size = vol->bv_vol_n * tmp_size * vol->bv_stripe_unit;
+		dprintk("%s Set Stripe vol to size %llu\n",
+			__func__, (u64)vol->bv_size);
+		break;
+	case PNFS_BLOCK_VOLUME_CONCAT:
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		if (vol->bv_vol_n <= 0)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		vol->bv_size = sum_subvolume_sizes(vol);
+		dprintk("%s Set Concat vol to size %llu\n",
+			__func__, (u64)vol->bv_size);
+		break;
+	default:
+		dprintk("Unknown volume type %i\n", vol->bv_type);
+	out_err:
+		return -EIO;
+	}
+	*pp = p;
+	return status;
+}
+
+/* Decodes pnfs_block_deviceaddr4 (draft-8), which is XDR encoded in the
+ * first dev->mincount bytes of dev->area.
+ *
+ * @sb:     superblock of the mount, passed on to nfs4_blk_init_metadev().
+ * @dev:    GETDEVICEINFO result holding the opaque device address.
+ * @sdlist: visible disks to match simple-volume signatures against; the
+ *          vi_mapped flag of every entry is reset before decoding.
+ *
+ * Returns the new pnfs_block_dev, or NULL if the encoding is malformed, an
+ * allocation fails, or the meta device cannot be set up.
+ */
+struct pnfs_block_dev *
+nfs4_blk_decode_device(struct super_block *sb,
+		       struct pnfs_device *dev,
+		       struct list_head *sdlist)
+{
+	int num_vols, i, status, count;
+	struct pnfs_blk_volume *vols = NULL, **arrays = NULL, **arrays_ptr;
+	uint32_t *p = dev->area;
+	uint32_t *end = (uint32_t *) ((char *) p + dev->mincount);
+	struct pnfs_block_dev *rv = NULL;
+	struct visible_block_device *vis_dev;
+
+	dprintk("%s enter\n", __func__);
+
+	/* The volume count is server supplied: make sure it is present in
+	 * the buffer at all before reading it.
+	 */
+	BLK_READBUF(p, end, 4);
+	READ32(num_vols);
+	dprintk("%s num_vols = %i\n", __func__, num_vols);
+	/* Every volume occupies at least its 4-byte type word, so a count
+	 * that is negative or larger than the number of words left cannot
+	 * be valid.  Rejecting it also bounds the allocations below.
+	 */
+	if (num_vols < 0 || num_vols > end - p) {
+		dprintk("%s bad volume count %i\n", __func__, num_vols);
+		goto out;
+	}
+
+	/* kcalloc checks the count * size multiplication for overflow */
+	vols = kcalloc(num_vols, sizeof(*vols), GFP_KERNEL);
+	if (!vols)
+		return NULL;
+	/* Each volume in vols array needs its own array.  Save time by
+	 * allocating them all in one large hunk.  Because each volume
+	 * array can only reference previous volumes, and because once
+	 * a concat or stripe references a volume, it may never be
+	 * referenced again, the volume arrays are guaranteed to fit
+	 * in the surprisingly small space allocated.
+	 */
+	arrays = kcalloc((size_t)num_vols * 2, sizeof(*arrays), GFP_KERNEL);
+	if (!arrays)
+		goto out;
+	arrays_ptr = arrays;
+
+	list_for_each_entry(vis_dev, sdlist, vi_node) {
+		/* Wipe crud left from parsing previous device */
+		vis_dev->vi_mapped = 0;
+	}
+	for (i = 0; i < num_vols; i++) {
+		vols[i].bv_vols = arrays_ptr;
+		status = decode_blk_volume(&p, end, vols, i, sdlist, &count);
+		if (status)
+			goto out;
+		arrays_ptr += count;
+	}
+
+	/* Check that we have used up opaque */
+	if (p != end) {
+		dprintk("Undecoded cruft at end of opaque\n");
+		goto out;
+	}
+
+	/* Now use info in vols to create the meta device */
+	rv = nfs4_blk_init_metadev(sb, dev);
+	if (!rv)
+		goto out;
+	status = nfs4_blk_flatten(vols, num_vols, rv);
+	if (status) {
+		free_block_dev(rv);
+		rv = NULL;
+	}
+out_err:	/* target of BLK_READBUF; rv is still NULL on that path */
+out:
+	kfree(arrays);
+	kfree(vols);
+	return rv;
+}
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
new file mode 100644
index 0000000..15eaed2
--- /dev/null
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -0,0 +1,72 @@
+/*
+ * linux/fs/nfs/blocklayout/blocklayoutdm.c
+ *
+ * Module for the NFSv4.1 pNFS block layout driver.
+ *
+ * Copyright (c) 2007 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Fred Isaman <iisaman@umich.edu>
+ * Andy Adamson <andros@citi.umich.edu>
+ *
+ * permission is granted to use, copy, create derivative works and
+ * redistribute this software and such derivative works for any purpose,
+ * so long as the name of the university of michigan is not used in
+ * any advertising or publicity pertaining to the use or distribution
+ * of this software without specific, written prior authorization. if
+ * the above copyright notice or any other identification of the
+ * university of michigan is included in any copy of any portion of
+ * this software, then the disclaimer below must also be included.
+ *
+ * this software is provided as is, without representation from the
+ * university of michigan as to its fitness for any purpose, and without
+ * warranty by the university of michigan of any kind, either express
+ * or implied, including without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose. the regents
+ * of the university of michigan shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential damages,
+ * with respect to any claim arising out or in connection with the use
+ * of the software, even if it has been or is hereafter advised of the
+ * possibility of such damages.
+ */
+
+#include "blocklayout.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+
+/* Stub */
+/* Placeholder for releasing the meta device of @bdev; currently does
+ * nothing and always returns 0.
+ */
+static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
+{
+ return 0;
+}
+
+/* Free @bdev, first releasing its meta device if one is attached.
+ * A NULL @bdev is accepted and ignored.
+ */
+void free_block_dev(struct pnfs_block_dev *bdev)
+{
+	struct block_device *meta;
+
+	if (!bdev)
+		return;
+
+	meta = bdev->bm_mdev;
+	if (meta) {
+		dprintk("%s Removing DM device: %s %d:%d\n",
+			__func__,
+			bdev->bm_mdevname,
+			MAJOR(meta->bd_dev),
+			MINOR(meta->bd_dev));
+		/* XXX Check status ?? */
+		nfs4_blk_metadev_release(bdev);
+	}
+	kfree(bdev);
+}
+
+/* Stub */
+/* Placeholder for creating the meta device that backs @dev; currently
+ * always returns NULL, which nfs4_blk_decode_device() treats as failure.
+ */
+struct pnfs_block_dev *nfs4_blk_init_metadev(struct super_block *sb,
+ struct pnfs_device *dev)
+{
+ return NULL;
+}
+
+/* Stub */
+/* Placeholder for turning the @size decoded volumes in @vols into the
+ * layout of @bdev; currently does nothing and always returns 0 (success).
+ */
+int nfs4_blk_flatten(struct pnfs_blk_volume *vols, int size,
+ struct pnfs_block_dev *bdev)
+{
+ return 0;
+}
+
--
1.7.4.1
next prev parent reply other threads:[~2011-06-07 17:27 UTC|newest]
Thread overview: 136+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-07 17:24 [PATCH 00/88] pnfs block layout driver rees
2011-06-07 17:26 ` [PATCH 01/88] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-07 17:26 ` [PATCH 02/88] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-07 17:26 ` [PATCH 03/88] pnfs_post_submit: Restore "pnfs: pnfs_do_flush" part 1 Jim Rees
2011-06-07 17:26 ` [PATCH 04/88] pnfs_post_submit: Restore the pnfs_write_end part of "pnfs: commit and pnfs_write_end" Jim Rees
2011-06-07 17:26 ` [PATCH 05/88] pnfs: xdr support for three word attribute bitmap Jim Rees
2011-06-07 17:26 ` [PATCH 06/88] pnfs: HACK: ask for layout_blksize on mount Jim Rees
2011-06-07 17:26 ` [PATCH 07/88] pnfs: HACK: modify write_end_cleanup Jim Rees
2011-06-07 17:26 ` [PATCH 08/88] HACK: propagate fsdata into nfs_writepage_setup Jim Rees
2011-06-07 17:26 ` [PATCH 09/88] pnfs: HACK: adjust eof handling Jim Rees
2011-06-07 17:27 ` [PATCH 10/88] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-07 17:27 ` [PATCH 11/88] pnfsblock: blocklayout stub Jim Rees
2011-06-07 17:27 ` [PATCH 12/88] pnfsblock: expose scsi interface Jim Rees
2011-06-07 17:27 ` [PATCH 13/88] pnfsblock: scan scsi devices Jim Rees
2011-06-07 17:27 ` Jim Rees [this message]
2011-06-07 17:27 ` [PATCH 15/88] pnfsblock: dm kernel interface Jim Rees
2011-06-07 17:27 ` [PATCH 16/88] pnfsblock: select BLK_DEV_DM when PNFS_BLOCK is configured Jim Rees
2011-06-07 17:27 ` [PATCH 17/88] pnfsblock: create and destroy dm metadevice Jim Rees
2011-06-07 17:27 ` [PATCH 18/88] pnfsblock: construct and load md table Jim Rees
2011-06-07 17:28 ` [PATCH 19/88] pnfsblock: layout alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 20/88] pnfsblock: basic extent code Jim Rees
2011-06-07 17:28 ` [PATCH 21/88] pnfsblock: lseg alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 22/88] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-07 17:28 ` [PATCH 23/88] pnfsblock: merge extents Jim Rees
2011-06-07 17:28 ` [PATCH 24/88] pnfsblock: find_get_extent Jim Rees
2011-06-07 17:28 ` [PATCH 25/88] pnfsblock: bl_read_pagelist Jim Rees
2011-06-07 17:28 ` [PATCH 26/88] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-07 17:29 ` [PATCH 27/88] pnfsblock: read path error handling Jim Rees
2011-06-07 17:29 ` [PATCH 28/88] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-07 17:29 ` [PATCH 29/88] pnfsblock: write_begin Jim Rees
2011-06-07 17:29 ` [PATCH 30/88] pnfsblock: write_end Jim Rees
2011-06-07 17:29 ` [PATCH 31/88] pnfsblock: write_end_cleanup Jim Rees
2011-06-07 17:29 ` [PATCH 32/88] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-07 17:29 ` [PATCH 33/88] pnfsblock: bl_write_pagelist Jim Rees
2011-06-07 17:29 ` [PATCH 34/88] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 35/88] pnfsblock: bl_setup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 36/88] pnfsblock: encode_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 37/88] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 38/88] pnfsblock: merge rw extents Jim Rees
2011-06-07 17:30 ` [PATCH 39/88] pnfsblock: debugging dprintks for clist info Jim Rees
2011-06-07 17:30 ` [PATCH 40/88] SQAUSHME: blocklayoutdriver: NULL pointer reference when committing too many extents Jim Rees
2011-06-07 17:30 ` [PATCH 41/88] SQUASHME: pnfs-block: remove of CONFIG_PNFS fallout Jim Rees
2011-06-07 17:30 ` [PATCH 42/88] SQUASHME: pnfsblock: Fix a memory leak Jim Rees
2011-06-07 17:31 ` [PATCH 43/88] SQUASHME: pnfsblock: fix bug when decoding block device info Jim Rees
2011-06-07 17:31 ` [PATCH 44/88] SQUASHME: pnfsblock: Wrong extent refcount in block extents list Jim Rees
2011-06-07 17:31 ` [PATCH 45/88] SQUASHME: pnfsblock: Implement release_inval_marks Jim Rees
2011-06-07 17:31 ` [PATCH 46/88] SQUASHME: pnfsblock: Fix missing extent in commit list Jim Rees
2011-06-07 17:31 ` [PATCH 47/88] pnfsblock: use the session max response size for getdeviceinfo's maxcount Jim Rees
2011-06-07 17:31 ` [PATCH 48/88] SQUASHME: pnfs-block: fix compile breakage Jim Rees
2011-06-07 17:31 ` [PATCH 49/88] SQUASHME: pnfs-block: convert APIs pnfs-post-submit Jim Rees
2011-06-07 17:32 ` [PATCH 50/88] pnfsblock: Lookup list entry of layouts and tags in reverse order Jim Rees
2011-06-07 17:32 ` [PATCH 51/88] pnfsblock: expose block_class interface Jim Rees
2011-06-07 17:32 ` [PATCH 52/88] pnfsblock: iterating all local block disks instead of only scsi disks when initializing mount point Jim Rees
2011-06-07 17:32 ` [PATCH 53/88] SQUASHME: pnfsblock: set pnfs_blksize before calling set_pnfs_layoutdriver Jim Rees
2011-06-07 17:32 ` [PATCH 54/88] SQUASHME: pnfsblock: get rid of threshold policy ops Jim Rees
2011-06-07 17:32 ` [PATCH 55/88] SQUASHME: pnfsblock: write_begin adjust for removed fields Jim Rees
2011-06-07 17:32 ` [PATCH 56/88] SQUASHME: pnfsblock: write_end adjust for removed ok_to_use_pnfs Jim Rees
2011-06-07 17:32 ` [PATCH 57/88] SQUASHME: pnfsblock: write_end_cleanup " Jim Rees
2011-06-07 17:32 ` [PATCH 58/88] SQUASHME: pnfsblock: bl_write_pagelist support functions adjust for missing PG_USE_PNFS Jim Rees
2011-06-07 17:33 ` [PATCH 59/88] SQUASHME: pnfsblock: bl_write_pagelist " Jim Rees
2011-06-07 17:33 ` [PATCH 60/88] SQUASHME: pnfs-block: nfs4_blk_add_block_disk ret must be signed Jim Rees
2011-06-07 17:33 ` [PATCH 61/88] SQUASHME: pnfs-block: use new alloc/free_layout API Jim Rees
2011-06-07 17:33 ` [PATCH 62/88] SQUASHME: pnfs-block: use new commit api Jim Rees
2011-06-07 17:33 ` [PATCH 63/88] SQUASHME: pnfs-block: use new read_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 64/88] SQUASHME: pnfs-block: use new write_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 65/88] pnfs-block: Add support for simple rpc pipefs Jim Rees
2011-06-07 17:33 ` [PATCH 66/88] pnfs-block: Remove device creation from kernel Jim Rees
2011-06-07 17:33 ` [PATCH 67/88] SQUASHME: pnfs-block: apply types rename Jim Rees
2011-06-07 17:34 ` [PATCH 68/88] SQUASHME: pnfs-block: Revert "pnfsblock: expose block_class interface" Jim Rees
2011-06-07 17:34 ` [PATCH 69/88] SQUASHME: pnfsblock: remove obsolete include file from blocklayout.h Jim Rees
2011-06-07 17:34 ` [PATCH 70/88] SQUASHME: pnfsblock: use nfs4_deviceid Jim Rees
2011-06-07 17:34 ` [PATCH 71/88] SQUASHME: pnfsblock: no callback ops Jim Rees
2011-06-07 17:34 ` [PATCH 72/88] SQAUSHME: pnfsblock: no PNFS_NFS_SERVER Jim Rees
2011-06-07 17:34 ` [PATCH 73/88] SQUASHME: pnfsblock: no dev_notify_types Jim Rees
2011-06-07 17:34 ` [PATCH 74/88] SQUASHME: pnfsblock: use new struct pnfs_layout_hdr Jim Rees
2011-06-07 17:34 ` [PATCH 75/88] SQUASHME: pnfsblock: compile error in blocklayout code Jim Rees
2011-06-07 17:34 ` [PATCH 76/88] SQUASHME: pnfs-block: deprecate get_stripesize Jim Rees
2011-06-07 17:35 ` [PATCH 77/88] move include lines out of include file Jim Rees
2011-06-07 17:35 ` [PATCH 78/88] SQUASHME: pnfs-block: use {set,clear}_layoutdriver Jim Rees
2011-06-07 17:35 ` [PATCH 79/88] SQUASHME: pnfs-block: Return failure from bl_initialize_mountpoint Jim Rees
2011-06-07 17:35 ` [PATCH 80/88] SQUASHME: pnfs-block: fixup setup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 81/88] SQUASHME: pnfs-block: fixup cleanup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 82/88] SQUASHME: pnfs-block: fixup encode_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 83/88] SQUASHME: pnfs-block: fixup layoutcommit methods args Jim Rees
2011-06-07 17:35 ` [PATCH 84/88] pnfs-block: fix blocklayoutdev.c for new blkdev_get_by_dev() Jim Rees
2011-06-07 17:35 ` [PATCH 85/88] SQUASHME: pnfs-block: use pnfs_layout_hdr field prefix Jim Rees
2011-06-07 17:35 ` [PATCH 86/88] SQUASHME: pnfs: blocklayout: port block layout code Jim Rees
2011-06-08 1:27 ` Benny Halevy
2011-06-08 2:06 ` Benny Halevy
2011-06-08 7:38 ` Peng Tao
2011-06-07 17:36 ` [PATCH 87/88] Add configurable prefetch size for layoutget Jim Rees
2011-06-08 2:01 ` Benny Halevy
2011-06-08 2:18 ` Jim Rees
2011-06-08 7:15 ` Peng Tao
2011-06-09 6:06 ` Benny Halevy
2011-06-09 11:49 ` Jim Rees
2011-06-09 13:32 ` Benny Halevy
2011-06-09 13:58 ` Jim Rees
2011-06-09 15:07 ` Peng Tao
2011-06-09 21:22 ` Benny Halevy
2011-06-10 6:00 ` tao.peng
2011-06-10 12:33 ` Benny Halevy
2011-06-10 14:09 ` tao.peng
2011-06-10 19:23 ` Benny Halevy
2011-06-10 20:03 ` Fred Isaman
2011-06-10 21:15 ` Benny Halevy
2011-06-11 1:46 ` Peng Tao
2011-06-10 23:20 ` Boaz Harrosh
2011-06-11 2:19 ` Peng Tao
2011-06-12 14:40 ` Boaz Harrosh
2011-06-12 18:46 ` Peng Tao
2011-06-11 1:35 ` Peng Tao
2011-06-09 21:23 ` Benny Halevy
2011-06-10 5:36 ` tao.peng
2011-06-10 12:36 ` Benny Halevy
2011-06-10 14:17 ` tao.peng
2011-06-10 19:02 ` Benny Halevy
2011-06-09 15:01 ` Peng Tao
2011-06-09 14:54 ` Peng Tao
2011-06-09 21:30 ` Benny Halevy
2011-06-10 6:02 ` tao.peng
2011-06-10 12:47 ` Benny Halevy
2011-06-10 14:30 ` tao.peng
2011-06-10 19:07 ` Benny Halevy
2011-06-10 16:23 ` Boaz Harrosh
2011-06-10 16:44 ` Boaz Harrosh
2011-06-09 6:08 ` Benny Halevy
2011-06-07 17:36 ` [PATCH 88/88] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-08 2:05 ` Benny Halevy
2011-06-08 7:06 ` Peng Tao
2011-06-08 7:29 ` Peng Tao
2011-06-09 21:52 ` [PATCH 00/88] pnfs block layout driver Boaz Harrosh
2011-06-09 22:15 ` Jim Rees
2011-06-10 2:16 ` Boaz Harrosh
2011-06-10 2:20 ` Boaz Harrosh
2011-06-10 4:04 ` Benny Halevy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=43efe43d446261a498acbb571f459ce25b171edf.1307464382.git.rees@umich.edu \
--to=rees@umich.edu \
--cc=bhalevy@panasas.com \
--cc=honey@citi.umich.edu \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).