From: Benny Halevy <bhalevy.lists@gmail.com>
To: Jim Rees <rees@umich.edu>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: Re: [PATCH 17/34] pnfsblock: call and parse getdevicelist
Date: Tue, 14 Jun 2011 11:36:44 -0400 [thread overview]
Message-ID: <4DF7800C.1010503@gmail.com> (raw)
In-Reply-To: <c42adbc8a8c7e03043bfbef1b9a2ceb72f1baf89.1307921138.git.rees@umich.edu>
On 2011-06-12 19:44, Jim Rees wrote:
> From: Fred Isaman <iisaman@citi.umich.edu>
>
> Call GETDEVICELIST during mount, then call and parse GETDEVICEINFO
> for each device returned.
>
> [pnfsblock: fix pnfs_deviceid references]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> [pnfsblock: fix print format warnings for sector_t and size_t]
> [pnfs-block: #include <linux/vmalloc.h>]
> [pnfsblock: no PNFS_NFS_SERVER]
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> [pnfsblock: fix bug determining size of striped volume]
> [pnfsblock: fix oops when using multiple devices]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> ---
> fs/nfs/blocklayout/blocklayout.c | 155 +++++++++++++++++++++++++++++++++++++-
> fs/nfs/blocklayout/blocklayout.h | 95 +++++++++++++++++++++++
> 2 files changed, 248 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 88b9d1a..36374f4 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -31,7 +31,7 @@
> */
> #include <linux/module.h>
> #include <linux/init.h>
> -
> +#include <linux/vmalloc.h>
> #include "blocklayout.h"
>
> #define NFSDBG_FACILITY NFSDBG_PNFS_LD
> @@ -164,17 +164,168 @@ bl_cleanup_layoutcommit(struct pnfs_layout_hdr *lo,
> {
> }
>
> +static void free_blk_mountid(struct block_mount_id *mid)
> +{
> + if (mid) {
> + struct pnfs_block_dev *dev;
> + spin_lock(&mid->bm_lock);
> + while (!list_empty(&mid->bm_devlist)) {
> + dev = list_first_entry(&mid->bm_devlist,
> + struct pnfs_block_dev,
> + bm_node);
> + list_del(&dev->bm_node);
> + free_block_dev(dev);
> + }
> + spin_unlock(&mid->bm_lock);
> + kfree(mid);
> + }
> +}
> +
> +/* This is mostly copied from the filelayout's get_device_info function.
> + * It seems much of this should be at the generic pnfs level.
> + */
> +static struct pnfs_block_dev *
> +nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
> + struct nfs4_deviceid *d_id,
> + struct list_head *sdlist)
> +{
> + struct pnfs_device *dev;
> + struct pnfs_block_dev *rv = NULL;
> + u32 max_resp_sz;
> + int max_pages;
> + struct page **pages = NULL;
> + int i, rc;
> +
> + /*
> + * Use the session max response size as the basis for setting
> + * GETDEVICEINFO's maxcount
> + */
> + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
> + max_pages = max_resp_sz >> PAGE_SHIFT;
> + dprintk("%s max_resp_sz %u max_pages %d\n",
> + __func__, max_resp_sz, max_pages);
> +
> + dev = kmalloc(sizeof(*dev), GFP_KERNEL);
> + if (!dev) {
> + dprintk("%s kmalloc failed\n", __func__);
> + return NULL;
> + }
> +
> + pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
> + if (pages == NULL) {
> + kfree(dev);
> + return NULL;
> + }
> + for (i = 0; i < max_pages; i++) {
> + pages[i] = alloc_page(GFP_KERNEL);
> + if (!pages[i])
> + goto out_free;
> + }
> +
> + /* set dev->area */
> + dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
> + if (!dev->area)
> + goto out_free;
> +
> + memcpy(&dev->dev_id, d_id, sizeof(*d_id));
> + dev->layout_type = LAYOUT_BLOCK_VOLUME;
> + dev->pages = pages;
> + dev->pgbase = 0;
> + dev->pglen = PAGE_SIZE * max_pages;
> + dev->mincount = 0;
> +
> + dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
> + rc = nfs4_proc_getdeviceinfo(server, dev);
> + dprintk("%s getdevice info returns %d\n", __func__, rc);
> + if (rc)
> + goto out_free;
> +
> + rv = nfs4_blk_decode_device(server, dev, sdlist);
> + out_free:
> + if (dev->area != NULL)
> + vunmap(dev->area);
> + for (i = 0; i < max_pages; i++)
> + __free_page(pages[i]);
> + kfree(pages);
> + kfree(dev);
> + return rv;
> +}
> +
> static int
> bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
> {
> + struct block_mount_id *b_mt_id = NULL;
> + struct pnfs_mount_type *mtype = NULL;
> + struct pnfs_devicelist *dlist = NULL;
> + struct pnfs_block_dev *bdev;
> + LIST_HEAD(block_disklist);
> + int status = 0, i;
> +
> dprintk("%s enter\n", __func__);
> - return 0;
> +
> + if (server->pnfs_blksize == 0) {
> + dprintk("%s Server did not return blksize\n", __func__);
> + return -EINVAL;
> + }
> + b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
> + if (!b_mt_id) {
> + status = -ENOMEM;
> + goto out_error;
> + }
> + /* Initialize nfs4 block layout mount id */
> + spin_lock_init(&b_mt_id->bm_lock);
> + INIT_LIST_HEAD(&b_mt_id->bm_devlist);
> +
> + dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
> + if (!dlist)
> + goto out_error;
> + dlist->eof = 0;
> + while (!dlist->eof) {
> + status = nfs4_proc_getdevicelist(server, fh, dlist);
> + if (status)
> + goto out_error;
> + dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
> + __func__, dlist->num_devs, dlist->eof);
> + /* For each device returned in dlist, call GETDEVICEINFO, and
> + * decode the opaque topology encoding to create a flat
> + * volume topology, matching VOLUME_SIMPLE disk signatures
> + * to disks in the visible block disk list.
> + * Construct an LVM meta device from the flat volume topology.
> + */
> + for (i = 0; i < dlist->num_devs; i++) {
> + bdev = nfs4_blk_get_deviceinfo(server, fh,
> + &dlist->dev_id[i],
> + &block_disklist);
> + if (!bdev) {
> + status = -ENODEV;
> + goto out_error;
> + }
> + spin_lock(&b_mt_id->bm_lock);
> + list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
> + spin_unlock(&b_mt_id->bm_lock);
> + }
> + }
> + dprintk("%s SUCCESS\n", __func__);
> + server->pnfs_ld_data = b_mt_id;
> +
> + out_return:
> + kfree(dlist);
> + return status;
> +
> + out_error:
> + free_blk_mountid(b_mt_id);
> + kfree(mtype);
> + goto out_return;
> }
>
> static int
> bl_clear_layoutdriver(struct nfs_server *server)
> {
> + struct block_mount_id *b_mt_id = server->pnfs_ld_data;
> +
> dprintk("%s enter\n", __func__);
> + free_blk_mountid(b_mt_id);
> + dprintk("%s RETURNS\n", __func__);
> return 0;
> }
>
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index 6bbfc3d..21fa21c 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -35,12 +35,60 @@
> #include <linux/nfs_fs.h>
> #include "../pnfs.h"
>
> +struct block_mount_id {
> + spinlock_t bm_lock; /* protects list */
> + struct list_head bm_devlist; /* holds pnfs_block_dev */
> +};
> +
> struct pnfs_block_dev {
> struct list_head bm_node;
> struct nfs4_deviceid bm_mdevid; /* associated devid */
> struct block_device *bm_mdev; /* meta device itself */
> };
>
> +/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
> +struct visible_block_device {
> + struct list_head vi_node;
> + struct block_device *vi_bdev;
> + int vi_mapped;
> + int vi_put_done;
> +};
> +
> +enum blk_vol_type {
> + PNFS_BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */
> + PNFS_BLOCK_VOLUME_SLICE = 1, /* slice of another volume */
> + PNFS_BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */
> + PNFS_BLOCK_VOLUME_STRIPE = 3 /* striped across multiple volumes */
> +};
> +
> +/* All disk offset/lengths are stored in 512-byte sectors */
> +struct pnfs_blk_volume {
> + uint32_t bv_type;
> + sector_t bv_size;
> + struct pnfs_blk_volume **bv_vols;
> + int bv_vol_n;
> + union {
> + dev_t bv_dev;
> + sector_t bv_stripe_unit;
> + sector_t bv_offset;
> + };
> +};
> +
> +/* Since components need not be aligned, cannot use sector_t */
> +struct pnfs_blk_sig_comp {
> + int64_t bs_offset; /* In bytes */
> + uint32_t bs_length; /* In bytes */
> + char *bs_string;
> +};
> +
> +/* Maximum number of signatures components in a simple volume */
> +# define PNFS_BLOCK_MAX_SIG_COMP 16
> +
> +struct pnfs_blk_sig {
> + int si_num_comps;
> + struct pnfs_blk_sig_comp si_comps[PNFS_BLOCK_MAX_SIG_COMP];
> +};
> +
> enum exstate4 {
> PNFS_BLOCK_READWRITE_DATA = 0,
> PNFS_BLOCK_READ_DATA = 1,
> @@ -96,6 +144,8 @@ struct pnfs_block_layout {
> sector_t bl_blocksize; /* Server blocksize in sectors */
> };
>
> +#define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data))
> +
> static inline struct pnfs_block_layout *
> BLK_LO2EXT(struct pnfs_layout_hdr *lo)
> {
> @@ -108,6 +158,51 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
> return BLK_LO2EXT(lseg->pls_layout);
> }
>
> +uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
> +
> +#define BLK_READBUF(p, e, nbytes) do { \
> + p = blk_overflow(p, e, nbytes); \
> + if (!p) { \
> + printk(KERN_WARNING \
> + "%s: reply buffer overflowed in line %d.\n", \
> + __func__, __LINE__); \
> + goto out_err; \
> + } \
> +} while (0)
> +
> +#define READ32(x) (x) = ntohl(*p++)
> +#define READ64(x) do { \
> + (x) = (uint64_t)ntohl(*p++) << 32; \
> + (x) |= ntohl(*p++); \
> +} while (0)
> +#define COPYMEM(x, nbytes) do { \
> + memcpy((x), p, nbytes); \
> + p += XDR_QUADLEN(nbytes); \
> +} while (0)
> +#define READ_DEVID(x) COPYMEM((x)->data, NFS4_DEVICEID4_SIZE)
> +#define READ_SECTOR(x) do { \
> + READ64(tmp); \
> + if (tmp & 0x1ff) { \
> + printk(KERN_WARNING \
> + "%s Value not 512-byte aligned at line %d\n", \
> + __func__, __LINE__); \
> + goto out_err; \
> + } \
> + (x) = tmp >> 9; \
> +} while (0)
> +
> +#define WRITE32(n) do { \
> + *p++ = htonl(n); \
> + } while (0)
> +#define WRITE64(n) do { \
> + *p++ = htonl((uint32_t)((n) >> 32)); \
> + *p++ = htonl((uint32_t)(n)); \
> +} while (0)
> +#define WRITEMEM(ptr, nbytes) do { \
> + p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
> +} while (0)
> +#define WRITE_DEVID(x) WRITEMEM((x)->data, NFS4_DEVICEID4_SIZE)
> +
Please don't use these obsolete macros; instead, use the official
XDR {en,de}coding helpers and be32_to_cpu directly.
We're trying to eradicate these macros from the NFS client.
Benny
> /* blocklayoutdev.c */
> struct block_device *nfs4_blkdev_get(dev_t dev);
> int nfs4_blkdev_put(struct block_device *bdev);
next prev parent reply other threads:[~2011-06-14 15:37 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-12 23:43 [PATCH 00/34] pnfs block layout driver based on v3.0-rc2 Jim Rees
2011-06-12 23:43 ` [PATCH 01/34] pnfs: GETDEVICELIST Jim Rees
2011-06-12 23:43 ` [PATCH 02/34] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-12 23:43 ` [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-13 14:36 ` Fred Isaman
2011-06-14 10:40 ` tao.peng
2011-06-14 13:58 ` Fred Isaman
2011-06-14 14:28 ` Benny Halevy
2011-06-12 23:43 ` [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Jim Rees
2011-06-13 14:44 ` Fred Isaman
2011-06-14 11:01 ` tao.peng
2011-06-14 14:05 ` Fred Isaman
2011-06-14 15:53 ` Peng Tao
2011-06-14 16:02 ` Fred Isaman
2011-06-12 23:43 ` [PATCH 05/34] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14 15:01 ` Benny Halevy
2011-06-14 15:08 ` Peng Tao
2011-06-12 23:44 ` [PATCH 06/34] pnfs: cleanup_layoutcommit Jim Rees
2011-06-13 21:19 ` Benny Halevy
2011-06-14 15:16 ` Peng Tao
2011-06-14 15:10 ` Benny Halevy
2011-06-14 15:21 ` Peng Tao
2011-06-14 15:19 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 07/34] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14 15:13 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 08/34] pnfsblock: blocklayout stub Jim Rees
2011-06-12 23:44 ` [PATCH 09/34] pnfsblock: layout alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 10/34] Add support for simple rpc pipefs Jim Rees
2011-06-12 23:44 ` [PATCH 11/34] pnfs-block: Add block device discovery pipe Jim Rees
2011-06-12 23:44 ` [PATCH 12/34] pnfsblock: basic extent code Jim Rees
2011-06-12 23:44 ` [PATCH 13/34] pnfsblock: add device operations Jim Rees
2011-06-12 23:44 ` [PATCH 14/34] pnfsblock: remove " Jim Rees
2011-06-12 23:44 ` [PATCH 15/34] pnfsblock: lseg alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 16/34] pnfsblock: merge extents Jim Rees
2011-06-12 23:44 ` [PATCH 17/34] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14 15:36 ` Benny Halevy [this message]
2011-06-12 23:44 ` [PATCH 18/34] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-13 15:56 ` Fred Isaman
2011-06-12 23:44 ` [PATCH 19/34] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-12 23:44 ` [PATCH 20/34] pnfsblock: find_get_extent Jim Rees
2011-06-12 23:44 ` [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14 15:40 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 22/34] pnfsblock: merge rw extents Jim Rees
2011-06-12 23:44 ` [PATCH 23/34] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14 15:44 ` Benny Halevy
2011-06-12 23:44 ` [PATCH 24/34] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-12 23:44 ` [PATCH 25/34] pnfsblock: bl_read_pagelist Jim Rees
2011-06-12 23:44 ` [PATCH 26/34] pnfsblock: write_begin Jim Rees
2011-06-12 23:44 ` [PATCH 27/34] pnfsblock: write_end Jim Rees
2011-06-12 23:44 ` [PATCH 28/34] pnfsblock: write_end_cleanup Jim Rees
2011-06-12 23:45 ` [PATCH 29/34] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-12 23:45 ` [PATCH 30/34] pnfsblock: bl_write_pagelist Jim Rees
2011-06-12 23:45 ` [PATCH 31/34] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-12 23:45 ` [PATCH 32/34] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-12 23:45 ` [PATCH 33/34] Add configurable prefetch size for layoutget Jim Rees
2011-06-12 23:45 ` [PATCH 34/34] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-14 16:15 ` Benny Halevy
2011-06-14 16:22 ` Fred Isaman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DF7800C.1010503@gmail.com \
--to=bhalevy.lists@gmail.com \
--cc=honey@citi.umich.edu \
--cc=linux-nfs@vger.kernel.org \
--cc=rees@umich.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).