linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy.lists@gmail.com>
To: Jim Rees <rees@umich.edu>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: Re: [PATCH 17/34] pnfsblock: call and parse getdevicelist
Date: Tue, 14 Jun 2011 11:36:44 -0400	[thread overview]
Message-ID: <4DF7800C.1010503@gmail.com> (raw)
In-Reply-To: <c42adbc8a8c7e03043bfbef1b9a2ceb72f1baf89.1307921138.git.rees@umich.edu>

On 2011-06-12 19:44, Jim Rees wrote:
> From: Fred Isaman <iisaman@citi.umich.edu>
> 
> Call GETDEVICELIST during mount, then call and parse GETDEVICEINFO
> for each device returned.
> 
> [pnfsblock: fix pnfs_deviceid references]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> [pnfsblock: fix print format warnings for sector_t and size_t]
> [pnfs-block: #include <linux/vmalloc.h>]
> [pnfsblock: no PNFS_NFS_SERVER]
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> [pnfsblock: fix bug determining size of striped volume]
> [pnfsblock: fix oops when using multiple devices]
> Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
> ---
>  fs/nfs/blocklayout/blocklayout.c |  155 +++++++++++++++++++++++++++++++++++++-
>  fs/nfs/blocklayout/blocklayout.h |   95 +++++++++++++++++++++++
>  2 files changed, 248 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 88b9d1a..36374f4 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -31,7 +31,7 @@
>   */
>  #include <linux/module.h>
>  #include <linux/init.h>
> -
> +#include <linux/vmalloc.h>
>  #include "blocklayout.h"
>  
>  #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
> @@ -164,17 +164,168 @@ bl_cleanup_layoutcommit(struct pnfs_layout_hdr *lo,
>  {
>  }
>  
> +static void free_blk_mountid(struct block_mount_id *mid)
> +{
> +	if (mid) {
> +		struct pnfs_block_dev *dev;
> +		spin_lock(&mid->bm_lock);
> +		while (!list_empty(&mid->bm_devlist)) {
> +			dev = list_first_entry(&mid->bm_devlist,
> +					       struct pnfs_block_dev,
> +					       bm_node);
> +			list_del(&dev->bm_node);
> +			free_block_dev(dev);
> +		}
> +		spin_unlock(&mid->bm_lock);
> +		kfree(mid);
> +	}
> +}
> +
> +/* This is mostly copied from the filelayout's get_device_info function.
> + * It seems much of this should be at the generic pnfs level.
> + */
> +static struct pnfs_block_dev *
> +nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
> +			struct nfs4_deviceid *d_id,
> +			struct list_head *sdlist)
> +{
> +	struct pnfs_device *dev;
> +	struct pnfs_block_dev *rv = NULL;
> +	u32 max_resp_sz;
> +	int max_pages;
> +	struct page **pages = NULL;
> +	int i, rc;
> +
> +	/*
> +	 * Use the session max response size as the basis for setting
> +	 * GETDEVICEINFO's maxcount
> +	 */
> +	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
> +	max_pages = max_resp_sz >> PAGE_SHIFT;
> +	dprintk("%s max_resp_sz %u max_pages %d\n",
> +		__func__, max_resp_sz, max_pages);
> +
> +	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
> +	if (!dev) {
> +		dprintk("%s kmalloc failed\n", __func__);
> +		return NULL;
> +	}
> +
> +	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
> +	if (pages == NULL) {
> +		kfree(dev);
> +		return NULL;
> +	}
> +	for (i = 0; i < max_pages; i++) {
> +		pages[i] = alloc_page(GFP_KERNEL);
> +		if (!pages[i])
> +			goto out_free;
> +	}
> +
> +	/* set dev->area */
> +	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
> +	if (!dev->area)
> +		goto out_free;
> +
> +	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
> +	dev->layout_type = LAYOUT_BLOCK_VOLUME;
> +	dev->pages = pages;
> +	dev->pgbase = 0;
> +	dev->pglen = PAGE_SIZE * max_pages;
> +	dev->mincount = 0;
> +
> +	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
> +	rc = nfs4_proc_getdeviceinfo(server, dev);
> +	dprintk("%s getdevice info returns %d\n", __func__, rc);
> +	if (rc)
> +		goto out_free;
> +
> +	rv = nfs4_blk_decode_device(server, dev, sdlist);
> + out_free:
> +	if (dev->area != NULL)
> +		vunmap(dev->area);
> +	for (i = 0; i < max_pages; i++)
> +		__free_page(pages[i]);
> +	kfree(pages);
> +	kfree(dev);
> +	return rv;
> +}
> +
>  static int
>  bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
>  {
> +	struct block_mount_id *b_mt_id = NULL;
> +	struct pnfs_mount_type *mtype = NULL;
> +	struct pnfs_devicelist *dlist = NULL;
> +	struct pnfs_block_dev *bdev;
> +	LIST_HEAD(block_disklist);
> +	int status = 0, i;
> +
>  	dprintk("%s enter\n", __func__);
> -	return 0;
> +
> +	if (server->pnfs_blksize == 0) {
> +		dprintk("%s Server did not return blksize\n", __func__);
> +		return -EINVAL;
> +	}
> +	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
> +	if (!b_mt_id) {
> +		status = -ENOMEM;
> +		goto out_error;
> +	}
> +	/* Initialize nfs4 block layout mount id */
> +	spin_lock_init(&b_mt_id->bm_lock);
> +	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
> +
> +	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
> +	if (!dlist)
> +		goto out_error;
> +	dlist->eof = 0;
> +	while (!dlist->eof) {
> +		status = nfs4_proc_getdevicelist(server, fh, dlist);
> +		if (status)
> +			goto out_error;
> +		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
> +			__func__, dlist->num_devs, dlist->eof);
> +		/* For each device returned in dlist, call GETDEVICEINFO, and
> +		 * decode the opaque topology encoding to create a flat
> +		 * volume topology, matching VOLUME_SIMPLE disk signatures
> +		 * to disks in the visible block disk list.
> +		 * Construct an LVM meta device from the flat volume topology.
> +		 */
> +		for (i = 0; i < dlist->num_devs; i++) {
> +			bdev = nfs4_blk_get_deviceinfo(server, fh,
> +						     &dlist->dev_id[i],
> +						     &block_disklist);
> +			if (!bdev) {
> +				status = -ENODEV;
> +				goto out_error;
> +			}
> +			spin_lock(&b_mt_id->bm_lock);
> +			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
> +			spin_unlock(&b_mt_id->bm_lock);
> +		}
> +	}
> +	dprintk("%s SUCCESS\n", __func__);
> +	server->pnfs_ld_data = b_mt_id;
> +
> + out_return:
> +	kfree(dlist);
> +	return status;
> +
> + out_error:
> +	free_blk_mountid(b_mt_id);
> +	kfree(mtype);
> +	goto out_return;
>  }
>  
>  static int
>  bl_clear_layoutdriver(struct nfs_server *server)
>  {
> +	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
> +
>  	dprintk("%s enter\n", __func__);
> +	free_blk_mountid(b_mt_id);
> +	dprintk("%s RETURNS\n", __func__);
>  	return 0;
>  }
>  
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index 6bbfc3d..21fa21c 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -35,12 +35,60 @@
>  #include <linux/nfs_fs.h>
>  #include "../pnfs.h"
>  
> +struct block_mount_id {
> +	spinlock_t			bm_lock;    /* protects list */
> +	struct list_head		bm_devlist; /* holds pnfs_block_dev */
> +};
> +
>  struct pnfs_block_dev {
>  	struct list_head		bm_node;
>  	struct nfs4_deviceid		bm_mdevid;    /* associated devid */
>  	struct block_device		*bm_mdev;     /* meta device itself */
>  };
>  
> +/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
> +struct visible_block_device {
> +	struct list_head	vi_node;
> +	struct block_device	*vi_bdev;
> +	int			vi_mapped;
> +	int			vi_put_done;
> +};
> +
> +enum blk_vol_type {
> +	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
> +	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
> +	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
> +	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
> +};
> +
> +/* All disk offset/lengths are stored in 512-byte sectors */
> +struct pnfs_blk_volume {
> +	uint32_t		bv_type;
> +	sector_t 		bv_size;
> +	struct pnfs_blk_volume 	**bv_vols;
> +	int 			bv_vol_n;
> +	union {
> +		dev_t			bv_dev;
> +		sector_t		bv_stripe_unit;
> +		sector_t 		bv_offset;
> +	};
> +};
> +
> +/* Since components need not be aligned, cannot use sector_t */
> +struct pnfs_blk_sig_comp {
> +	int64_t 	bs_offset;  /* In bytes */
> +	uint32_t   	bs_length;  /* In bytes */
> +	char 		*bs_string;
> +};
> +
> +/* Maximum number of signatures components in a simple volume */
> +# define PNFS_BLOCK_MAX_SIG_COMP 16
> +
> +struct pnfs_blk_sig {
> +	int 				si_num_comps;
> +	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
> +};
> +
>  enum exstate4 {
>  	PNFS_BLOCK_READWRITE_DATA	= 0,
>  	PNFS_BLOCK_READ_DATA		= 1,
> @@ -96,6 +144,8 @@ struct pnfs_block_layout {
>  	sector_t		bl_blocksize;  /* Server blocksize in sectors */
>  };
>  
> +#define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data))
> +
>  static inline struct pnfs_block_layout *
>  BLK_LO2EXT(struct pnfs_layout_hdr *lo)
>  {
> @@ -108,6 +158,51 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
>          return BLK_LO2EXT(lseg->pls_layout);
>  }
>  
> +uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
> +
> +#define BLK_READBUF(p, e, nbytes)  do { \
> +	p = blk_overflow(p, e, nbytes); \
> +	if (!p) { \
> +		printk(KERN_WARNING \
> +			"%s: reply buffer overflowed in line %d.\n", \
> +			__func__, __LINE__); \
> +		goto out_err; \
> +	} \
> +} while (0)
> +
> +#define READ32(x)         (x) = ntohl(*p++)
> +#define READ64(x)         do {                  \
> +	(x) = (uint64_t)ntohl(*p++) << 32;           \
> +	(x) |= ntohl(*p++);                     \
> +} while (0)
> +#define COPYMEM(x, nbytes) do {                 \
> +	memcpy((x), p, nbytes);                 \
> +	p += XDR_QUADLEN(nbytes);               \
> +} while (0)
> +#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_DEVICEID4_SIZE)
> +#define READ_SECTOR(x)     do { \
> +	READ64(tmp); \
> +	if (tmp & 0x1ff) { \
> +		printk(KERN_WARNING \
> +		       "%s Value not 512-byte aligned at line %d\n", \
> +		       __func__, __LINE__);			     \
> +		goto out_err; \
> +	} \
> +	(x) = tmp >> 9; \
> +} while (0)
> +
> +#define WRITE32(n)               do { \
> +	*p++ = htonl(n); \
> +	} while (0)
> +#define WRITE64(n)               do {                           \
> +	*p++ = htonl((uint32_t)((n) >> 32));			\
> +	*p++ = htonl((uint32_t)(n));				\
> +} while (0)
> +#define WRITEMEM(ptr, nbytes)     do {                          \
> +	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
> +} while (0)
> +#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_DEVICEID4_SIZE)
> +

Please don't use these obsolete macros; instead, use the official XDR
{en,de}coding helpers and be32_to_cpu directly.
We're trying to eradicate these macros from the NFS client.

Benny

>  /* blocklayoutdev.c */
>  struct block_device *nfs4_blkdev_get(dev_t dev);
>  int nfs4_blkdev_put(struct block_device *bdev);

  reply	other threads:[~2011-06-14 15:37 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-12 23:43 [PATCH 00/34] pnfs block layout driver based on v3.0-rc2 Jim Rees
2011-06-12 23:43 ` [PATCH 01/34] pnfs: GETDEVICELIST Jim Rees
2011-06-12 23:43 ` [PATCH 02/34] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-12 23:43 ` [PATCH 03/34] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-13 14:36   ` Fred Isaman
2011-06-14 10:40     ` tao.peng
2011-06-14 13:58       ` Fred Isaman
2011-06-14 14:28       ` Benny Halevy
2011-06-12 23:43 ` [PATCH 04/34] pnfs: hook nfs_write_begin/end to allow layout driver manipulation Jim Rees
2011-06-13 14:44   ` Fred Isaman
2011-06-14 11:01     ` tao.peng
2011-06-14 14:05       ` Fred Isaman
2011-06-14 15:53         ` Peng Tao
2011-06-14 16:02           ` Fred Isaman
2011-06-12 23:43 ` [PATCH 05/34] pnfs: ask for layout_blksize and save it in nfs_server Jim Rees
2011-06-14 15:01   ` Benny Halevy
2011-06-14 15:08     ` Peng Tao
2011-06-12 23:44 ` [PATCH 06/34] pnfs: cleanup_layoutcommit Jim Rees
2011-06-13 21:19   ` Benny Halevy
2011-06-14 15:16     ` Peng Tao
2011-06-14 15:10   ` Benny Halevy
2011-06-14 15:21     ` Peng Tao
2011-06-14 15:19   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 07/34] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-14 15:13   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 08/34] pnfsblock: blocklayout stub Jim Rees
2011-06-12 23:44 ` [PATCH 09/34] pnfsblock: layout alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 10/34] Add support for simple rpc pipefs Jim Rees
2011-06-12 23:44 ` [PATCH 11/34] pnfs-block: Add block device discovery pipe Jim Rees
2011-06-12 23:44 ` [PATCH 12/34] pnfsblock: basic extent code Jim Rees
2011-06-12 23:44 ` [PATCH 13/34] pnfsblock: add device operations Jim Rees
2011-06-12 23:44 ` [PATCH 14/34] pnfsblock: remove " Jim Rees
2011-06-12 23:44 ` [PATCH 15/34] pnfsblock: lseg alloc and free Jim Rees
2011-06-12 23:44 ` [PATCH 16/34] pnfsblock: merge extents Jim Rees
2011-06-12 23:44 ` [PATCH 17/34] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-14 15:36   ` Benny Halevy [this message]
2011-06-12 23:44 ` [PATCH 18/34] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-13 15:56   ` Fred Isaman
2011-06-12 23:44 ` [PATCH 19/34] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-12 23:44 ` [PATCH 20/34] pnfsblock: find_get_extent Jim Rees
2011-06-12 23:44 ` [PATCH 21/34] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-14 15:40   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 22/34] pnfsblock: merge rw extents Jim Rees
2011-06-12 23:44 ` [PATCH 23/34] pnfsblock: encode_layoutcommit Jim Rees
2011-06-14 15:44   ` Benny Halevy
2011-06-12 23:44 ` [PATCH 24/34] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-12 23:44 ` [PATCH 25/34] pnfsblock: bl_read_pagelist Jim Rees
2011-06-12 23:44 ` [PATCH 26/34] pnfsblock: write_begin Jim Rees
2011-06-12 23:44 ` [PATCH 27/34] pnfsblock: write_end Jim Rees
2011-06-12 23:44 ` [PATCH 28/34] pnfsblock: write_end_cleanup Jim Rees
2011-06-12 23:45 ` [PATCH 29/34] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-12 23:45 ` [PATCH 30/34] pnfsblock: bl_write_pagelist Jim Rees
2011-06-12 23:45 ` [PATCH 31/34] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-12 23:45 ` [PATCH 32/34] pnfsblock: Implement release_inval_marks Jim Rees
2011-06-12 23:45 ` [PATCH 33/34] Add configurable prefetch size for layoutget Jim Rees
2011-06-12 23:45 ` [PATCH 34/34] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-14 16:15   ` Benny Halevy
2011-06-14 16:22     ` Fred Isaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DF7800C.1010503@gmail.com \
    --to=bhalevy.lists@gmail.com \
    --cc=honey@citi.umich.edu \
    --cc=linux-nfs@vger.kernel.org \
    --cc=rees@umich.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).