All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] pNFS generic device ID cache version 3
@ 2010-04-26 16:18 andros
  2010-04-26 16:18 ` [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache andros
  0 siblings, 1 reply; 7+ messages in thread
From: andros @ 2010-04-26 16:18 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs


This patch set implements a shared RCU device ID cache servicing multiple
mounts of a single layout type per meta data server (struct nfs_client).

Device IDs are referenced by layout segments, which hold a pointer to the
nfs4_deviceid struct.

An hlist is used for the cache due to the large number of devices used by
the object and block layout drivers.

Note that nfs4_deviceid_hash() is the same as the NFSD opaque_hash function.
Perhaps they should be shared.

0001-SQUASHME-pnfs_submit-generic-device-ID-cache.patch
0002-SQUASHME-pnfs_submit-fix-multiple-mount-set_pnfs_lay.patch
0003-SQUASHME-pnfs-submit-file-layout-driver-generic-devi.patch

These patches apply to the 2.6.34-rc3 pnfs-submit branch.

Testing:
-------
CONFIG_NFS_V4_1 set:

NFSv4.1/pNFS mounts:
Connectathon tests pass against GFS2/pNFS with a single AUTH_SYS mount, a double
AUTH_SYS mount, and an AUTH_SYS and AUTH_GSS/KRB5 mount (which creates
two superblocks under a struct nfs_client and both share the device id cache).

NFSv4.0 mount:
Connectathon tests pass

Did not test with multiple device IDs. I will create a multiple device ID
test with the pynfs file layout server.

CONFIG_NFS_V4_1 not set:

NFSv4.0 mount: Connectathon tests pass.

-->Andy


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache
  2010-04-26 16:18 [PATCH 0/3] pNFS generic device ID cache version 3 andros
@ 2010-04-26 16:18 ` andros
  2010-04-26 16:18   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros
  2010-05-03 11:48   ` [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: " Benny Halevy
  0 siblings, 2 replies; 7+ messages in thread
From: andros @ 2010-04-26 16:18 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

A shared RCU device ID cache servicing multiple mounts of a single layout type
per meta data server (struct nfs_client).

Device IDs of type deviceid4 are required by all layout types, long lived and
read at each I/O.  They are added to the deviceid cache at first reference by
a layout via GETDEVICEINFO and (currently) are only removed at umount.

Reference count the device ID cache for each mounted file system
in the initialize_mountpoint layoutdriver_io_operation.

Drop the file system's reference on the device ID cache in the
uninitialize_mountpoint layoutdriver_io_operation, which is called at umount.

Each layoutsegment assigns a pointer and takes a reference to the
nfs4_deviceid structure identified by the layout deviceid.
This is so that there are no deviceid lookups for the normal I/O path.

Even though it is required by all layout types, the deviceid is not exposed
in the LAYOUTGET4res but is instead hidden in the opaque layouttype4.

Therefore, each layout type alloc_lseg calls nfs4_set_layout_deviceid,
and free_lseg calls nfs4_unset_layout_deviceid.

While the file layout driver will not cache very many deviceids, the object
and block layout drivers could cache hundreds for a large installation.
Use an hlist.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/pnfs.c             |  167 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4_pnfs.h |   50 +++++++++++++
 include/linux/nfs_fs_sb.h |    1 +
 3 files changed, 218 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 91572aa..bf906cc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -45,6 +45,7 @@
 #include <linux/nfs4.h>
 #include <linux/pnfs_xdr.h>
 #include <linux/nfs4_pnfs.h>
+#include <linux/rculist.h>
 
 #include "internal.h"
 #include "nfs4_fs.h"
@@ -2296,3 +2297,169 @@ struct pnfs_client_operations pnfs_ops = {
 
 EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
 EXPORT_SYMBOL(pnfs_register_layoutdriver);
+
+
+/* Device ID cache. Supports one layout type per struct nfs_client */
+int
+nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
+			 void (*free_callback)(struct kref *))
+{
+	struct nfs4_deviceid_cache *c;
+
+	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	spin_lock(&clp->cl_lock);
+	if (clp->cl_devid_cache != NULL) {
+		kref_get(&clp->cl_devid_cache->dc_kref);
+		spin_unlock(&clp->cl_lock);
+		dprintk("%s [kref [%d]]\n", __func__,
+			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
+		kfree(c);
+	} else {
+		int i;
+
+		spin_lock_init(&c->dc_lock);
+		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
+			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
+		kref_init(&c->dc_kref);
+		c->dc_free_callback = free_callback;
+		clp->cl_devid_cache = c;
+		spin_unlock(&clp->cl_lock);
+		dprintk("%s [new]\n", __func__);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
+
+void
+nfs4_init_deviceid_node(struct nfs4_deviceid *d)
+{
+	INIT_HLIST_NODE(&d->de_node);
+	kref_init(&d->de_kref);
+}
+EXPORT_SYMBOL(nfs4_init_deviceid_node);
+
+/* Called from layoutdriver_io_operations->alloc_lseg */
+void
+nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
+{
+	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
+	l->deviceid = d;
+	kref_get(&d->de_kref);
+}
+EXPORT_SYMBOL(nfs4_set_layout_deviceid);
+
+/* Called from layoutdriver_io_operations->free_lseg */
+void
+nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
+			   struct nfs4_deviceid *d,
+			   void (*free_callback)(struct kref *))
+{
+	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
+	l->deviceid = NULL;
+	kref_put(&d->de_kref, free_callback);
+}
+EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
+
+struct nfs4_deviceid *
+nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
+{
+	struct nfs4_deviceid *d;
+	struct hlist_node *n;
+	long hash = nfs4_deviceid_hash(id);
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
+			rcu_read_unlock();
+			return d;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
+EXPORT_SYMBOL(nfs4_find_deviceid);
+
+/*
+ * Add or kref_get a deviceid.
+ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
+ */
+struct nfs4_deviceid *
+nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
+{
+	struct nfs4_deviceid *d;
+	struct hlist_node *n;
+	long hash = nfs4_deviceid_hash(&new->de_id);
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	spin_lock(&c->dc_lock);
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
+			spin_unlock(&c->dc_lock);
+			dprintk("%s [discard]\n", __func__);
+			c->dc_free_callback(&new->de_kref);
+			return d;
+		}
+	}
+	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
+	spin_unlock(&c->dc_lock);
+	dprintk("%s [new]\n", __func__);
+	return new;
+}
+EXPORT_SYMBOL(nfs4_add_deviceid);
+
+static int
+nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash)
+{
+	struct nfs4_deviceid *d;
+	struct hlist_node *n;
+
+	dprintk("--> %s hash %ld\n", __func__, hash);
+	spin_lock(&c->dc_lock);
+	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+		hlist_del_rcu(&d->de_node);
+		spin_unlock(&c->dc_lock);
+		synchronize_rcu();
+		dprintk("%s [%d]\n", __func__,
+			atomic_read(&d->de_kref.refcount));
+		kref_put(&d->de_kref, c->dc_free_callback);
+		return 1;
+	}
+	spin_unlock(&c->dc_lock);
+	return 0;
+}
+
+static void
+nfs4_free_deviceid_cache(struct kref *kref)
+{
+	struct nfs4_deviceid_cache *cache =
+		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
+	int more;
+	long i;
+
+	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
+		more = 1;
+		while (more)
+			more = nfs4_remove_deviceid(cache, i);
+	}
+	kfree(cache);
+}
+
+void
+nfs4_put_deviceid_cache(struct nfs_client *clp)
+{
+	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
+	int refcount;
+
+	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+	spin_lock(&clp->cl_lock);
+	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
+	if (refcount == 1)
+		clp->cl_devid_cache = NULL;
+	spin_unlock(&clp->cl_lock);
+	dprintk("%s [%d]\n", __func__, refcount);
+	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
+}
+EXPORT_SYMBOL(nfs4_put_deviceid_cache);
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 3caac60..3b7aeb7 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -106,6 +106,7 @@ struct pnfs_layout_segment {
 	struct kref kref;
 	bool valid;
 	struct pnfs_layout_type *layout;
+	struct nfs4_deviceid *deviceid;
 	u8 ld_data[];			/* layout driver private data */
 };
 
@@ -275,6 +276,55 @@ struct pnfs_devicelist {
 	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
 };
 
+/*
+ * Device ID RCU cache. A device ID is unique per client ID and layout type.
+ */
+#define NFS4_DEVICE_ID_HASH_BITS	5
+#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
+#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
+
+static inline u32
+nfs4_deviceid_hash(struct pnfs_deviceid *id)
+{
+	unsigned char *cptr = (unsigned char *)id->data;
+	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
+	u32 x = 0;
+
+	while (nbytes--) {
+		x *= 37;
+		x += *cptr++;
+	}
+	return x & NFS4_DEVICE_ID_HASH_MASK;
+}
+
+struct nfs4_deviceid_cache {
+	spinlock_t		dc_lock;
+	struct kref		dc_kref;
+	void			(*dc_free_callback)(struct kref *);
+	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
+};
+
+/* Device ID cache node */
+struct nfs4_deviceid {
+	struct hlist_node	de_node;
+	struct pnfs_deviceid	de_id;
+	struct kref		de_kref;
+};
+
+extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
+				void (*free_callback)(struct kref *));
+extern void nfs4_put_deviceid_cache(struct nfs_client *);
+extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
+extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
+				struct pnfs_deviceid *);
+extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
+				struct nfs4_deviceid *);
+extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
+				struct nfs4_deviceid *);
+extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
+				struct nfs4_deviceid *,
+				void (*free_callback)(struct kref *));
+
 /* pNFS client callback functions.
  * These operations allow the layout driver to access pNFS client
  * specific information or call pNFS client->server operations.
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 8522461..ef2e18e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -87,6 +87,7 @@ struct nfs_client {
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session; 	/* sharred session */
 	struct list_head	cl_lo_inodes;	/* Inodes having layouts */
+	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
 #endif /* CONFIG_NFS_V4_1 */
 
 #ifdef CONFIG_NFS_FSCACHE
-- 
1.6.6


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver
  2010-04-26 16:18 ` [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache andros
@ 2010-04-26 16:18   ` andros
  2010-04-26 16:18     ` [PATCH 3/3] SQUASHME pnfs-submit: file layout driver generic device ID cache andros
  2010-05-03 11:48   ` [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: " Benny Halevy
  1 sibling, 1 reply; 7+ messages in thread
From: andros @ 2010-04-26 16:18 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

The same struct nfs_server can enter set_pnfs_layoutdriver for mounts that
share a super block.  Don't initialize a pnfs mountpoint more than once.

Don't set the pnfs_curr_ld until the pnfs mountpoint initialization succeeds

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/pnfs.c |   15 ++++++++++-----
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bf906cc..a3e8231 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -215,20 +215,25 @@ set_pnfs_layoutdriver(struct super_block *sb, struct nfs_fh *fh, u32 id)
 	struct pnfs_mount_type *mt;
 	struct nfs_server *server = NFS_SB(sb);
 
+	if (server->pnfs_curr_ld)
+		return;
+
 	if (id > 0 && find_pnfs(id, &mod)) {
-		dprintk("%s: Setting pNFS module\n", __func__);
-		server->pnfs_curr_ld = mod->pnfs_ld_type;
-		mt = server->pnfs_curr_ld->ld_io_ops->initialize_mountpoint(
+		mt = mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
 			sb, fh);
 		if (!mt) {
 			printk(KERN_ERR "%s: Error initializing mount point "
 			       "for layout driver %u. ", __func__, id);
 			goto out_err;
 		}
-		/* Layout driver succeeded in initializing mountpoint */
+		/*
+		 * Layout driver succeeded in initializing mountpoint
+		 * and has taken a reference on the nfs_client cl_devid_cache
+		 */
+		server->pnfs_curr_ld = mod->pnfs_ld_type;
 		server->pnfs_mountid = mt;
-		/* Set the rpc_ops */
 		server->nfs_client->rpc_ops = &pnfs_v4_clientops;
+		dprintk("%s: pNFS module for %u set\n", __func__, id);
 		return;
 	}
 
-- 
1.6.6


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] SQUASHME pnfs-submit: file layout driver generic device ID cache
  2010-04-26 16:18   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros
@ 2010-04-26 16:18     ` andros
  0 siblings, 0 replies; 7+ messages in thread
From: andros @ 2010-04-26 16:18 UTC (permalink / raw)
  To: pnfs; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

Replace the per superblock deviceid cache with the generic deviceid cache.

Embed struct nfs4_deviceid into struct nfs4_file_layout_dsaddr, the file layout
specific deviceid structure.  Provide a free_deviceid_callback.

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/client.c            |    1 +
 fs/nfs/nfs4filelayout.c    |   54 ++++++------
 fs/nfs/nfs4filelayout.h    |   12 +--
 fs/nfs/nfs4filelayoutdev.c |  199 +++++++++-----------------------------------
 4 files changed, 71 insertions(+), 195 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e13ccb7..887d71e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -38,6 +38,7 @@
 #include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/nfs4_pnfs.h>
 
 #include <asm/system.h>
 
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 0530b59..79b9df2 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -76,17 +76,11 @@ filelayout_initialize_mountpoint(struct super_block *sb, struct nfs_fh *fh)
 {
 	struct filelayout_mount_type *fl_mt;
 	struct pnfs_mount_type *mt;
-	int status;
 
 	fl_mt = kmalloc(sizeof(struct filelayout_mount_type), GFP_KERNEL);
 	if (!fl_mt)
 		goto error_ret;
 
-	/* Initialize nfs4 file layout specific device list structure */
-	fl_mt->hlist = kmalloc(sizeof(struct nfs4_pnfs_dev_hlist), GFP_KERNEL);
-	if (!fl_mt->hlist)
-		goto cleanup_fl_mt;
-
 	mt = kmalloc(sizeof(struct pnfs_mount_type), GFP_KERNEL);
 	if (!mt)
 		goto cleanup_fl_mt;
@@ -94,11 +88,11 @@ filelayout_initialize_mountpoint(struct super_block *sb, struct nfs_fh *fh)
 	fl_mt->fl_sb = sb;
 	mt->mountid = (void *)fl_mt;
 
-	status = nfs4_pnfs_devlist_init(fl_mt->hlist);
-	if (status)
+	if (nfs4_alloc_init_deviceid_cache(NFS_SB(sb)->nfs_client,
+					   nfs4_fl_free_deviceid_callback))
 		goto cleanup_mt;
 
-	dprintk("%s: device list has been initialized successfully\n",
+	dprintk("%s: deviceid cache has been initialized successfully\n",
 		__func__);
 	return mt;
 
@@ -106,11 +100,10 @@ cleanup_mt: ;
 	kfree(mt);
 
 cleanup_fl_mt: ;
-	kfree(fl_mt->hlist);
 	kfree(fl_mt);
 
 error_ret: ;
-	printk(KERN_WARNING "%s: device list could not be initialized\n",
+	printk(KERN_WARNING "%s: deviceid cache could not be initialized\n",
 		__func__);
 
 	return NULL;
@@ -123,13 +116,11 @@ filelayout_uninitialize_mountpoint(struct pnfs_mount_type *mountid)
 {
 	struct filelayout_mount_type *fl_mt = NULL;
 
+	dprintk("--> %s\n", __func__);
 	if (mountid) {
 		fl_mt = (struct filelayout_mount_type *)mountid->mountid;
-
-		if (fl_mt != NULL) {
-			nfs4_pnfs_devlist_destroy(fl_mt->hlist);
-			kfree(fl_mt);
-		}
+		nfs4_put_deviceid_cache(NFS_SB(fl_mt->fl_sb)->nfs_client);
+		kfree(fl_mt);
 		kfree(mountid);
 	}
 	return 0;
@@ -381,8 +372,7 @@ filelayout_check_layout(struct pnfs_layout_type *lo,
 	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
 
 	dprintk("--> %s\n", __func__);
-	dsaddr = nfs4_pnfs_device_item_find(FILE_MT(PNFS_INODE(lo))->hlist,
-					     &fl->dev_id);
+	dsaddr = nfs4_pnfs_device_item_find(nfss->nfs_client, &fl->dev_id);
 	if (dsaddr == NULL) {
 		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
 		if (dsaddr == NULL) {
@@ -421,13 +411,17 @@ filelayout_check_layout(struct pnfs_layout_type *lo,
 		dprintk("%s Stripe unit (%u) not aligned with rsize %u wsize %u\n",
 			__func__, fl->stripe_unit, nfss->ds_rsize, nfss->ds_wsize);
 	}
+
+	/* reference the device */
+	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
+
 	status = 0;
 out:
 	dprintk("--> %s returns %d\n", __func__, status);
 	return status;
 }
 
-static void filelayout_free_lseg(struct pnfs_layout_segment *lseg);
+static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
 static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
 
 /* Decode layout and store in layoutid.  Overwrite any existing layout
@@ -512,6 +506,7 @@ filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
 	struct pnfs_layout_segment *lseg;
 	int rc;
 
+	dprintk("--> %s\n", __func__);
 	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
 		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
 	if (!lseg)
@@ -520,7 +515,7 @@ filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
 	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
 
 	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
-		filelayout_free_lseg(lseg);
+		_filelayout_free_lseg(lseg);
 		lseg = NULL;
 	}
 	return lseg;
@@ -537,12 +532,21 @@ static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
 }
 
 static void
-filelayout_free_lseg(struct pnfs_layout_segment *lseg)
+_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 {
 	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
 	kfree(lseg);
 }
 
+static void
+filelayout_free_lseg(struct pnfs_layout_segment *lseg)
+{
+	dprintk("--> %s\n", __func__);
+	nfs4_unset_layout_deviceid(lseg, lseg->deviceid,
+				   nfs4_fl_free_deviceid_callback);
+	_filelayout_free_lseg(lseg);
+}
+
 /*
  * Allocate a new nfs_write_data struct and initialize
  */
@@ -618,12 +622,8 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
 	stripesz = filelayout_get_stripesize(layoutid);
 	dprintk("%s stripesize %Zd\n", __func__, stripesz);
 
-	dsaddr = nfs4_pnfs_device_item_find(FILE_MT(data->inode)->hlist,
-					     &nfslay->dev_id);
-	if (dsaddr == NULL) {
-		data->pdata.pnfs_error = -EIO;
-		goto out;
-	}
+	dsaddr = container_of(data->pdata.lseg->deviceid,
+			      struct nfs4_file_layout_dsaddr, deviceid);
 
 	INIT_LIST_HEAD(&head);
 	INIT_LIST_HEAD(&head2);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 12498a2..fbf307c 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -43,8 +43,7 @@ struct nfs4_pnfs_ds {
 };
 
 struct nfs4_file_layout_dsaddr {
-	struct hlist_node	hash_node;   /* nfs4_pnfs_dev_hlist dev_list */
-	struct pnfs_deviceid	dev_id;
+	struct nfs4_deviceid	deviceid;
 	u32 			stripe_count;
 	u8			*stripe_indices;
 	u32			ds_num;
@@ -86,15 +85,13 @@ struct nfs4_filelayout {
 
 struct filelayout_mount_type {
 	struct super_block *fl_sb;
-	struct nfs4_pnfs_dev_hlist *hlist;
 };
 
 extern struct pnfs_client_operations *pnfs_callback_ops;
 
+extern void nfs4_fl_free_deviceid_callback(struct kref *);
 extern void print_ds(struct nfs4_pnfs_ds *ds);
 char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
-int  nfs4_pnfs_devlist_init(struct nfs4_pnfs_dev_hlist *hlist);
-void nfs4_pnfs_devlist_destroy(struct nfs4_pnfs_dev_hlist *hlist);
 int nfs4_pnfs_dserver_get(struct pnfs_layout_segment *lseg,
 			  loff_t offset,
 			  size_t count,
@@ -102,9 +99,8 @@ int nfs4_pnfs_dserver_get(struct pnfs_layout_segment *lseg,
 u32 filelayout_dserver_get_index(loff_t offset,
 				 struct nfs4_file_layout_dsaddr *di,
 				 struct nfs4_filelayout_segment *layout);
-struct nfs4_file_layout_dsaddr *
-nfs4_pnfs_device_item_find(struct nfs4_pnfs_dev_hlist *hlist,
-			   struct pnfs_deviceid *dev_id);
+extern struct nfs4_file_layout_dsaddr *
+nfs4_pnfs_device_item_find(struct nfs_client *, struct pnfs_deviceid *dev_id);
 struct nfs4_file_layout_dsaddr *
 get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
 
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 045c204..61a3381 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -45,6 +45,7 @@
 
 #include <linux/utsname.h>
 #include <linux/vmalloc.h>
+#include <linux/nfs4_pnfs.h>
 #include <linux/pnfs_xdr.h>
 #include "nfs4filelayout.h"
 #include "internal.h"
@@ -98,42 +99,6 @@ deviceid_fmt(const struct pnfs_deviceid *dev_id)
 	return buf;
 }
 
-unsigned long
-_deviceid_hash(const struct pnfs_deviceid *dev_id)
-{
-	unsigned char *cptr = (unsigned char *)dev_id->data;
-	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
-	u64 x = 0;
-
-	while (nbytes--) {
-		x *= 37;
-		x += *cptr++;
-	}
-	return x & NFS4_PNFS_DEV_HASH_MASK;
-}
-
-/* Assumes lock is held */
-static inline struct nfs4_file_layout_dsaddr *
-_device_lookup(struct nfs4_pnfs_dev_hlist *hlist,
-	       const struct pnfs_deviceid *dev_id)
-{
-	unsigned long      hash;
-	struct hlist_node *np;
-
-	dprintk("_device_lookup: dev_id=%s\n", deviceid_fmt(dev_id));
-
-	hash = _deviceid_hash(dev_id);
-
-	hlist_for_each(np, &hlist->dev_list[hash]) {
-		struct nfs4_file_layout_dsaddr *dsaddr;
-		dsaddr = hlist_entry(np, struct nfs4_file_layout_dsaddr,
-				  hash_node);
-		if (!memcmp(&dsaddr->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE))
-			return dsaddr;
-	}
-	return NULL;
-}
-
 /* nfs4_ds_cache_lock is held */
 static inline struct nfs4_pnfs_ds *
 _data_server_lookup(u32 ip_addr, u32 port)
@@ -152,22 +117,6 @@ _data_server_lookup(u32 ip_addr, u32 port)
 	return NULL;
 }
 
-
-/* Assumes lock is held */
-static inline void
-_device_add(struct nfs4_pnfs_dev_hlist *hlist,
-	    struct nfs4_file_layout_dsaddr *dsaddr)
-{
-	unsigned long      hash;
-
-	dprintk("_device_add: dev_id=%s ds_list:\n",
-		deviceid_fmt(&dsaddr->dev_id));
-	print_ds_list(dsaddr);
-
-	hash = _deviceid_hash(&dsaddr->dev_id);
-	hlist_add_head(&dsaddr->hash_node, &hlist->dev_list[hash]);
-}
-
 /* Create an rpc to the data server defined in 'dev_list' */
 static int
 nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
@@ -269,118 +218,47 @@ out_put:
 static void
 destroy_ds(struct nfs4_pnfs_ds *ds)
 {
+	dprintk("--> %s\n", __func__);
+	print_ds(ds);
+
 	if (ds->ds_clp)
 		nfs_put_client(ds->ds_clp);
 	kfree(ds);
 }
 
-/* Assumes lock is NOT held */
 static void
-nfs4_pnfs_device_destroy(struct nfs4_file_layout_dsaddr *dsaddr,
-			 struct nfs4_pnfs_dev_hlist *hlist)
+nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 {
 	struct nfs4_pnfs_ds *ds;
-	LIST_HEAD(release);
 	int i;
 
-	if (!dsaddr)
-		return;
-
-	dprintk("%s: dev_id=%s\ndev_list:\n", __func__,
-		deviceid_fmt(&dsaddr->dev_id));
-	print_ds_list(dsaddr);
-
-	write_lock(&hlist->dev_lock);
-	hlist_del_init(&dsaddr->hash_node);
+	dprintk("%s: device id=%s\n", __func__,
+		deviceid_fmt(&dsaddr->deviceid.de_id));
 
 	for (i = 0; i < dsaddr->ds_num; i++) {
 		ds = dsaddr->ds_list[i];
 		if (ds != NULL) {
-			/* if we are last user - move to release list */
 			if (atomic_dec_and_lock(&ds->ds_count,
 						&nfs4_ds_cache_lock)) {
 				list_del_init(&ds->ds_node);
 				spin_unlock(&nfs4_ds_cache_lock);
-				list_add(&ds->ds_node, &release);
+				destroy_ds(ds);
 			}
 		}
 	}
-	write_unlock(&hlist->dev_lock);
-	while (!list_empty(&release)) {
-		ds = list_entry(release.next, struct nfs4_pnfs_ds, ds_node);
-		list_del(&ds->ds_node);
-		destroy_ds(ds);
-	}
+	kfree(dsaddr->stripe_indices);
 	kfree(dsaddr);
 }
 
-int
-nfs4_pnfs_devlist_init(struct nfs4_pnfs_dev_hlist *hlist)
-{
-	int i;
-
-	rwlock_init(&hlist->dev_lock);
-
-	for (i = 0; i < NFS4_PNFS_DEV_HASH_SIZE; i++) {
-		INIT_HLIST_HEAD(&hlist->dev_list[i]);
-	}
-
-	return 0;
-}
-
-/* De-alloc all devices for a mount point.  This is called in
- * nfs4_kill_super.
- */
 void
-nfs4_pnfs_devlist_destroy(struct nfs4_pnfs_dev_hlist *hlist)
+nfs4_fl_free_deviceid_callback(struct kref *kref)
 {
-	int i;
+	struct nfs4_deviceid *device =
+		container_of(kref, struct nfs4_deviceid, de_kref);
+	struct nfs4_file_layout_dsaddr *dsaddr =
+		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
 
-	if (hlist == NULL)
-		return;
-
-	/* No lock held, as synchronization should occur at upper levels */
-	for (i = 0; i < NFS4_PNFS_DEV_HASH_SIZE; i++) {
-		struct hlist_node *np, *next;
-
-		hlist_for_each_safe(np, next, &hlist->dev_list[i]) {
-			struct nfs4_file_layout_dsaddr *dsaddr;
-			dsaddr = hlist_entry(np,
-					     struct nfs4_file_layout_dsaddr,
-					     hash_node);
-			/* nfs4_pnfs_device_destroy grabs hlist->dev_lock */
-			nfs4_pnfs_device_destroy(dsaddr, hlist);
-		}
-	}
-}
-
-/*
- * Add the device to the list of available devices for this mount point.
- * The * rpc client is created during first I/O.
- */
-static int
-nfs4_pnfs_device_add(struct filelayout_mount_type *mt,
-		     struct nfs4_file_layout_dsaddr *dsaddr)
-{
-	struct nfs4_file_layout_dsaddr *tmp_dsaddr;
-	struct nfs4_pnfs_dev_hlist *hlist = mt->hlist;
-
-	dprintk("nfs4_pnfs_device_add\n");
-
-	/* Write lock, do lookup again, and then add device */
-	write_lock(&hlist->dev_lock);
-	tmp_dsaddr = _device_lookup(hlist, &dsaddr->dev_id);
-	if (tmp_dsaddr == NULL)
-		_device_add(hlist, dsaddr);
-	write_unlock(&hlist->dev_lock);
-
-	/* Cleanup, if device was recently added */
-	if (tmp_dsaddr != NULL) {
-		dprintk(" device found, not adding (after creation)\n");
-		nfs4_pnfs_device_destroy(dsaddr, hlist);
-	}
-
-	return 0;
+	nfs4_fl_free_deviceid(dsaddr);
 }
 
 static void
@@ -514,7 +392,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 	dsaddr->stripe_count = cnt;
 	dsaddr->ds_num = num;
 
-	memcpy(&dsaddr->dev_id, &pdev->dev_id, NFS4_PNFS_DEVICEID4_SIZE);
+	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
+	       NFS4_PNFS_DEVICEID4_SIZE);
 
 	/* Go back an read stripe indices */
 	p = indicesp;
@@ -553,37 +432,40 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
 			}
 		}
 	}
+	nfs4_init_deviceid_node(&dsaddr->deviceid);
+
 	return dsaddr;
 
 out_err_free:
-	nfs4_pnfs_device_destroy(dsaddr, FILE_MT(ino)->hlist);
+	nfs4_fl_free_deviceid(dsaddr);
 out_err:
 	dprintk("%s ERROR: returning NULL\n", __func__);
 	return NULL;
 }
 
-/* Decode the opaque device specified in 'dev'
- * and add it to the list of available devices for this
- * mount point.
- * Must at some point be followed up with nfs4_pnfs_device_destroy
+/*
+ * Decode the opaque device specified in 'dev'
+ * and add it to the list of available devices.
+ * If the deviceid is already cached, nfs4_add_deviceid will return
+ * a pointer to the cached struct and throw away the new.
  */
 static struct nfs4_file_layout_dsaddr*
 decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
 {
 	struct nfs4_file_layout_dsaddr *dsaddr;
+	struct nfs4_deviceid *d;
 
 	dsaddr = decode_device(inode, dev);
 	if (!dsaddr) {
-		printk(KERN_WARNING "%s: Could not decode device\n",
+		printk(KERN_WARNING "%s: Could not decode or add device\n",
 			__func__);
-		nfs4_pnfs_device_destroy(dsaddr, FILE_MT(inode)->hlist);
 		return NULL;
 	}
 
-	if (nfs4_pnfs_device_add(FILE_MT(inode), dsaddr))
-		return NULL;
+	d = nfs4_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
+			      &dsaddr->deviceid);
 
-	return dsaddr;
+	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
 }
 
 /* Retrieve the information for dev_id, add it to the list
@@ -658,16 +540,15 @@ out_free:
 }
 
 struct nfs4_file_layout_dsaddr *
-nfs4_pnfs_device_item_find(struct nfs4_pnfs_dev_hlist *hlist,
-			   struct pnfs_deviceid *dev_id)
+nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
 {
-	struct nfs4_file_layout_dsaddr *dsaddr;
+	struct nfs4_deviceid *d;
 
-	read_lock(&hlist->dev_lock);
-	dsaddr = _device_lookup(hlist, dev_id);
-	read_unlock(&hlist->dev_lock);
-
-	return dsaddr;
+	d = nfs4_find_deviceid(clp->cl_devid_cache, id);
+	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
+		deviceid_fmt(id), d);
+	return (d == NULL) ? NULL :
+		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
 }
 
 /* Want res = ((offset / layout->stripe_unit) % dsaddr->stripe_count)
@@ -705,10 +586,8 @@ nfs4_pnfs_dserver_get(struct pnfs_layout_segment *lseg,
 	if (!layout)
 		return 1;
 
-	dsaddr = nfs4_pnfs_device_item_find(FILE_MT(inode)->hlist,
-					    &layout->dev_id);
-	if (dsaddr == NULL)
-		return 1;
+	dsaddr = container_of(lseg->deviceid, struct nfs4_file_layout_dsaddr,
+			      deviceid);
 
 	stripe_idx = filelayout_dserver_get_index(offset, dsaddr, layout);
 
-- 
1.6.6


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache
  2010-04-26 16:18 ` [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache andros
  2010-04-26 16:18   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros
@ 2010-05-03 11:48   ` Benny Halevy
  2010-05-03 13:57     ` William A. (Andy) Adamson
  1 sibling, 1 reply; 7+ messages in thread
From: Benny Halevy @ 2010-05-03 11:48 UTC (permalink / raw)
  To: andros; +Cc: linux-nfs

On Apr. 26, 2010, 19:18 +0300, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
> 
> A shared RCU device ID cache servicing multiple mounts of a single layout type
> per meta data server (struct nfs_client).
> 
> Device IDs of type deviceid4 are required by all layout types, long lived and
> read at each I/O.  They are added to the deviceid cache at first reference by
> a layout via GETDEVICEINFO and (currently) are only removed at umount.
> 
> Reference count the device ID cache for each mounted file system
> in the initialize_mountpoint layoutdriver_io_operation.
> 
> Dereference the device id cache on file system in the uninitialize_mountpoint
> layoutdriver_io_operation called at umount
> 
> Each layoutsegment assigns a pointer and takes a reference to the
> nfs4_deviceid structure identified by the layout deviceid.
> This is so that there are no deviceid lookups for the normal I/O path.
> 
> Even though required by all layouttypes, the deviceid is not exposed in the
> LAYOUTGET4res but is instead hidden in the opaque layouttype4.
> 
> Therefore, each layout type alloc_lseg calls nfs4_set_layout_deviceid,
> and free_lseg calls nfs4_unset_layout_deviceid.
> 
> While the file layout driver will not cache very many deviceid's, the object
> and block layout drivers could cache 100's for a large installation.
> Use an hlist.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/pnfs.c             |  167 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/nfs4_pnfs.h |   50 +++++++++++++
>  include/linux/nfs_fs_sb.h |    1 +
>  3 files changed, 218 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 91572aa..bf906cc 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -45,6 +45,7 @@
>  #include <linux/nfs4.h>
>  #include <linux/pnfs_xdr.h>
>  #include <linux/nfs4_pnfs.h>
> +#include <linux/rculist.h>
>  
>  #include "internal.h"
>  #include "nfs4_fs.h"
> @@ -2296,3 +2297,169 @@ struct pnfs_client_operations pnfs_ops = {
>  
>  EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
>  EXPORT_SYMBOL(pnfs_register_layoutdriver);
> +
> +
> +/* Device ID cache. Supports one layout type per struct nfs_client */
> +int
> +nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
> +			 void (*free_callback)(struct kref *))
> +{
> +	struct nfs4_deviceid_cache *c;
> +
> +	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
> +	if (!c)
> +		return -ENOMEM;
> +	spin_lock(&clp->cl_lock);
> +	if (clp->cl_devid_cache != NULL) {
> +		kref_get(&clp->cl_devid_cache->dc_kref);
> +		spin_unlock(&clp->cl_lock);
> +		dprintk("%s [kref [%d]]\n", __func__,
> +			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
> +		kfree(c);
> +	} else {
> +		int i;
> +
> +		spin_lock_init(&c->dc_lock);
> +		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
> +			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
> +		kref_init(&c->dc_kref);
> +		c->dc_free_callback = free_callback;
> +		clp->cl_devid_cache = c;
> +		spin_unlock(&clp->cl_lock);
> +		dprintk("%s [new]\n", __func__);
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
> +
> +void
> +nfs4_init_deviceid_node(struct nfs4_deviceid *d)
> +{
> +	INIT_HLIST_NODE(&d->de_node);
> +	kref_init(&d->de_kref);
> +}
> +EXPORT_SYMBOL(nfs4_init_deviceid_node);
> +
> +/* Called from layoutdriver_io_operations->alloc_lseg */
> +void
> +nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
> +{
> +	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
> +	l->deviceid = d;
> +	kref_get(&d->de_kref);
> +}
> +EXPORT_SYMBOL(nfs4_set_layout_deviceid);
> +
> +/* Called from layoutdriver_io_operations->free_lseg */
> +void
> +nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
> +			   struct nfs4_deviceid *d,
> +			   void (*free_callback)(struct kref *))
> +{
> +	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
> +	l->deviceid = NULL;
> +	kref_put(&d->de_kref, free_callback);
> +}
> +EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
> +
> +struct nfs4_deviceid *
> +nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +	long hash = nfs4_deviceid_hash(id);
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
> +			rcu_read_unlock();
> +			return d;
> +		}
> +	}
> +	rcu_read_unlock();
> +	return NULL;
> +}
> +EXPORT_SYMBOL(nfs4_find_deviceid);
> +
> +/*
> + * Add or kref_get a deviceid.
> + * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
> + */
> +struct nfs4_deviceid *
> +nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +	long hash = nfs4_deviceid_hash(&new->de_id);
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	spin_lock(&c->dc_lock);
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
> +			spin_unlock(&c->dc_lock);
> +			dprintk("%s [discard]\n", __func__);
> +			c->dc_free_callback(&new->de_kref);
> +			return d;
> +		}
> +	}
> +	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
> +	spin_unlock(&c->dc_lock);
> +	dprintk("%s [new]\n", __func__);
> +	return new;
> +}
> +EXPORT_SYMBOL(nfs4_add_deviceid);
> +
> +static int
> +nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	spin_lock(&c->dc_lock);
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		hlist_del_rcu(&d->de_node);
> +		spin_unlock(&c->dc_lock);
> +		synchronize_rcu();
> +		dprintk("%s [%d]\n", __func__,
> +			atomic_read(&d->de_kref.refcount));
> +		kref_put(&d->de_kref, c->dc_free_callback);
> +		return 1;
> +	}
> +	spin_unlock(&c->dc_lock);
> +	return 0;
> +}
> +
> +static void
> +nfs4_free_deviceid_cache(struct kref *kref)
> +{
> +	struct nfs4_deviceid_cache *cache =
> +		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
> +	int more;
> +	long i;
> +
> +	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
> +		more = 1;
> +		while (more)
> +			more = nfs4_remove_deviceid(cache, i);

Andy, this can be simplified to

		while (nfs4_remove_deviceid(cache, i))
			;

If ok with you, I'll make this change upon merging.

Benny

> +	}
> +	kfree(cache);
> +}
> +
> +void
> +nfs4_put_deviceid_cache(struct nfs_client *clp)
> +{
> +	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
> +	int refcount;
> +
> +	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
> +	spin_lock(&clp->cl_lock);
> +	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
> +	if (refcount == 1)
> +		clp->cl_devid_cache = NULL;
> +	spin_unlock(&clp->cl_lock);
> +	dprintk("%s [%d]\n", __func__, refcount);
> +	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
> +}
> +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 3caac60..3b7aeb7 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -106,6 +106,7 @@ struct pnfs_layout_segment {
>  	struct kref kref;
>  	bool valid;
>  	struct pnfs_layout_type *layout;
> +	struct nfs4_deviceid *deviceid;
>  	u8 ld_data[];			/* layout driver private data */
>  };
>  
> @@ -275,6 +276,55 @@ struct pnfs_devicelist {
>  	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
>  };
>  
> +/*
> + * Device ID RCU cache. A device ID is unique per client ID and layout type.
> + */
> +#define NFS4_DEVICE_ID_HASH_BITS	5
> +#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
> +#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
> +
> +static inline u32
> +nfs4_deviceid_hash(struct pnfs_deviceid *id)
> +{
> +	unsigned char *cptr = (unsigned char *)id->data;
> +	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
> +	u32 x = 0;
> +
> +	while (nbytes--) {
> +		x *= 37;
> +		x += *cptr++;
> +	}
> +	return x & NFS4_DEVICE_ID_HASH_MASK;
> +}
> +
> +struct nfs4_deviceid_cache {
> +	spinlock_t		dc_lock;
> +	struct kref		dc_kref;
> +	void			(*dc_free_callback)(struct kref *);
> +	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
> +};
> +
> +/* Device ID cache node */
> +struct nfs4_deviceid {
> +	struct hlist_node	de_node;
> +	struct pnfs_deviceid	de_id;
> +	struct kref		de_kref;
> +};
> +
> +extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
> +				void (*free_callback)(struct kref *));
> +extern void nfs4_put_deviceid_cache(struct nfs_client *);
> +extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
> +extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
> +				struct pnfs_deviceid *);
> +extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
> +				struct nfs4_deviceid *);
> +extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
> +				struct nfs4_deviceid *);
> +extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
> +				struct nfs4_deviceid *,
> +				void (*free_callback)(struct kref *));
> +
>  /* pNFS client callback functions.
>   * These operations allow the layout driver to access pNFS client
>   * specific information or call pNFS client->server operations.
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 8522461..ef2e18e 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -87,6 +87,7 @@ struct nfs_client {
>  	u32			cl_exchange_flags;
>  	struct nfs4_session	*cl_session; 	/* sharred session */
>  	struct list_head	cl_lo_inodes;	/* Inodes having layouts */
> +	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
>  #endif /* CONFIG_NFS_V4_1 */
>  
>  #ifdef CONFIG_NFS_FSCACHE


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache
  2010-05-03 11:48   ` [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: " Benny Halevy
@ 2010-05-03 13:57     ` William A. (Andy) Adamson
  0 siblings, 0 replies; 7+ messages in thread
From: William A. (Andy) Adamson @ 2010-05-03 13:57 UTC (permalink / raw)
  To: Benny Halevy; +Cc: linux-nfs

On Mon, May 3, 2010 at 7:48 AM, Benny Halevy <bhalevy@panasas.com> wrote:
> On Apr. 26, 2010, 19:18 +0300, andros@netapp.com wrote:
>> From: Andy Adamson <andros@netapp.com>
>>
>> A shared RCU device ID cache servicing multiple mounts of a single layout type
>> per meta data server (struct nfs_client).
>>
>> Device IDs of type deviceid4 are required by all layout types, long lived and
>> read at each I/O.  They are added to the deviceid cache at first reference by
>> a layout via GETDEVICEINFO and (currently) are only removed at umount.
>>
>> Reference count the device ID cache for each mounted file system
>> in the initialize_mountpoint layoutdriver_io_operation.
>>
>> Dereference the device id cache on file system in the uninitialize_mountpoint
>> layoutdriver_io_operation called at umount
>>
>> Each layoutsegment assigns a pointer and takes a reference to the
>> nfs4_deviceid structure identified by the layout deviceid.
>> This is so that there are no deviceid lookups for the normal I/O path.
>>
>> Even though required by all layouttypes, the deviceid is not exposed in the
>> LAYOUTGET4res but is instead hidden in the opaque layouttype4.
>>
>> Therefore, each layout type alloc_lseg calls nfs4_set_layout_deviceid,
>> and free_lseg calls nfs4_unset_layout_deviceid.
>>
>> While the file layout driver will not cache very many deviceid's, the object
>> and block layout drivers could cache 100's for a large installation.
>> Use an hlist.
>>
>> Signed-off-by: Andy Adamson <andros@netapp.com>
>> ---
>> =A0fs/nfs/pnfs.c =A0 =A0 =A0 =A0 =A0 =A0 | =A0167 ++++++++++++++++++=
+++++++++++++++++++++++++++
>> =A0include/linux/nfs4_pnfs.h | =A0 50 +++++++++++++
>> =A0include/linux/nfs_fs_sb.h | =A0 =A01 +
>> =A03 files changed, 218 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 91572aa..bf906cc 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -45,6 +45,7 @@
>> =A0#include <linux/nfs4.h>
>> =A0#include <linux/pnfs_xdr.h>
>> =A0#include <linux/nfs4_pnfs.h>
>> +#include <linux/rculist.h>
>>
>> =A0#include "internal.h"
>> =A0#include "nfs4_fs.h"
>> @@ -2296,3 +2297,169 @@ struct pnfs_client_operations pnfs_ops =3D {
>>
>> =A0EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
>> =A0EXPORT_SYMBOL(pnfs_register_layoutdriver);
>> +
>> +
>> +/* Device ID cache. Supports one layout type per struct nfs_client =
*/
>> +int
>> +nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0void (*free_callback)(s=
truct kref *))
>> +{
>> + =A0 =A0 struct nfs4_deviceid_cache *c;
>> +
>> + =A0 =A0 c =3D kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERN=
EL);
>> + =A0 =A0 if (!c)
>> + =A0 =A0 =A0 =A0 =A0 =A0 return -ENOMEM;
>> + =A0 =A0 spin_lock(&clp->cl_lock);
>> + =A0 =A0 if (clp->cl_devid_cache !=3D NULL) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 kref_get(&clp->cl_devid_cache->dc_kref);
>> + =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&clp->cl_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s [kref [%d]]\n", __func__,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 atomic_read(&clp->cl_devid=
_cache->dc_kref.refcount));
>> + =A0 =A0 =A0 =A0 =A0 =A0 kfree(c);
>> + =A0 =A0 } else {
>> + =A0 =A0 =A0 =A0 =A0 =A0 int i;
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 spin_lock_init(&c->dc_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 for (i =3D 0; i < NFS4_DEVICE_ID_HASH_SIZE=
 ; i++)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 INIT_HLIST_HEAD(&c->dc_dev=
iceids[i]);
>> + =A0 =A0 =A0 =A0 =A0 =A0 kref_init(&c->dc_kref);
>> + =A0 =A0 =A0 =A0 =A0 =A0 c->dc_free_callback =3D free_callback;
>> + =A0 =A0 =A0 =A0 =A0 =A0 clp->cl_devid_cache =3D c;
>> + =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&clp->cl_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s [new]\n", __func__);
>> + =A0 =A0 }
>> + =A0 =A0 return 0;
>> +}
>> +EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
>> +
>> +void
>> +nfs4_init_deviceid_node(struct nfs4_deviceid *d)
>> +{
>> + =A0 =A0 INIT_HLIST_NODE(&d->de_node);
>> + =A0 =A0 kref_init(&d->de_kref);
>> +}
>> +EXPORT_SYMBOL(nfs4_init_deviceid_node);
>> +
>> +/* Called from layoutdriver_io_operations->alloc_lseg */
>> +void
>> +nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4=
_deviceid *d)
>> +{
>> + =A0 =A0 dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.ref=
count));
>> + =A0 =A0 l->deviceid =3D d;
>> + =A0 =A0 kref_get(&d->de_kref);
>> +}
>> +EXPORT_SYMBOL(nfs4_set_layout_deviceid);
>> +
>> +/* Called from layoutdriver_io_operations->free_lseg */
>> +void
>> +nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs4_devicei=
d *d,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0void (*free_callbac=
k)(struct kref *))
>> +{
>> + =A0 =A0 dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.ref=
count));
>> + =A0 =A0 l->deviceid =3D NULL;
>> + =A0 =A0 kref_put(&d->de_kref, free_callback);
>> +}
>> +EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
>> +
>> +struct nfs4_deviceid *
>> +nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_devic=
eid *id)
>> +{
>> + =A0 =A0 struct nfs4_deviceid *d;
>> + =A0 =A0 struct hlist_node *n;
>> + =A0 =A0 long hash =3D nfs4_deviceid_hash(id);
>> +
>> + =A0 =A0 dprintk("--> %s hash %ld\n", __func__, hash);
>> + =A0 =A0 rcu_read_lock();
>> + =A0 =A0 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_=
node) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVIC=
EID4_SIZE)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 rcu_read_unlock();
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return d;
>> + =A0 =A0 =A0 =A0 =A0 =A0 }
>> + =A0 =A0 }
>> + =A0 =A0 rcu_read_unlock();
>> + =A0 =A0 return NULL;
>> +}
>> +EXPORT_SYMBOL(nfs4_find_deviceid);
>> +
>> +/*
>> + * Add or kref_get a deviceid.
>> + * GETDEVICEINFOs for same deviceid can race. If deviceid is found,=
 discard new
>> + */
>> +struct nfs4_deviceid *
>> +nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_device=
id *new)
>> +{
>> + =A0 =A0 struct nfs4_deviceid *d;
>> + =A0 =A0 struct hlist_node *n;
>> + =A0 =A0 long hash =3D nfs4_deviceid_hash(&new->de_id);
>> +
>> + =A0 =A0 dprintk("--> %s hash %ld\n", __func__, hash);
>> + =A0 =A0 spin_lock(&c->dc_lock);
>> + =A0 =A0 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_=
node) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (!memcmp(&d->de_id, &new->de_id, NFS4_P=
NFS_DEVICEID4_SIZE)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&c->dc_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s [discard]\n", =
__func__);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 c->dc_free_callback(&new->=
de_kref);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return d;
>> + =A0 =A0 =A0 =A0 =A0 =A0 }
>> + =A0 =A0 }
>> + =A0 =A0 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
>> + =A0 =A0 spin_unlock(&c->dc_lock);
>> + =A0 =A0 dprintk("%s [new]\n", __func__);
>> + =A0 =A0 return new;
>> +}
>> +EXPORT_SYMBOL(nfs4_add_deviceid);
>> +
>> +static int
>> +nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash)
>> +{
>> + =A0 =A0 struct nfs4_deviceid *d;
>> + =A0 =A0 struct hlist_node *n;
>> +
>> + =A0 =A0 dprintk("--> %s hash %ld\n", __func__, hash);
>> + =A0 =A0 spin_lock(&c->dc_lock);
>> + =A0 =A0 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_=
node) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 hlist_del_rcu(&d->de_node);
>> + =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&c->dc_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 synchronize_rcu();
>> + =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s [%d]\n", __func__,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 atomic_read(&d->de_kref.re=
fcount));
>> + =A0 =A0 =A0 =A0 =A0 =A0 kref_put(&d->de_kref, c->dc_free_callback)=
;
>> + =A0 =A0 =A0 =A0 =A0 =A0 return 1;
>> + =A0 =A0 }
>> + =A0 =A0 spin_unlock(&c->dc_lock);
>> + =A0 =A0 return 0;
>> +}
>> +
>> +static void
>> +nfs4_free_deviceid_cache(struct kref *kref)
>> +{
>> + =A0 =A0 struct nfs4_deviceid_cache *cache =3D
>> + =A0 =A0 =A0 =A0 =A0 =A0 container_of(kref, struct nfs4_deviceid_ca=
che, dc_kref);
>> + =A0 =A0 int more;
>> + =A0 =A0 long i;
>> +
>> + =A0 =A0 for (i =3D 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 more =3D 1;
>> + =A0 =A0 =A0 =A0 =A0 =A0 while (more)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 more =3D nfs4_remove_devic=
eid(cache, i);
>
> Andy, this can be simplified to
>
>                while (nfs4_remove_deviceid(cache, i))
>                        ;
>
> If ok with you, I'll make this change upon merging.

Yes - looks fine, thanks.

-->Andy

>
> Benny
>
>> + =A0 =A0 }
>> + =A0 =A0 kfree(cache);
>> +}
>> +
>> +void
>> +nfs4_put_deviceid_cache(struct nfs_client *clp)
>> +{
>> + =A0 =A0 struct nfs4_deviceid_cache *tmp =3D clp->cl_devid_cache;
>> + =A0 =A0 int refcount;
>> +
>> + =A0 =A0 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_de=
vid_cache);
>> + =A0 =A0 spin_lock(&clp->cl_lock);
>> + =A0 =A0 refcount =3D atomic_read(&clp->cl_devid_cache->dc_kref.ref=
count);
>> + =A0 =A0 if (refcount =3D=3D 1)
>> + =A0 =A0 =A0 =A0 =A0 =A0 clp->cl_devid_cache =3D NULL;
>> + =A0 =A0 spin_unlock(&clp->cl_lock);
>> + =A0 =A0 dprintk("%s [%d]\n", __func__, refcount);
>> + =A0 =A0 kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
>> +}
>> +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
>> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
>> index 3caac60..3b7aeb7 100644
>> --- a/include/linux/nfs4_pnfs.h
>> +++ b/include/linux/nfs4_pnfs.h
>> @@ -106,6 +106,7 @@ struct pnfs_layout_segment {
>> =A0 =A0 =A0 struct kref kref;
>> =A0 =A0 =A0 bool valid;
>> =A0 =A0 =A0 struct pnfs_layout_type *layout;
>> + =A0 =A0 struct nfs4_deviceid *deviceid;
>> =A0 =A0 =A0 u8 ld_data[]; =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* lay=
out driver private data */
>> =A0};
>>
>> @@ -275,6 +276,55 @@ struct pnfs_devicelist {
>> =A0 =A0 =A0 struct pnfs_deviceid =A0 =A0dev_id[NFS4_PNFS_GETDEVLIST_=
MAXNUM];
>> =A0};
>>
>> +/*
>> + * Device ID RCU cache. A device ID is unique per client ID and lay=
out type.
>> + */
>> +#define NFS4_DEVICE_ID_HASH_BITS =A0 =A0 5
>> +#define NFS4_DEVICE_ID_HASH_SIZE =A0 =A0 (1 << NFS4_DEVICE_ID_HASH_=
BITS)
>> +#define NFS4_DEVICE_ID_HASH_MASK =A0 =A0 (NFS4_DEVICE_ID_HASH_SIZE =
- 1)
>> +
>> +static inline u32
>> +nfs4_deviceid_hash(struct pnfs_deviceid *id)
>> +{
>> + =A0 =A0 unsigned char *cptr =3D (unsigned char *)id->data;
>> + =A0 =A0 unsigned int nbytes =3D NFS4_PNFS_DEVICEID4_SIZE;
>> + =A0 =A0 u32 x =3D 0;
>> +
>> + =A0 =A0 while (nbytes--) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 x *=3D 37;
>> + =A0 =A0 =A0 =A0 =A0 =A0 x +=3D *cptr++;
>> + =A0 =A0 }
>> + =A0 =A0 return x & NFS4_DEVICE_ID_HASH_MASK;
>> +}
>> +
>> +struct nfs4_deviceid_cache {
>> + =A0 =A0 spinlock_t =A0 =A0 =A0 =A0 =A0 =A0 =A0dc_lock;
>> + =A0 =A0 struct kref =A0 =A0 =A0 =A0 =A0 =A0 dc_kref;
>> + =A0 =A0 void =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(*dc_free_call=
back)(struct kref *);
>> + =A0 =A0 struct hlist_head =A0 =A0 =A0 dc_deviceids[NFS4_DEVICE_ID_=
HASH_SIZE];
>> +};
>> +
>> +/* Device ID cache node */
>> +struct nfs4_deviceid {
>> + =A0 =A0 struct hlist_node =A0 =A0 =A0 de_node;
>> + =A0 =A0 struct pnfs_deviceid =A0 =A0de_id;
>> + =A0 =A0 struct kref =A0 =A0 =A0 =A0 =A0 =A0 de_kref;
>> +};
>> +
>> +extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void (*fre=
e_callback)(struct kref *));
>> +extern void nfs4_put_deviceid_cache(struct nfs_client *);
>> +extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
>> +extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_devicei=
d_cache *,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct pnf=
s_deviceid *);
>> +extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid=
_cache *,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs=
4_deviceid *);
>> +extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs=
4_deviceid *);
>> +extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *=
,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs=
4_deviceid *,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void (*fre=
e_callback)(struct kref *));
>> +
>> =A0/* pNFS client callback functions.
>> =A0 * These operations allow the layout driver to access pNFS client
>> =A0 * specific information or call pNFS client->server operations.
>> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
>> index 8522461..ef2e18e 100644
>> --- a/include/linux/nfs_fs_sb.h
>> +++ b/include/linux/nfs_fs_sb.h
>> @@ -87,6 +87,7 @@ struct nfs_client {
>> =A0 =A0 =A0 u32 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 cl_exchange_=
flags;
>> =A0 =A0 =A0 struct nfs4_session =A0 =A0 *cl_session; =A0 =A0/* sharr=
ed session */
>> =A0 =A0 =A0 struct list_head =A0 =A0 =A0 =A0cl_lo_inodes; =A0 /* Ino=
des having layouts */
>> + =A0 =A0 struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS device=
id cache */
>> =A0#endif /* CONFIG_NFS_V4_1 */
>>
>> =A0#ifdef CONFIG_NFS_FSCACHE
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver
  2010-04-16 15:52 ` [PATCH 1/3] SQUASHME pnfs_submit: " andros
@ 2010-04-16 15:52   ` andros
  0 siblings, 0 replies; 7+ messages in thread
From: andros @ 2010-04-16 15:52 UTC (permalink / raw)
  To: pnfs-Xh+NVF5n0LLYtjvyW6yDsg; +Cc: linux-nfs, Andy Adamson

From: Andy Adamson <andros@netapp.com>

The same struct nfs_server can enter set_pnfs_layoutdriver for mounts that
share a super block.  Don't initialize a pnfs mountpoint more than once.

Don't set the pnfs_curr_ld until the pnfs mountpoint initialization succeeds

Signed-off-by: Andy Adamson <andros@netapp.com>
---
 fs/nfs/pnfs.c |   15 ++++++++++-----
 1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8492aef..1d903fd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -215,20 +215,25 @@ set_pnfs_layoutdriver(struct super_block *sb, struct nfs_fh *fh, u32 id)
 	struct pnfs_mount_type *mt;
 	struct nfs_server *server = NFS_SB(sb);
 
+	if (server->pnfs_curr_ld)
+		return;
+
 	if (id > 0 && find_pnfs(id, &mod)) {
-		dprintk("%s: Setting pNFS module\n", __func__);
-		server->pnfs_curr_ld = mod->pnfs_ld_type;
-		mt = server->pnfs_curr_ld->ld_io_ops->initialize_mountpoint(
+		mt = mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
 			sb, fh);
 		if (!mt) {
 			printk(KERN_ERR "%s: Error initializing mount point "
 			       "for layout driver %u. ", __func__, id);
 			goto out_err;
 		}
-		/* Layout driver succeeded in initializing mountpoint */
+		/*
+		 * Layout driver succeeded in initializing mountpoint
+		 * and has taken a reference on the nfs_client cl_devid_cache
+		 */
+		server->pnfs_curr_ld = mod->pnfs_ld_type;
 		server->pnfs_mountid = mt;
-		/* Set the rpc_ops */
 		server->nfs_client->rpc_ops = &pnfs_v4_clientops;
+		dprintk("%s: pNFS module for %u set\n", __func__, id);
 		return;
 	}
 
-- 
1.6.6


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-05-03 13:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-26 16:18 [PATCH 0/3] pNFS generic device ID cache version 3 andros
2010-04-26 16:18 ` [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache andros
2010-04-26 16:18   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros
2010-04-26 16:18     ` [PATCH 3/3] SQUASHME pnfs-submit: file layout driver generic device ID cache andros
2010-05-03 11:48   ` [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: " Benny Halevy
2010-05-03 13:57     ` William A. (Andy) Adamson
  -- strict thread matches above, loose matches on Subject: below --
2010-04-16 15:52 [PATCH 0/3] pNFS " andros
2010-04-16 15:52 ` [PATCH 1/3] SQUASHME pnfs_submit: " andros
2010-04-16 15:52   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.