All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ceph: new mount option that control fscache data are indexed
@ 2017-06-27  4:23 Yan, Zheng
  2017-06-27 15:08 ` Jeff Layton
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Yan, Zheng @ 2017-06-27  4:23 UTC (permalink / raw)
  To: ceph-devel; +Cc: milosz, jlayton, anish_gupta, dhowells, Yan, Zheng

Current ceph uses FSID as primary index key of fscache data. This
allows ceph to retain cached data across remounts. But this causes
problems (kernel oops; fscache does not support sharing data) when
a filesystem gets mounted (with fscache enabled) several times.

The fix is adding a new mount option, which makes ceph use client
ID as primary index key. Client ID is unique for each mount. For
the old fscache mount option, only allow one fscache instance for
each filesystem.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 fs/ceph/super.c |  32 ++++++++++------
 fs/ceph/super.h |   5 ++-
 3 files changed, 131 insertions(+), 23 deletions(-)

diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 4e7421c..b4956b4 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
 	.version	= 0,
 };
 
-static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
-					     void *buffer, uint16_t maxbuf)
+static DEFINE_MUTEX(ceph_fscache_fsid_lock);
+static LIST_HEAD(ceph_fscache_fsid_list);
+
+struct ceph_fscache_fsid {
+	struct list_head list;
+	struct fscache_cookie *fscache;
+	struct ceph_fsid fsid;
+};
+
+static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
+					  void *buffer, uint16_t maxbuf)
 {
 	const struct ceph_fs_client* fsc = cookie_netfs_data;
 	uint16_t klen;
@@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
 static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
 	.name		= "CEPH.fsid",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= ceph_fscache_session_get_key,
+	.get_key	= ceph_fscache_fsid_get_key,
+};
+
+static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
+					    void *buffer, uint16_t maxbuf)
+{
+	const struct ceph_fs_client* fsc = cookie_netfs_data;
+	const struct ceph_fsid *fsid = &fsc->client->fsid;
+	u64 client_id = fsc->client->monc.auth->global_id;
+	uint16_t fsid_len, key_len;
+
+	fsid_len = sizeof(*fsid);
+	key_len = fsid_len + sizeof(client_id);
+	if (key_len > maxbuf)
+		return 0;
+
+	memcpy(buffer, fsid, fsid_len);
+	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
+
+	return key_len;
+}
+
+static const struct fscache_cookie_def ceph_fscache_client_object_def = {
+	.name		= "CEPH.client",
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= ceph_fscache_client_get_key,
 };
 
 int ceph_fscache_register(void)
@@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
 
 int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
 {
-	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
-					      &ceph_fscache_fsid_object_def,
-					      fsc, true);
-	if (!fsc->fscache)
-		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
+	const struct ceph_fsid *fsid = &fsc->client->fsid;
+	struct ceph_fscache_fsid *ent;
+	int err = 0;
+
+	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
+		fsc->fscache = fscache_acquire_cookie(
+						ceph_cache_netfs.primary_index,
+						&ceph_fscache_client_object_def,
+						fsc, true);
+		if (!fsc->fscache)
+			pr_err("Unable to register fsid: %p "
+			       "fscache cookie\n", fsc);
+	} else {
+		mutex_lock(&ceph_fscache_fsid_lock);
+		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
+			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
+				pr_err("fscache cookie already registered "
+				       "for fsid %pU\n", fsid);
+				pr_err("  use tmpfsc mount option instead\n");
+				err = -EBUSY;
+				goto out_unlock;
+			}
+		}
 
-	return 0;
+		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		fsc->fscache = fscache_acquire_cookie(
+						ceph_cache_netfs.primary_index,
+						&ceph_fscache_fsid_object_def,
+						fsc, true);
+
+		if (fsc->fscache) {
+			memcpy(&ent->fsid, fsid, sizeof(*fsid));
+			ent->fscache = fsc->fscache;
+			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
+		} else {
+			kfree(ent);
+			pr_err("Unable to register fsid: %p "
+			       "fscache cookie\n", fsc);
+		}
+out_unlock:
+		mutex_unlock(&ceph_fscache_fsid_lock);
+	}
+	return err;
 }
 
 static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
@@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 
 void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
 {
-	fscache_relinquish_cookie(fsc->fscache, 0);
+	if (fscache_cookie_valid(fsc->fscache)) {
+		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
+			const struct ceph_fsid *fsid = &fsc->client->fsid;
+			struct ceph_fscache_fsid *ent, *found = NULL;
+
+			mutex_lock(&ceph_fscache_fsid_lock);
+			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
+				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
+					found = ent;
+					break;
+				}
+			}
+			if (found) {
+				WARN_ON_ONCE(found->fscache != fsc->fscache);
+				list_del(&found->list);
+				kfree(found);
+			} else {
+				WARN_ON_ONCE(true);
+			}
+			mutex_unlock(&ceph_fscache_fsid_lock);
+		}
+		__fscache_relinquish_cookie(fsc->fscache, 0);
+	}
 	fsc->fscache = NULL;
 }
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 14e78dd..bb6dd7f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -134,6 +134,7 @@ enum {
 	Opt_ino32,
 	Opt_noino32,
 	Opt_fscache,
+	Opt_tmpfscache,
 	Opt_nofscache,
 	Opt_poolperm,
 	Opt_nopoolperm,
@@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
 	{Opt_ino32, "ino32"},
 	{Opt_noino32, "noino32"},
 	{Opt_fscache, "fsc"},
+	{Opt_tmpfscache, "tmpfsc"},
 	{Opt_nofscache, "nofsc"},
 	{Opt_poolperm, "poolperm"},
 	{Opt_nopoolperm, "nopoolperm"},
@@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_fscache:
 		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
 		break;
+	case Opt_tmpfscache:
+		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
+				CEPH_MOUNT_OPT_TMPFSCACHE;
+		break;
 	case Opt_nofscache:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
 		break;
@@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",noasyncreaddir");
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
-	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
-		seq_puts(m, ",fsc");
+	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
+		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
+			seq_puts(m, ",tmpfsc");
+		else
+			seq_puts(m, ",fsc");
+	}
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 		seq_puts(m, ",nopoolperm");
 
@@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
-	/* setup fscache */
-	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
-	    (ceph_fscache_register_fs(fsc) != 0))
-		goto fail_fscache;
-
 	/* caps */
 	fsc->min_caps = fsopt->max_readdir;
 
 	return fsc;
 
-fail_fscache:
-	ceph_fscache_unregister_fs(fsc);
 fail_trunc_wq:
 	destroy_workqueue(fsc->trunc_wq);
 fail_pg_inv_wq:
@@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
 	dout("destroy_fs_client %p\n", fsc);
 
-	ceph_fscache_unregister_fs(fsc);
-
 	destroy_workqueue(fsc->wb_wq);
 	destroy_workqueue(fsc->pg_inv_wq);
 	destroy_workqueue(fsc->trunc_wq);
@@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 		if (err < 0)
 			goto out;
 
+		/* setup fscache */
+		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
+			err = ceph_fscache_register_fs(fsc);
+			if (err < 0)
+				goto out;
+		}
+
 		if (!fsc->mount_options->server_path) {
 			path = "";
 			dout("mount opening path \\t\n");
@@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
 	fsc->client->extra_mon_dispatch = NULL;
 	ceph_fs_debugfs_cleanup(fsc);
 
+	ceph_fscache_unregister_fs(fsc);
+
 	ceph_mdsc_destroy(fsc);
 
 	destroy_fs_client(fsc);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f8a0aba..21e5562 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,8 +36,9 @@
 #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
-#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
-#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
+#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
+#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
+#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
-- 
2.9.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: new mount option that control fscache data are indexed
  2017-06-27  4:23 [PATCH] ceph: new mount option that control fscache data are indexed Yan, Zheng
@ 2017-06-27 15:08 ` Jeff Layton
  2017-06-28  3:01   ` Yan, Zheng
  2017-06-27 16:20 ` Luis Henriques
       [not found] ` <32744905.807639.1498691427197@mail.yahoo.com>
  2 siblings, 1 reply; 5+ messages in thread
From: Jeff Layton @ 2017-06-27 15:08 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel; +Cc: milosz, anish_gupta, dhowells

On Tue, 2017-06-27 at 12:23 +0800, Yan, Zheng wrote:
> Current ceph uses FSID as primary index key of fscache data. This
> allows ceph to retain cached data across remount. But this causes
> problem (kernel opps, fscache does not support sharing data) when
> a filesystem get mounted (with fscache enabled) several times.
> 
> The fix is adding a new mount option, which makes ceph use client
> ID as primary index key. Client ID is unique for each mount. For
> the old fscache mount option, only allow one fscache instance for
> each filesystem.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>

Yuck. I hope this will be well documented. An admin will have no idea
what this does otherwise.

FWIW, the kernel nfs client solves this by unifying the pagecache
between mounts. You have to explicitly disable cache sharing if you want
different cache objects ("nosharecache").

That could be done with ceph too, but it would take some restructuring.

> ---
>  fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  fs/ceph/super.c |  32 ++++++++++------
>  fs/ceph/super.h |   5 ++-
>  3 files changed, 131 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
> index 4e7421c..b4956b4 100644
> --- a/fs/ceph/cache.c
> +++ b/fs/ceph/cache.c
> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>  	.version	= 0,
>  };
>  
> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> -					     void *buffer, uint16_t maxbuf)
> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
> +static LIST_HEAD(ceph_fscache_fsid_list);
> +
> +struct ceph_fscache_fsid {
> +	struct list_head list;
> +	struct fscache_cookie *fscache;
> +	struct ceph_fsid fsid;
> +};
> +
> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
> +					  void *buffer, uint16_t maxbuf)
>  {
>  	const struct ceph_fs_client* fsc = cookie_netfs_data;
>  	uint16_t klen;
> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>  static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>  	.name		= "CEPH.fsid",
>  	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> -	.get_key	= ceph_fscache_session_get_key,
> +	.get_key	= ceph_fscache_fsid_get_key,
> +};
> +
> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
> +					    void *buffer, uint16_t maxbuf)
> +{
> +	const struct ceph_fs_client* fsc = cookie_netfs_data;
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	u64 client_id = fsc->client->monc.auth->global_id;
> +	uint16_t fsid_len, key_len;
> +
> +	fsid_len = sizeof(*fsid);
> +	key_len = fsid_len + sizeof(client_id);
> +	if (key_len > maxbuf)
> +		return 0;
> +
> +	memcpy(buffer, fsid, fsid_len);
> +	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
> +
> +	return key_len;
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
> +	.name		= "CEPH.client",
> +	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> +	.get_key	= ceph_fscache_client_get_key,
>  };
>  
>  int ceph_fscache_register(void)
> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
>  
>  int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
>  {
> -	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
> -					      &ceph_fscache_fsid_object_def,
> -					      fsc, true);
> -	if (!fsc->fscache)
> -		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	struct ceph_fscache_fsid *ent;
> +	int err = 0;
> +
> +	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_client_object_def,
> +						fsc, true);
> +		if (!fsc->fscache)
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);
> +	} else {
> +		mutex_lock(&ceph_fscache_fsid_lock);
> +		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +				pr_err("fscache cookie already registered "
> +				       "for fsid %pU\n", fsid);
> +				pr_err("  use tmpfsc mount option instead\n");
> +				err = -EBUSY;
> +				goto out_unlock;
> +			}
> +		}
>  
> -	return 0;
> +		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent) {
> +			err = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_fsid_object_def,
> +						fsc, true);
> +
> +		if (fsc->fscache) {
> +			memcpy(&ent->fsid, fsid, sizeof(*fsid));
> +			ent->fscache = fsc->fscache;
> +			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
> +		} else {
> +			kfree(ent);
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);
> +		}
> +out_unlock:
> +		mutex_unlock(&ceph_fscache_fsid_lock);
> +	}
> +	return err;
>  }
>  
>  static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
>  
>  void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
>  {
> -	fscache_relinquish_cookie(fsc->fscache, 0);
> +	if (fscache_cookie_valid(fsc->fscache)) {
> +		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
> +			const struct ceph_fsid *fsid = &fsc->client->fsid;
> +			struct ceph_fscache_fsid *ent, *found = NULL;
> +
> +			mutex_lock(&ceph_fscache_fsid_lock);
> +			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +					found = ent;
> +					break;
> +				}
> +			}
> +			if (found) {
> +				WARN_ON_ONCE(found->fscache != fsc->fscache);
> +				list_del(&found->list);
> +				kfree(found);
> +			} else {
> +				WARN_ON_ONCE(true);
> +			}
> +			mutex_unlock(&ceph_fscache_fsid_lock);
> +		}
> +		__fscache_relinquish_cookie(fsc->fscache, 0);
> +	}
>  	fsc->fscache = NULL;
>  }
>  
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 14e78dd..bb6dd7f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -134,6 +134,7 @@ enum {
>  	Opt_ino32,
>  	Opt_noino32,
>  	Opt_fscache,
> +	Opt_tmpfscache,
>  	Opt_nofscache,
>  	Opt_poolperm,
>  	Opt_nopoolperm,
> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>  	{Opt_ino32, "ino32"},
>  	{Opt_noino32, "noino32"},
>  	{Opt_fscache, "fsc"},
> +	{Opt_tmpfscache, "tmpfsc"},

Maybe allowing the fsc option to take an optional argument would be
cleaner?

    fsc=tmp

That would also leave open the option to allow other flavors in the
future.


>  	{Opt_nofscache, "nofsc"},
>  	{Opt_poolperm, "poolperm"},
>  	{Opt_nopoolperm, "nopoolperm"},
> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>  	case Opt_fscache:
>  		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> +	case Opt_tmpfscache:
> +		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
> +				CEPH_MOUNT_OPT_TMPFSCACHE;
> +		break;
>  	case Opt_nofscache:
>  		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>  		seq_puts(m, ",noasyncreaddir");
>  	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>  		seq_puts(m, ",nodcache");
> -	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
> -		seq_puts(m, ",fsc");
> +	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
> +			seq_puts(m, ",tmpfsc");
> +		else
> +			seq_puts(m, ",fsc");
> +	}
>  	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>  		seq_puts(m, ",nopoolperm");
>  
> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>  	if (!fsc->wb_pagevec_pool)
>  		goto fail_trunc_wq;
>  
> -	/* setup fscache */
> -	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
> -	    (ceph_fscache_register_fs(fsc) != 0))
> -		goto fail_fscache;
> -
>  	/* caps */
>  	fsc->min_caps = fsopt->max_readdir;
>  
>  	return fsc;
>  
> -fail_fscache:
> -	ceph_fscache_unregister_fs(fsc);
>  fail_trunc_wq:
>  	destroy_workqueue(fsc->trunc_wq);
>  fail_pg_inv_wq:
> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>  {
>  	dout("destroy_fs_client %p\n", fsc);
>  
> -	ceph_fscache_unregister_fs(fsc);
> -
>  	destroy_workqueue(fsc->wb_wq);
>  	destroy_workqueue(fsc->pg_inv_wq);
>  	destroy_workqueue(fsc->trunc_wq);
> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>  		if (err < 0)
>  			goto out;
>  
> +		/* setup fscache */
> +		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +			err = ceph_fscache_register_fs(fsc);
> +			if (err < 0)
> +				goto out;
> +		}
> +
>  		if (!fsc->mount_options->server_path) {
>  			path = "";
>  			dout("mount opening path \\t\n");
> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>  	fsc->client->extra_mon_dispatch = NULL;
>  	ceph_fs_debugfs_cleanup(fsc);
>  
> +	ceph_fscache_unregister_fs(fsc);
> +
>  	ceph_mdsc_destroy(fsc);
>  
>  	destroy_fs_client(fsc);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index f8a0aba..21e5562 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -36,8 +36,9 @@
>  #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
>  #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
>  #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
> -#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
> +#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
>  
>  #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
>  

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: new mount option that control fscache data are indexed
  2017-06-27  4:23 [PATCH] ceph: new mount option that control fscache data are indexed Yan, Zheng
  2017-06-27 15:08 ` Jeff Layton
@ 2017-06-27 16:20 ` Luis Henriques
       [not found] ` <32744905.807639.1498691427197@mail.yahoo.com>
  2 siblings, 0 replies; 5+ messages in thread
From: Luis Henriques @ 2017-06-27 16:20 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: ceph-devel, milosz, jlayton, anish_gupta, dhowells

"Yan, Zheng" <zyan@redhat.com> writes:

> Current ceph uses FSID as primary index key of fscache data. This
> allows ceph to retain cached data across remount. But this causes
> problem (kernel opps, fscache does not support sharing data) when
> a filesystem get mounted (with fscache enabled) several times.
>
> The fix is adding a new mount option, which makes ceph use client
> ID as primary index key. Client ID is unique for each mount. For
> the old fscache mount option, only allow one fscache instance for
> each filesystem.
>
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  fs/ceph/super.c |  32 ++++++++++------
>  fs/ceph/super.h |   5 ++-
>  3 files changed, 131 insertions(+), 23 deletions(-)
>
> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
> index 4e7421c..b4956b4 100644
> --- a/fs/ceph/cache.c
> +++ b/fs/ceph/cache.c
> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>  	.version	= 0,
>  };
>  
> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> -					     void *buffer, uint16_t maxbuf)
> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
> +static LIST_HEAD(ceph_fscache_fsid_list);
> +
> +struct ceph_fscache_fsid {
> +	struct list_head list;
> +	struct fscache_cookie *fscache;
> +	struct ceph_fsid fsid;
> +};
> +
> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
> +					  void *buffer, uint16_t maxbuf)
>  {
>  	const struct ceph_fs_client* fsc = cookie_netfs_data;
>  	uint16_t klen;
> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>  static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>  	.name		= "CEPH.fsid",
>  	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> -	.get_key	= ceph_fscache_session_get_key,
> +	.get_key	= ceph_fscache_fsid_get_key,
> +};
> +
> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
> +					    void *buffer, uint16_t maxbuf)
> +{
> +	const struct ceph_fs_client* fsc = cookie_netfs_data;
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	u64 client_id = fsc->client->monc.auth->global_id;
> +	uint16_t fsid_len, key_len;
> +
> +	fsid_len = sizeof(*fsid);
> +	key_len = fsid_len + sizeof(client_id);
> +	if (key_len > maxbuf)
> +		return 0;
> +
> +	memcpy(buffer, fsid, fsid_len);
> +	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
> +
> +	return key_len;
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
> +	.name		= "CEPH.client",
> +	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> +	.get_key	= ceph_fscache_client_get_key,
>  };
>  
>  int ceph_fscache_register(void)
> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
>  
>  int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
>  {
> -	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
> -					      &ceph_fscache_fsid_object_def,
> -					      fsc, true);
> -	if (!fsc->fscache)
> -		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	struct ceph_fscache_fsid *ent;
> +	int err = 0;
> +
> +	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_client_object_def,
> +						fsc, true);
> +		if (!fsc->fscache)
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);

err needs to be set here (-ENOMEM?) and also below, after the
kfree(ent).

Cheers,
-- 
Luís

> +	} else {
> +		mutex_lock(&ceph_fscache_fsid_lock);
> +		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +				pr_err("fscache cookie already registered "
> +				       "for fsid %pU\n", fsid);
> +				pr_err("  use tmpfsc mount option instead\n");
> +				err = -EBUSY;
> +				goto out_unlock;
> +			}
> +		}
>  
> -	return 0;
> +		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent) {
> +			err = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_fsid_object_def,
> +						fsc, true);
> +
> +		if (fsc->fscache) {
> +			memcpy(&ent->fsid, fsid, sizeof(*fsid));
> +			ent->fscache = fsc->fscache;
> +			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
> +		} else {
> +			kfree(ent);
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);
> +		}
> +out_unlock:
> +		mutex_unlock(&ceph_fscache_fsid_lock);
> +	}
> +	return err;
>  }
>  
>  static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
>  
>  void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
>  {
> -	fscache_relinquish_cookie(fsc->fscache, 0);
> +	if (fscache_cookie_valid(fsc->fscache)) {
> +		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
> +			const struct ceph_fsid *fsid = &fsc->client->fsid;
> +			struct ceph_fscache_fsid *ent, *found = NULL;
> +
> +			mutex_lock(&ceph_fscache_fsid_lock);
> +			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +					found = ent;
> +					break;
> +				}
> +			}
> +			if (found) {
> +				WARN_ON_ONCE(found->fscache != fsc->fscache);
> +				list_del(&found->list);
> +				kfree(found);
> +			} else {
> +				WARN_ON_ONCE(true);
> +			}
> +			mutex_unlock(&ceph_fscache_fsid_lock);
> +		}
> +		__fscache_relinquish_cookie(fsc->fscache, 0);
> +	}
>  	fsc->fscache = NULL;
>  }
>  
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 14e78dd..bb6dd7f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -134,6 +134,7 @@ enum {
>  	Opt_ino32,
>  	Opt_noino32,
>  	Opt_fscache,
> +	Opt_tmpfscache,
>  	Opt_nofscache,
>  	Opt_poolperm,
>  	Opt_nopoolperm,
> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>  	{Opt_ino32, "ino32"},
>  	{Opt_noino32, "noino32"},
>  	{Opt_fscache, "fsc"},
> +	{Opt_tmpfscache, "tmpfsc"},
>  	{Opt_nofscache, "nofsc"},
>  	{Opt_poolperm, "poolperm"},
>  	{Opt_nopoolperm, "nopoolperm"},
> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>  	case Opt_fscache:
>  		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> +	case Opt_tmpfscache:
> +		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
> +				CEPH_MOUNT_OPT_TMPFSCACHE;
> +		break;
>  	case Opt_nofscache:
>  		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>  		seq_puts(m, ",noasyncreaddir");
>  	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>  		seq_puts(m, ",nodcache");
> -	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
> -		seq_puts(m, ",fsc");
> +	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
> +			seq_puts(m, ",tmpfsc");
> +		else
> +			seq_puts(m, ",fsc");
> +	}
>  	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>  		seq_puts(m, ",nopoolperm");
>  
> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>  	if (!fsc->wb_pagevec_pool)
>  		goto fail_trunc_wq;
>  
> -	/* setup fscache */
> -	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
> -	    (ceph_fscache_register_fs(fsc) != 0))
> -		goto fail_fscache;
> -
>  	/* caps */
>  	fsc->min_caps = fsopt->max_readdir;
>  
>  	return fsc;
>  
> -fail_fscache:
> -	ceph_fscache_unregister_fs(fsc);
>  fail_trunc_wq:
>  	destroy_workqueue(fsc->trunc_wq);
>  fail_pg_inv_wq:
> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>  {
>  	dout("destroy_fs_client %p\n", fsc);
>  
> -	ceph_fscache_unregister_fs(fsc);
> -
>  	destroy_workqueue(fsc->wb_wq);
>  	destroy_workqueue(fsc->pg_inv_wq);
>  	destroy_workqueue(fsc->trunc_wq);
> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>  		if (err < 0)
>  			goto out;
>  
> +		/* setup fscache */
> +		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +			err = ceph_fscache_register_fs(fsc);
> +			if (err < 0)
> +				goto out;
> +		}
> +
>  		if (!fsc->mount_options->server_path) {
>  			path = "";
>  			dout("mount opening path \\t\n");
> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>  	fsc->client->extra_mon_dispatch = NULL;
>  	ceph_fs_debugfs_cleanup(fsc);
>  
> +	ceph_fscache_unregister_fs(fsc);
> +
>  	ceph_mdsc_destroy(fsc);
>  
>  	destroy_fs_client(fsc);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index f8a0aba..21e5562 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -36,8 +36,9 @@
>  #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
>  #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
>  #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
> -#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
> +#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
>  
>  #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: new mount option that control fscache data are indexed
  2017-06-27 15:08 ` Jeff Layton
@ 2017-06-28  3:01   ` Yan, Zheng
  0 siblings, 0 replies; 5+ messages in thread
From: Yan, Zheng @ 2017-06-28  3:01 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Ceph Development, Milosz Tanski, Anish Gupta, David Howells


> On 27 Jun 2017, at 23:08, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Tue, 2017-06-27 at 12:23 +0800, Yan, Zheng wrote:
>> Current ceph uses FSID as primary index key of fscache data. This
>> allows ceph to retain cached data across remount. But this causes
>> a problem (kernel oops; fscache does not support sharing data) when
>> a filesystem gets mounted (with fscache enabled) several times.
>> 
>> The fix is adding a new mount option, which makes ceph use client
>> ID as primary index key. Client ID is unique for each mount. For
>> the old fscache mount option, only allow one fscache instance for
>> each filesystem.
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> 
> Yuck. I hope this will be well documented. An admin will have no idea
> what this does otherwise.
> 
> FWIW, the kernel nfs client solves this by unifying the pagecache
> between mounts. You have to explicitly disable cache sharing if you want
> different cache objects ("nosharecache”).

ceph tries unifying the pagecache between mounts too. It creates a separate mds_client
when the mount option does not match the existing one or the “noshare” mount option is provided.
I checked the nfs code; it seems nfs does similar things.

Regards
Yan, Zheng

> 
> That could be done with ceph too, but it would take some restructuring.
> 
>> ---
>> fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
>> fs/ceph/super.c |  32 ++++++++++------
>> fs/ceph/super.h |   5 ++-
>> 3 files changed, 131 insertions(+), 23 deletions(-)
>> 
>> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
>> index 4e7421c..b4956b4 100644
>> --- a/fs/ceph/cache.c
>> +++ b/fs/ceph/cache.c
>> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>> 	.version	= 0,
>> };
>> 
>> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>> -					     void *buffer, uint16_t maxbuf)
>> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
>> +static LIST_HEAD(ceph_fscache_fsid_list);
>> +
>> +struct ceph_fscache_fsid {
>> +	struct list_head list;
>> +	struct fscache_cookie *fscache;
>> +	struct ceph_fsid fsid;
>> +};
>> +
>> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
>> +					  void *buffer, uint16_t maxbuf)
>> {
>> 	const struct ceph_fs_client* fsc = cookie_netfs_data;
>> 	uint16_t klen;
>> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>> static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>> 	.name		= "CEPH.fsid",
>> 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
>> -	.get_key	= ceph_fscache_session_get_key,
>> +	.get_key	= ceph_fscache_fsid_get_key,
>> +};
>> +
>> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
>> +					    void *buffer, uint16_t maxbuf)
>> +{
>> +	const struct ceph_fs_client* fsc = cookie_netfs_data;
>> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +	u64 client_id = fsc->client->monc.auth->global_id;
>> +	uint16_t fsid_len, key_len;
>> +
>> +	fsid_len = sizeof(*fsid);
>> +	key_len = fsid_len + sizeof(client_id);
>> +	if (key_len > maxbuf)
>> +		return 0;
>> +
>> +	memcpy(buffer, fsid, fsid_len);
>> +	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
>> +
>> +	return key_len;
>> +}
>> +
>> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
>> +	.name		= "CEPH.client",
>> +	.type		= FSCACHE_COOKIE_TYPE_INDEX,
>> +	.get_key	= ceph_fscache_client_get_key,
>> };
>> 
>> int ceph_fscache_register(void)
>> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
>> 
>> int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
>> {
>> -	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
>> -					      &ceph_fscache_fsid_object_def,
>> -					      fsc, true);
>> -	if (!fsc->fscache)
>> -		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
>> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +	struct ceph_fscache_fsid *ent;
>> +	int err = 0;
>> +
>> +	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
>> +		fsc->fscache = fscache_acquire_cookie(
>> +						ceph_cache_netfs.primary_index,
>> +						&ceph_fscache_client_object_def,
>> +						fsc, true);
>> +		if (!fsc->fscache)
>> +			pr_err("Unable to register fsid: %p "
>> +			       "fscache cookie\n", fsc);
>> +	} else {
>> +		mutex_lock(&ceph_fscache_fsid_lock);
>> +		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
>> +			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
>> +				pr_err("fscache cookie already registered "
>> +				       "for fsid %pU\n", fsid);
>> +				pr_err("  use tmpfsc mount option instead\n");
>> +				err = -EBUSY;
>> +				goto out_unlock;
>> +			}
>> +		}
>> 
>> -	return 0;
>> +		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
>> +		if (!ent) {
>> +			err = -ENOMEM;
>> +			goto out_unlock;
>> +		}
>> +
>> +		fsc->fscache = fscache_acquire_cookie(
>> +						ceph_cache_netfs.primary_index,
>> +						&ceph_fscache_fsid_object_def,
>> +						fsc, true);
>> +
>> +		if (fsc->fscache) {
>> +			memcpy(&ent->fsid, fsid, sizeof(*fsid));
>> +			ent->fscache = fsc->fscache;
>> +			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
>> +		} else {
>> +			kfree(ent);
>> +			pr_err("Unable to register fsid: %p "
>> +			       "fscache cookie\n", fsc);
>> +		}
>> +out_unlock:
>> +		mutex_unlock(&ceph_fscache_fsid_lock);
>> +	}
>> +	return err;
>> }
>> 
>> static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
>> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
>> 
>> void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
>> {
>> -	fscache_relinquish_cookie(fsc->fscache, 0);
>> +	if (fscache_cookie_valid(fsc->fscache)) {
>> +		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
>> +			const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +			struct ceph_fscache_fsid *ent, *found = NULL;
>> +
>> +			mutex_lock(&ceph_fscache_fsid_lock);
>> +			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
>> +				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
>> +					found = ent;
>> +					break;
>> +				}
>> +			}
>> +			if (found) {
>> +				WARN_ON_ONCE(found->fscache != fsc->fscache);
>> +				list_del(&found->list);
>> +				kfree(found);
>> +			} else {
>> +				WARN_ON_ONCE(true);
>> +			}
>> +			mutex_unlock(&ceph_fscache_fsid_lock);
>> +		}
>> +		__fscache_relinquish_cookie(fsc->fscache, 0);
>> +	}
>> 	fsc->fscache = NULL;
>> }
>> 
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 14e78dd..bb6dd7f 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -134,6 +134,7 @@ enum {
>> 	Opt_ino32,
>> 	Opt_noino32,
>> 	Opt_fscache,
>> +	Opt_tmpfscache,
>> 	Opt_nofscache,
>> 	Opt_poolperm,
>> 	Opt_nopoolperm,
>> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>> 	{Opt_ino32, "ino32"},
>> 	{Opt_noino32, "noino32"},
>> 	{Opt_fscache, "fsc"},
>> +	{Opt_tmpfscache, "tmpfsc"},
> 
> Maybe allowing the fsc option to take an optional argument would be
> cleaner?
> 
>    fsc=tmp
> 
> That would also leave open the option to allow other flavors in the
> future.
> 
> 
>> 	{Opt_nofscache, "nofsc"},
>> 	{Opt_poolperm, "poolperm"},
>> 	{Opt_nopoolperm, "nopoolperm"},
>> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>> 	case Opt_fscache:
>> 		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>> 		break;
>> +	case Opt_tmpfscache:
>> +		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
>> +				CEPH_MOUNT_OPT_TMPFSCACHE;
>> +		break;
>> 	case Opt_nofscache:
>> 		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>> 		break;
>> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>> 		seq_puts(m, ",noasyncreaddir");
>> 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>> 		seq_puts(m, ",nodcache");
>> -	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
>> -		seq_puts(m, ",fsc");
>> +	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> +		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
>> +			seq_puts(m, ",tmpfsc");
>> +		else
>> +			seq_puts(m, ",fsc");
>> +	}
>> 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>> 		seq_puts(m, ",nopoolperm");
>> 
>> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>> 	if (!fsc->wb_pagevec_pool)
>> 		goto fail_trunc_wq;
>> 
>> -	/* setup fscache */
>> -	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
>> -	    (ceph_fscache_register_fs(fsc) != 0))
>> -		goto fail_fscache;
>> -
>> 	/* caps */
>> 	fsc->min_caps = fsopt->max_readdir;
>> 
>> 	return fsc;
>> 
>> -fail_fscache:
>> -	ceph_fscache_unregister_fs(fsc);
>> fail_trunc_wq:
>> 	destroy_workqueue(fsc->trunc_wq);
>> fail_pg_inv_wq:
>> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>> {
>> 	dout("destroy_fs_client %p\n", fsc);
>> 
>> -	ceph_fscache_unregister_fs(fsc);
>> -
>> 	destroy_workqueue(fsc->wb_wq);
>> 	destroy_workqueue(fsc->pg_inv_wq);
>> 	destroy_workqueue(fsc->trunc_wq);
>> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>> 		if (err < 0)
>> 			goto out;
>> 
>> +		/* setup fscache */
>> +		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> +			err = ceph_fscache_register_fs(fsc);
>> +			if (err < 0)
>> +				goto out;
>> +		}
>> +
>> 		if (!fsc->mount_options->server_path) {
>> 			path = "";
>> 			dout("mount opening path \\t\n");
>> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>> 	fsc->client->extra_mon_dispatch = NULL;
>> 	ceph_fs_debugfs_cleanup(fsc);
>> 
>> +	ceph_fscache_unregister_fs(fsc);
>> +
>> 	ceph_mdsc_destroy(fsc);
>> 
>> 	destroy_fs_client(fsc);
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index f8a0aba..21e5562 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -36,8 +36,9 @@
>> #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
>> #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
>> #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
>> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
>> -#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
>> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
>> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
>> +#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
>> 
>> #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
>> 
> 
> -- 
> Jeff Layton <jlayton@redhat.com>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ceph: new mount option that control fscache data are indexed
       [not found] ` <32744905.807639.1498691427197@mail.yahoo.com>
@ 2017-06-29 13:39   ` Yan, Zheng
  0 siblings, 0 replies; 5+ messages in thread
From: Yan, Zheng @ 2017-06-29 13:39 UTC (permalink / raw)
  To: Anish Gupta; +Cc: Ceph Development, Milosz Tanski, Jeff Layton, David Howells


> On 29 Jun 2017, at 07:10, Anish Gupta <anish_gupta@yahoo.com> wrote:
> 
> Zheng,
> 
> Few comments on the patch:
> 
> 1) the option  "tempfsc" sounds yucky. Can it be renamed to "client_fsc" or something more meaningful and changed everywhere.
> 2) few inline comments with [AG]
> 

I wrote a new patch "ceph: new mount option that specifies fscache uniquifier”, which lets the user specify a uniquifier 

Regards
Yan, Zheng

On Monday, June 26, 2017, 9:23:15 PM PDT, Yan, Zheng <zyan@redhat.com> wrote:
> 
> 
> Current ceph uses FSID as primary index key of fscache data. This
> allows ceph to retain cached data across remount. But this causes
> a problem (kernel oops; fscache does not support sharing data) when
> a filesystem gets mounted (with fscache enabled) several times.
> 
> The fix is adding a new mount option, which makes ceph use client
> ID as primary index key. Client ID is unique for each mount. For
> the old fscache mount option, only allow one fscache instance for
> each filesystem.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
> fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
> fs/ceph/super.c |  32 ++++++++++------
> fs/ceph/super.h |  5 ++-
> 3 files changed, 131 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
> index 4e7421c..b4956b4 100644
> --- a/fs/ceph/cache.c
> +++ b/fs/ceph/cache.c
> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>     .version    = 0,
> };
> 
> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> -                        void *buffer, uint16_t maxbuf)
> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
> +static LIST_HEAD(ceph_fscache_fsid_list);
> +
> +struct ceph_fscache_fsid {
> +    struct list_head list;
> +    struct fscache_cookie *fscache;
> +    struct ceph_fsid fsid;
> +};
> +
> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
> +                      void *buffer, uint16_t maxbuf)
> {
>     const struct ceph_fs_client* fsc = cookie_netfs_data;
>     uint16_t klen;
> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>     .name        = "CEPH.fsid",
>     .type        = FSCACHE_COOKIE_TYPE_INDEX,
> -    .get_key    = ceph_fscache_session_get_key,
> +    .get_key    = ceph_fscache_fsid_get_key,
> +};
> +
> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
> +                        void *buffer, uint16_t maxbuf)
> +{
> +    const struct ceph_fs_client* fsc = cookie_netfs_data;
> +    const struct ceph_fsid *fsid = &fsc->client->fsid;
> +    u64 client_id = fsc->client->monc.auth->global_id;
> +    uint16_t fsid_len, key_len;
> +
> +    fsid_len = sizeof(*fsid);
> +    key_len = fsid_len + sizeof(client_id);
> +    if (key_len > maxbuf)
> +        return 0;
> +
> +    memcpy(buffer, fsid, fsid_len);
> +    memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
> +
> +    return key_len;
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
> +    .name        = "CEPH.client",
> +    .type        = FSCACHE_COOKIE_TYPE_INDEX,
> +    .get_key    = ceph_fscache_client_get_key,
> };
> 
> int ceph_fscache_register(void)
> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
> 
> int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
> {
> -    fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
> -                          &ceph_fscache_fsid_object_def,
> -                          fsc, true);
> -    if (!fsc->fscache)
> -        pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
> +    const struct ceph_fsid *fsid = &fsc->client->fsid;
> +    struct ceph_fscache_fsid *ent;
> +    int err = 0;
> +
> +    if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
> +        fsc->fscache = fscache_acquire_cookie(
> +                        ceph_cache_netfs.primary_index,
> +                        &ceph_fscache_client_object_def,
> +                        fsc, true);
> +        if (!fsc->fscache)
> +            pr_err("Unable to register fsid: %p "
> +                  "fscache cookie\n", fsc);
> 
> [AG] return an error here?
> 
> +    } else {
> +        mutex_lock(&ceph_fscache_fsid_lock);
> 
> 
> [AG] For client entries (ie "CEPH.client"),  where are you adding them to ceph_fscache_fsid_list?
> Code much below is only doing that for "CEPH.fsid" entries. 
> If there is no list for "CEPH.client" entries, why walk the list here?
> IMHO, we should have a list of fscached CLIENT entries too.
> 
> +        list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +            if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +                pr_err("fscache cookie already registered "
> +                      "for fsid %pU\n", fsid);
> +                pr_err("  use tmpfsc mount option instead\n");
> +                err = -EBUSY;
> +                goto out_unlock;
> +            }
> +        }
> 
> [AG] Missing a mutex_unlock() before the return 
> 
> -    return 0;
> +        ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +        if (!ent) {
> +            err = -ENOMEM;
> +            goto out_unlock;
> +        }
> +
> +        fsc->fscache = fscache_acquire_cookie(
> +                        ceph_cache_netfs.primary_index,
> +                        &ceph_fscache_fsid_object_def,
> +                        fsc, true);
> +
> +        if (fsc->fscache) {
> +            memcpy(&ent->fsid, fsid, sizeof(*fsid));
> +            ent->fscache = fsc->fscache;
> +            list_add_tail(&ent->list, &ceph_fscache_fsid_list);
> +        } else {
> +            kfree(ent);
> +            pr_err("Unable to register fsid: %p "
> +                  "fscache cookie\n", fsc);
> +        }
> +out_unlock:
> +        mutex_unlock(&ceph_fscache_fsid_lock);
> +    }
> +    return err;
> }
> 
> static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
> 
> void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
> {
> -    fscache_relinquish_cookie(fsc->fscache, 0);
> +    if (fscache_cookie_valid(fsc->fscache)) {
> +        if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
> 
> [AG] How/Where do you handle ceph_fscace_client_object_def entries?
> 
> +            const struct ceph_fsid *fsid = &fsc->client->fsid;
> +            struct ceph_fscache_fsid *ent, *found = NULL;
> +
> +            mutex_lock(&ceph_fscache_fsid_lock);
> +            list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +                if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +                    found = ent;
> +                    break;
> +                }
> +            }
> +            if (found) {
> +                WARN_ON_ONCE(found->fscache != fsc->fscache);
> +                list_del(&found->list);
> +                kfree(found);
> 
> [AG] return from here? 
> If there are additional entries in the list, aren't you setting fsc->fscache = NULL?
> 
> 
> 
> +            } else {
> +                WARN_ON_ONCE(true);
> +            }
> +            mutex_unlock(&ceph_fscache_fsid_lock);
> +        }
> +        __fscache_relinquish_cookie(fsc->fscache, 0);
> +    }
>     fsc->fscache = NULL;
> }
> 
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 14e78dd..bb6dd7f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -134,6 +134,7 @@ enum {
>     Opt_ino32,
>     Opt_noino32,
>     Opt_fscache,
> +    Opt_tmpfscache,
>     Opt_nofscache,
>     Opt_poolperm,
>     Opt_nopoolperm,
> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>     {Opt_ino32, "ino32"},
>     {Opt_noino32, "noino32"},
>     {Opt_fscache, "fsc"},
> +    {Opt_tmpfscache, "tmpfsc"},
> 
> [AG] Rename "tmpfsc" please.
> 
> 
>     {Opt_nofscache, "nofsc"},
> 
> [AG] Does "nofsc" work for both the "fsc"  options?
> 
>     {Opt_poolperm, "poolperm"},
>     {Opt_nopoolperm, "nopoolperm"},
> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>     case Opt_fscache:
>         fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>         break;
> +    case Opt_tmpfscache:
> +        fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
> +                CEPH_MOUNT_OPT_TMPFSCACHE;
> +        break;
>     case Opt_nofscache:
>         fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>         break;
> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>         seq_puts(m, ",noasyncreaddir");
>     if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>         seq_puts(m, ",nodcache");
> -    if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
> -        seq_puts(m, ",fsc");
> +    if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +        if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
> +            seq_puts(m, ",tmpfsc");
> +        else
> +            seq_puts(m, ",fsc");
> +    }
>     if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>         seq_puts(m, ",nopoolperm");
> 
> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>     if (!fsc->wb_pagevec_pool)
>         goto fail_trunc_wq;
> 
> -    /* setup fscache */
> -    if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
> -        (ceph_fscache_register_fs(fsc) != 0))
> -        goto fail_fscache;
> -
>     /* caps */
>     fsc->min_caps = fsopt->max_readdir;
> 
>     return fsc;
> 
> -fail_fscache:
> -    ceph_fscache_unregister_fs(fsc);
> fail_trunc_wq:
>     destroy_workqueue(fsc->trunc_wq);
> fail_pg_inv_wq:
> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
> {
>     dout("destroy_fs_client %p\n", fsc);
> 
> -    ceph_fscache_unregister_fs(fsc);
> -
>     destroy_workqueue(fsc->wb_wq);
>     destroy_workqueue(fsc->pg_inv_wq);
>     destroy_workqueue(fsc->trunc_wq);
> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>         if (err < 0)
>             goto out;
> 
> +        /* setup fscache */
> +        if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +            err = ceph_fscache_register_fs(fsc);
> +            if (err < 0)
> +                goto out;
> +        }
> 
> 
> [AG] How does one unregister "tmpfsc" entries?
> 
> +
>         if (!fsc->mount_options->server_path) {
>             path = "";
>             dout("mount opening path \\t\n");
> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>     fsc->client->extra_mon_dispatch = NULL;
>     ceph_fs_debugfs_cleanup(fsc);
> 
> +    ceph_fscache_unregister_fs(fsc);
> +
>     ceph_mdsc_destroy(fsc);
> 
>     destroy_fs_client(fsc);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index f8a0aba..21e5562 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -36,8 +36,9 @@
> #define CEPH_MOUNT_OPT_INO32          (1<<8) /* 32 bit inos */
> #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
> #define CEPH_MOUNT_OPT_FSCACHE        (1<<10) /* use fscache */
> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
> -#define CEPH_MOUNT_OPT_MOUNTWAIT      (1<<12) /* mount waits if no mds is up */
> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
> 
> [AG] Rename  "tmpfsc" please.
> 
> thank you,
> Anish
> 
> 
> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
> +#define CEPH_MOUNT_OPT_MOUNTWAIT      (1<<13) /* mount waits if no mds is up */
> 
> #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
> 
> -- 
> 2.9.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-06-29 13:39 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-27  4:23 [PATCH] ceph: new mount option that control fscache data are indexed Yan, Zheng
2017-06-27 15:08 ` Jeff Layton
2017-06-28  3:01   ` Yan, Zheng
2017-06-27 16:20 ` Luis Henriques
     [not found] ` <32744905.807639.1498691427197@mail.yahoo.com>
2017-06-29 13:39   ` Yan, Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.