All of lore.kernel.org
 help / color / mirror / Atom feed
From: xiubli@redhat.com
To: jlayton@kernel.org, idryomov@gmail.com
Cc: sage@redhat.com, zyan@redhat.com, pdonnell@redhat.com,
	ceph-devel@vger.kernel.org, Xiubo Li <xiubli@redhat.com>
Subject: [PATCH v2] ceph: add support for reconfiguring superblock parameters
Date: Thu, 13 Feb 2020 21:18:25 -0500	[thread overview]
Message-ID: <20200214021825.1799-1-xiubli@redhat.com> (raw)

From: Xiubo Li <xiubli@redhat.com>

This enables remounting and reconfiguring superblock parameters
for the filesystem. Currently some mount options are not allowed
to be reconfigured.

It works like:
$ mount.ceph :/ /mnt/cephfs -o remount,mount_timeout=100

URL: https://tracker.ceph.com/issues/44071
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

Changes in v2:
- remove low level options from reconfiguration.
- switch to seqlock

 fs/ceph/addr.c       |  10 +++-
 fs/ceph/caps.c       |  14 +++--
 fs/ceph/mds_client.c |   5 +-
 fs/ceph/super.c      | 137 ++++++++++++++++++++++++++++++++++++-------
 fs/ceph/super.h      |   2 +
 5 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index d14392b58f16..5c0e6c2d3fde 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -882,6 +882,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	struct ceph_writeback_ctl ceph_wbc;
 	bool should_loop, range_whole = false;
 	bool done = false;
+	unsigned int seq;
 
 	dout("writepages_start %p (mode=%s)\n", inode,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -896,8 +897,13 @@ static int ceph_writepages_start(struct address_space *mapping,
 		mapping_set_error(mapping, -EIO);
 		return -EIO; /* we're in a forced umount, don't write! */
 	}
-	if (fsc->mount_options->wsize < wsize)
-		wsize = fsc->mount_options->wsize;
+
+	do {
+		seq = read_seqbegin(&fsc->mount_options->opt_seqlock);
+
+		if (fsc->mount_options->wsize < wsize)
+			wsize = fsc->mount_options->wsize;
+	} while (read_seqretry(&fsc->mount_options->opt_seqlock, seq));
 
 	pagevec_init(&pvec);
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b4f122eb74bb..daa7eef13e3a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,11 +490,17 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
 	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
+	unsigned int seq;
+
+	do {
+		seq = read_seqbegin(&opt->opt_seqlock);
+
+		ci->i_hold_caps_min = round_jiffies(jiffies +
+					opt->caps_wanted_delay_min * HZ);
+		ci->i_hold_caps_max = round_jiffies(jiffies +
+					opt->caps_wanted_delay_max * HZ);
+	} while (read_seqretry(&opt->opt_seqlock, seq));
 
-	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    opt->caps_wanted_delay_min * HZ);
-	ci->i_hold_caps_max = round_jiffies(jiffies +
-					    opt->caps_wanted_delay_max * HZ);
 	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
 	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 376e7cf1685f..451c3727cd0b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2099,6 +2099,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
 	struct ceph_inode_info *ci = ceph_inode(dir);
 	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
 	struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
+	unsigned int max_readdir = opt->max_readdir;
 	size_t size = sizeof(struct ceph_mds_reply_dir_entry);
 	unsigned int num_entries;
 	int order;
@@ -2107,7 +2108,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
 	num_entries = ci->i_files + ci->i_subdirs;
 	spin_unlock(&ci->i_ceph_lock);
 	num_entries = max(num_entries, 1U);
-	num_entries = min(num_entries, opt->max_readdir);
+	num_entries = min(num_entries, max_readdir);
 
 	order = get_order(size * num_entries);
 	while (order >= 0) {
@@ -2122,7 +2123,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
 		return -ENOMEM;
 
 	num_entries = (PAGE_SIZE << order) / size;
-	num_entries = min(num_entries, opt->max_readdir);
+	num_entries = min(num_entries, max_readdir);
 
 	rinfo->dir_buf_size = PAGE_SIZE << order;
 	req->r_num_caps = num_entries + 1;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7cb62d4cf812..500c0209041f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -271,8 +271,14 @@ static int ceph_parse_mount_param(struct fs_context *fc,
 	int token, ret;
 
 	ret = ceph_parse_param(param, pctx->copts, fc);
-	if (ret != -ENOPARAM)
-		return ret;
+	if (ret != -ENOPARAM) {
+		/* Low level options are not reconfigurable */
+		if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+			return invalf(fc, "ceph: reconfiguration of %s not allowed",
+				      param->key);
+		else
+			return ret;
+	}
 
 	token = fs_parse(fc, &ceph_mount_parameters, param, &result);
 	dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
@@ -1070,14 +1076,17 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
  */
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
+static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc,
+			  bool update)
 {
 	int err;
 
-	err = super_setup_bdi_name(sb, "ceph-%ld",
-				   atomic_long_inc_return(&bdi_seq));
-	if (err)
-		return err;
+	if (!update) {
+		err = super_setup_bdi_name(sb, "ceph-%ld",
+					   atomic_long_inc_return(&bdi_seq));
+		if (err)
+			return err;
+	}
 
 	/* set ra_pages based on rasize mount option? */
 	sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
@@ -1133,7 +1142,7 @@ static int ceph_get_tree(struct fs_context *fc)
 		dout("get_sb got existing client %p\n", fsc);
 	} else {
 		dout("get_sb using new client %p\n", fsc);
-		err = ceph_setup_bdi(sb, fsc);
+		err = ceph_setup_bdi(sb, fsc, false);
 		if (err < 0)
 			goto out_splat;
 	}
@@ -1178,7 +1187,52 @@ static void ceph_free_fc(struct fs_context *fc)
 
 static int ceph_reconfigure_fc(struct fs_context *fc)
 {
-	sync_filesystem(fc->root->d_sb);
+	struct super_block *sb = fc->root->d_sb;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+	struct ceph_mount_options *fsopt = fsc->mount_options;
+	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+	struct ceph_mount_options *new_fsopt = pctx->opts;
+
+	sync_filesystem(sb);
+
+	if (strcmp_null(new_fsopt->snapdir_name, fsopt->snapdir_name))
+		return invalf(fc, "ceph: reconfiguration of snapdir_name not allowed");
+
+	if (strcmp_null(new_fsopt->mds_namespace, fsopt->mds_namespace))
+		return invalf(fc, "ceph: reconfiguration of mds_namespace not allowed");
+
+	fsopt->rsize = new_fsopt->rsize;
+	fsopt->rasize = new_fsopt->rasize;
+	ceph_setup_bdi(sb, fsc, true);
+
+	write_seqlock(&fsopt->opt_seqlock);
+	fsopt->wsize = new_fsopt->wsize;
+	fsopt->caps_wanted_delay_min = new_fsopt->caps_wanted_delay_min;
+	fsopt->caps_wanted_delay_max = new_fsopt->caps_wanted_delay_max;
+	write_sequnlock(&fsopt->opt_seqlock);
+
+#ifdef CONFIG_CEPH_FSCACHE
+	if (strcmp_null(new_fsopt->fscache_uniq, fsopt->fscache_uniq) ||
+	    ((new_fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) !=
+	     (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)))
+		return invalf(fc, "ceph: reconfiguration of fscache not allowed");
+#endif
+
+	fsopt->flags = new_fsopt->flags;
+
+	fsopt->max_readdir_bytes = new_fsopt->max_readdir_bytes;
+	fsopt->congestion_kb = new_fsopt->congestion_kb;
+
+	fsopt->caps_max = new_fsopt->caps_max;
+	fsopt->max_readdir = new_fsopt->max_readdir;
+	ceph_adjust_caps_max_min(fsc->mdsc, fsopt);
+
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+	if (fc->sb_flags & SB_POSIXACL)
+		sb->s_flags |= SB_POSIXACL;
+	else
+		sb->s_flags &= ~SB_POSIXACL;
+#endif
 	return 0;
 }
 
@@ -1209,25 +1263,64 @@ static int ceph_init_fs_context(struct fs_context *fc)
 	if (!pctx->opts)
 		goto nomem;
 
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+	fc->sb_flags |= SB_POSIXACL;
+#endif
+
 	fsopt = pctx->opts;
-	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 
-	fsopt->wsize = CEPH_MAX_WRITE_SIZE;
-	fsopt->rsize = CEPH_MAX_READ_SIZE;
-	fsopt->rasize = CEPH_RASIZE_DEFAULT;
-	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
-	if (!fsopt->snapdir_name)
-		goto nomem;
+	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+		struct super_block *sb = fc->root->d_sb;
+		struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+		struct ceph_mount_options *old = fsc->mount_options;
 
-	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
-	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
-	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
-	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
-	fsopt->congestion_kb = default_congestion_kb();
+		fsopt->flags = old->flags;
+
+		fsopt->wsize = old->wsize;
+		fsopt->rsize = old->rsize;
+		fsopt->rasize = old->rasize;
+
+		if (old->fscache_uniq) {
+			fsopt->fscache_uniq = kstrdup(old->fscache_uniq,
+						      GFP_KERNEL);
+			if (!fsopt->fscache_uniq)
+				goto nomem;
+		}
+
+		fsopt->snapdir_name = kstrdup(old->snapdir_name, GFP_KERNEL);
+		if (!fsopt->snapdir_name)
+			goto nomem;
+
+		fsopt->caps_wanted_delay_min = old->caps_wanted_delay_min;
+		fsopt->caps_wanted_delay_max = old->caps_wanted_delay_max;
+		fsopt->max_readdir = old->max_readdir;
+		fsopt->max_readdir_bytes = old->max_readdir_bytes;
+		fsopt->congestion_kb = old->congestion_kb;
+		fsopt->caps_max = old->caps_max;
+		fsopt->max_readdir = old->max_readdir;
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
-	fc->sb_flags |= SB_POSIXACL;
+		if (!(sb->s_flags & SB_POSIXACL))
+			fc->sb_flags &= ~SB_POSIXACL;
 #endif
+	} else {
+		fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
+
+		fsopt->wsize = CEPH_MAX_WRITE_SIZE;
+		fsopt->rsize = CEPH_MAX_READ_SIZE;
+		fsopt->rasize = CEPH_RASIZE_DEFAULT;
+
+		fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+		if (!fsopt->snapdir_name)
+			goto nomem;
+
+		fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+		fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
+		fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+		fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+		fsopt->congestion_kb = default_congestion_kb();
+		seqlock_init(&fsopt->opt_seqlock);
+	}
 
 	fc->fs_private = pctx;
 	fc->ops = &ceph_context_ops;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2acc9cc2d23a..aa0e7217a62f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -96,6 +96,8 @@ struct ceph_mount_options {
 	char *mds_namespace;  /* default NULL */
 	char *server_path;    /* default  "/" */
 	char *fscache_uniq;   /* default NULL */
+
+	seqlock_t opt_seqlock;
 };
 
 struct ceph_fs_client {
-- 
2.21.0

                 reply	other threads:[~2020-02-14  2:18 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200214021825.1799-1-xiubli@redhat.com \
    --to=xiubli@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=pdonnell@redhat.com \
    --cc=sage@redhat.com \
    --cc=zyan@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.