All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
@ 2022-05-25 17:24 Luís Henriques
  2022-05-26  4:52 ` Xiubo Li
  2022-05-26 18:39 ` Jeff Layton
  0 siblings, 2 replies; 9+ messages in thread
From: Luís Henriques @ 2022-05-25 17:24 UTC (permalink / raw)
  To: Jeff Layton, Xiubo Li, Ilya Dryomov
  Cc: ceph-devel, linux-kernel, Luís Henriques

The MDS tries to enforce a limit on the total key/values in extended
attributes.  However, this limit is enforced only if doing a synchronous
operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
doesn't have a chance to enforce these limits.

This patch adds support for an extra feature bit that will allow the
client to get the MDS max_xattr_pairs_size setting in the session message.
Then, when setting an xattr, the kernel will fall back to a synchronous
operation if that maximum size would be exceeded.

While there, fix a dout() that would trigger a printk warning:

[   98.718078] ------------[ cut here ]------------
[   98.719012] precision 65536 too large
[   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
...

URL: https://tracker.ceph.com/issues/55725
Signed-off-by: Luís Henriques <lhenriques@suse.de>
---
 fs/ceph/mds_client.c | 12 ++++++++++++
 fs/ceph/mds_client.h | 15 ++++++++++++++-
 fs/ceph/xattr.c      | 12 ++++++++----
 3 files changed, 34 insertions(+), 5 deletions(-)

* Changes since v1

Added support for new feature bit to get the MDS max_xattr_pairs_size
setting.

Also note that this patch relies on a patch that hasn't been merged yet
("ceph: use correct index when encoding client supported features"),
otherwise the new feature bit won't be correctly encoded.

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 35597fafb48c..87a25b7cf496 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
 	struct ceph_mds_session_head *h;
 	u32 op;
 	u64 seq, features = 0;
+	u64 max_xattr_pairs_size = 0;
 	int wake = 0;
 	bool blocklisted = false;
 
@@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
 		}
 	}
 
+	if (msg_version >= 6)
+		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
+
 	mutex_lock(&mdsc->mutex);
 	if (op == CEPH_SESSION_CLOSE) {
 		ceph_get_mds_session(session);
@@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
 	}
 	/* FIXME: this ttl calculation is generous */
 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
+
+	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
+		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
+		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
+		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
+	}
 	mutex_unlock(&mdsc->mutex);
 
 	mutex_lock(&session->s_mutex);
@@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	strscpy(mdsc->nodename, utsname()->nodename,
 		sizeof(mdsc->nodename));
 
+	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
+
 	fsc->mdsc = mdsc;
 	return 0;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ca32f26f5eed..3db777df6d88 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -29,8 +29,11 @@ enum ceph_feature_type {
 	CEPHFS_FEATURE_MULTI_RECONNECT,
 	CEPHFS_FEATURE_DELEG_INO,
 	CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_ALTERNATE_NAME,
+	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
 
-	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
 };
 
 /*
@@ -45,9 +48,16 @@ enum ceph_feature_type {
 	CEPHFS_FEATURE_MULTI_RECONNECT,		\
 	CEPHFS_FEATURE_DELEG_INO,		\
 	CEPHFS_FEATURE_METRIC_COLLECT,		\
+	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
 }
 #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
 
+/*
+ * Maximum size of xattrs the MDS can handle per inode by default.  This
+ * includes the attribute name and 4+4 bytes for the key/value sizes.
+ */
+#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
+
 /*
  * Some lock dependencies:
  *
@@ -404,6 +414,9 @@ struct ceph_mds_client {
 	struct rb_root		quotarealms_inodes;
 	struct mutex		quotarealms_inodes_mutex;
 
+	/* maximum aggregate size of extended attributes on a file */
+	u64			max_xattr_pairs_size;
+
 	/*
 	 * snap_rwsem will cover cap linkage into snaprealms, and
 	 * realm snap contexts.  (later, we can do per-realm snap
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c2dc2c762a4..175a8c1449aa 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 			flags |= CEPH_XATTR_REMOVE;
 	}
 
-	dout("setxattr value=%.*s\n", (int)size, value);
+	dout("setxattr value size: %ld\n", size);
 
 	/* do request */
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	spin_lock(&ci->i_ceph_lock);
 retry:
 	issued = __ceph_caps_issued(ci, NULL);
-	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
+	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
+	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
+		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
+		     __func__, ci->i_xattrs.version, required_blob_size,
+		     mdsc->max_xattr_pairs_size);
 		goto do_sync;
+	}
 
 	if (!lock_snap_rwsem && !ci->i_head_snapc) {
 		lock_snap_rwsem = true;
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	     ceph_cap_string(issued));
 	__build_xattrs(inode);
 
-	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
 	if (!ci->i_xattrs.prealloc_blob ||
 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 		struct ceph_buffer *blob;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-25 17:24 [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
@ 2022-05-26  4:52 ` Xiubo Li
  2022-05-26 18:39 ` Jeff Layton
  1 sibling, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2022-05-26  4:52 UTC (permalink / raw)
  To: Luís Henriques, Jeff Layton, Ilya Dryomov; +Cc: ceph-devel, linux-kernel


On 5/26/22 1:24 AM, Luís Henriques wrote:
> The MDS tries to enforce a limit on the total key/values in extended
> attributes.  However, this limit is enforced only if doing a synchronous
> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
> doesn't have a chance to enforce these limits.
>
> This patch adds support for an extra feature bit that will allow the
> client to get the MDS max_xattr_pairs_size setting in the session message.
> Then, when setting an xattr, the kernel will revert to do a synchronous
> operation if that maximum size is exceeded.
>
> While there, fix a dout() that would trigger a printk warning:
>
> [   98.718078] ------------[ cut here ]------------
> [   98.719012] precision 65536 too large
> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
> ...
>
> URL: https://tracker.ceph.com/issues/55725
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
>   fs/ceph/mds_client.c | 12 ++++++++++++
>   fs/ceph/mds_client.h | 15 ++++++++++++++-
>   fs/ceph/xattr.c      | 12 ++++++++----
>   3 files changed, 34 insertions(+), 5 deletions(-)
>
> * Changes since v1
>
> Added support for new feature bit to get the MDS max_xattr_pairs_size
> setting.
>
> Also note that this patch relies on a patch that hasn't been merged yet
> ("ceph: use correct index when encoding client supported features"),
> otherwise the new feature bit won't be correctly encoded.
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 35597fafb48c..87a25b7cf496 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>   	struct ceph_mds_session_head *h;
>   	u32 op;
>   	u64 seq, features = 0;
> +	u64 max_xattr_pairs_size = 0;
>   	int wake = 0;
>   	bool blocklisted = false;
>   
> @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
>   		}
>   	}
>   
> +	if (msg_version >= 6)
> +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
> +
>   	mutex_lock(&mdsc->mutex);
>   	if (op == CEPH_SESSION_CLOSE) {
>   		ceph_get_mds_session(session);
> @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
>   	}
>   	/* FIXME: this ttl calculation is generous */
>   	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
> +
> +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
> +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
> +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
> +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
> +	}

Is there any case in which some MDSes in the ceph cluster are still using
the default size while others have changed it?

In that case, IMO, we should make sure mdsc->max_xattr_pairs_size is
always the smallest of those sizes.


>   	mutex_unlock(&mdsc->mutex);
>   
>   	mutex_lock(&session->s_mutex);
> @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>   	strscpy(mdsc->nodename, utsname()->nodename,
>   		sizeof(mdsc->nodename));
>   
> +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
> +
>   	fsc->mdsc = mdsc;
>   	return 0;
>   
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index ca32f26f5eed..3db777df6d88 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -29,8 +29,11 @@ enum ceph_feature_type {
>   	CEPHFS_FEATURE_MULTI_RECONNECT,
>   	CEPHFS_FEATURE_DELEG_INO,
>   	CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_ALTERNATE_NAME,
> +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>   
> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>   };
>   
>   /*
> @@ -45,9 +48,16 @@ enum ceph_feature_type {
>   	CEPHFS_FEATURE_MULTI_RECONNECT,		\
>   	CEPHFS_FEATURE_DELEG_INO,		\
>   	CEPHFS_FEATURE_METRIC_COLLECT,		\
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
>   }
>   #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
>   
> +/*
> + * Maximum size of xattrs the MDS can handle per inode by default.  This
> + * includes the attribute name and 4+4 bytes for the key/value sizes.
> + */
> +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
> +
>   /*
>    * Some lock dependencies:
>    *
> @@ -404,6 +414,9 @@ struct ceph_mds_client {
>   	struct rb_root		quotarealms_inodes;
>   	struct mutex		quotarealms_inodes_mutex;
>   
> +	/* maximum aggregate size of extended attributes on a file */
> +	u64			max_xattr_pairs_size;
> +
>   	/*
>   	 * snap_rwsem will cover cap linkage into snaprealms, and
>   	 * realm snap contexts.  (later, we can do per-realm snap
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 8c2dc2c762a4..175a8c1449aa 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>   			flags |= CEPH_XATTR_REMOVE;
>   	}
>   
> -	dout("setxattr value=%.*s\n", (int)size, value);
> +	dout("setxattr value size: %ld\n", size);
>   
>   	/* do request */
>   	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>   	spin_lock(&ci->i_ceph_lock);
>   retry:
>   	issued = __ceph_caps_issued(ci, NULL);
> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
> +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {

Shouldn't this be "required_blob_size > mdsc->max_xattr_pairs_size" instead of ">="?

Thanks,

-- Xiubo
> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
> +		     __func__, ci->i_xattrs.version, required_blob_size,
> +		     mdsc->max_xattr_pairs_size);
>   		goto do_sync;
> +	}
>   
>   	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>   		lock_snap_rwsem = true;
> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>   	     ceph_cap_string(issued));
>   	__build_xattrs(inode);
>   
> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> -
>   	if (!ci->i_xattrs.prealloc_blob ||
>   	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>   		struct ceph_buffer *blob;
>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-25 17:24 [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
  2022-05-26  4:52 ` Xiubo Li
@ 2022-05-26 18:39 ` Jeff Layton
  2022-05-27  0:36   ` Xiubo Li
  1 sibling, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2022-05-26 18:39 UTC (permalink / raw)
  To: Luís Henriques, Xiubo Li, Ilya Dryomov; +Cc: ceph-devel, linux-kernel

On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:
> The MDS tries to enforce a limit on the total key/values in extended
> attributes.  However, this limit is enforced only if doing a synchronous
> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
> doesn't have a chance to enforce these limits.
> 
> This patch adds support for an extra feature bit that will allow the
> client to get the MDS max_xattr_pairs_size setting in the session message.
> Then, when setting an xattr, the kernel will revert to do a synchronous
> operation if that maximum size is exceeded.
> 
> While there, fix a dout() that would trigger a printk warning:
> 
> [   98.718078] ------------[ cut here ]------------
> [   98.719012] precision 65536 too large
> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
> ...
> 
> URL: https://tracker.ceph.com/issues/55725
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
>  fs/ceph/mds_client.c | 12 ++++++++++++
>  fs/ceph/mds_client.h | 15 ++++++++++++++-
>  fs/ceph/xattr.c      | 12 ++++++++----
>  3 files changed, 34 insertions(+), 5 deletions(-)
> 
> * Changes since v1
> 
> Added support for new feature bit to get the MDS max_xattr_pairs_size
> setting.
> 
> Also note that this patch relies on a patch that hasn't been merged yet
> ("ceph: use correct index when encoding client supported features"),
> otherwise the new feature bit won't be correctly encoded.
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 35597fafb48c..87a25b7cf496 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>  	struct ceph_mds_session_head *h;
>  	u32 op;
>  	u64 seq, features = 0;
> +	u64 max_xattr_pairs_size = 0;
>  	int wake = 0;
>  	bool blocklisted = false;
>  
> @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
>  		}
>  	}
>  
> +	if (msg_version >= 6)
> +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
> +
>  	mutex_lock(&mdsc->mutex);
>  	if (op == CEPH_SESSION_CLOSE) {
>  		ceph_get_mds_session(session);
> @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
>  	}
>  	/* FIXME: this ttl calculation is generous */
>  	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
> +
> +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
> +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
> +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
> +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
> +	}
>  	mutex_unlock(&mdsc->mutex);
>  
>  	mutex_lock(&session->s_mutex);
> @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>  	strscpy(mdsc->nodename, utsname()->nodename,
>  		sizeof(mdsc->nodename));
>  
> +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
> +
>  	fsc->mdsc = mdsc;
>  	return 0;
>  
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index ca32f26f5eed..3db777df6d88 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -29,8 +29,11 @@ enum ceph_feature_type {
>  	CEPHFS_FEATURE_MULTI_RECONNECT,
>  	CEPHFS_FEATURE_DELEG_INO,
>  	CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_ALTERNATE_NAME,
> +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,

Having to make this feature-bit-dependent kind of sucks. I wonder if it
could be avoided...

A question:

How do the MDSes discover this setting? Do they get it from the mons? If
so, I wonder if there is a way for the clients to query the mon for this
instead of having to extend the MDS protocol?

>  
> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>  };
>  
>  /*
> @@ -45,9 +48,16 @@ enum ceph_feature_type {
>  	CEPHFS_FEATURE_MULTI_RECONNECT,		\
>  	CEPHFS_FEATURE_DELEG_INO,		\
>  	CEPHFS_FEATURE_METRIC_COLLECT,		\
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
>  }
>  #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
>  
> +/*
> + * Maximum size of xattrs the MDS can handle per inode by default.  This
> + * includes the attribute name and 4+4 bytes for the key/value sizes.
> + */
> +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
> +
>  /*
>   * Some lock dependencies:
>   *
> @@ -404,6 +414,9 @@ struct ceph_mds_client {
>  	struct rb_root		quotarealms_inodes;
>  	struct mutex		quotarealms_inodes_mutex;
>  
> +	/* maximum aggregate size of extended attributes on a file */
> +	u64			max_xattr_pairs_size;
> +
>  	/*
>  	 * snap_rwsem will cover cap linkage into snaprealms, and
>  	 * realm snap contexts.  (later, we can do per-realm snap
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 8c2dc2c762a4..175a8c1449aa 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>  			flags |= CEPH_XATTR_REMOVE;
>  	}
>  
> -	dout("setxattr value=%.*s\n", (int)size, value);
> +	dout("setxattr value size: %ld\n", size);
>  
>  	/* do request */
>  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>  	spin_lock(&ci->i_ceph_lock);
>  retry:
>  	issued = __ceph_caps_issued(ci, NULL);
> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
> +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
> +		     __func__, ci->i_xattrs.version, required_blob_size,
> +		     mdsc->max_xattr_pairs_size);
>  		goto do_sync;
> +	}
>  
>  	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>  		lock_snap_rwsem = true;
> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>  	     ceph_cap_string(issued));
>  	__build_xattrs(inode);
>  
> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> -
>  	if (!ci->i_xattrs.prealloc_blob ||
>  	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>  		struct ceph_buffer *blob;

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-26 18:39 ` Jeff Layton
@ 2022-05-27  0:36   ` Xiubo Li
  2022-05-27  0:44     ` Jeff Layton
  0 siblings, 1 reply; 9+ messages in thread
From: Xiubo Li @ 2022-05-27  0:36 UTC (permalink / raw)
  To: Jeff Layton, Luís Henriques, Ilya Dryomov; +Cc: ceph-devel, linux-kernel


On 5/27/22 2:39 AM, Jeff Layton wrote:
> On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:
>> The MDS tries to enforce a limit on the total key/values in extended
>> attributes.  However, this limit is enforced only if doing a synchronous
>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>> doesn't have a chance to enforce these limits.
>>
>> This patch adds support for an extra feature bit that will allow the
>> client to get the MDS max_xattr_pairs_size setting in the session message.
>> Then, when setting an xattr, the kernel will revert to do a synchronous
>> operation if that maximum size is exceeded.
>>
>> While there, fix a dout() that would trigger a printk warning:
>>
>> [   98.718078] ------------[ cut here ]------------
>> [   98.719012] precision 65536 too large
>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>> ...
>>
>> URL: https://tracker.ceph.com/issues/55725
>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>> ---
>>   fs/ceph/mds_client.c | 12 ++++++++++++
>>   fs/ceph/mds_client.h | 15 ++++++++++++++-
>>   fs/ceph/xattr.c      | 12 ++++++++----
>>   3 files changed, 34 insertions(+), 5 deletions(-)
>>
>> * Changes since v1
>>
>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>> setting.
>>
>> Also note that this patch relies on a patch that hasn't been merged yet
>> ("ceph: use correct index when encoding client supported features"),
>> otherwise the new feature bit won't be correctly encoded.
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 35597fafb48c..87a25b7cf496 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>   	struct ceph_mds_session_head *h;
>>   	u32 op;
>>   	u64 seq, features = 0;
>> +	u64 max_xattr_pairs_size = 0;
>>   	int wake = 0;
>>   	bool blocklisted = false;
>>   
>> @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
>>   		}
>>   	}
>>   
>> +	if (msg_version >= 6)
>> +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
>> +
>>   	mutex_lock(&mdsc->mutex);
>>   	if (op == CEPH_SESSION_CLOSE) {
>>   		ceph_get_mds_session(session);
>> @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
>>   	}
>>   	/* FIXME: this ttl calculation is generous */
>>   	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
>> +
>> +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
>> +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
>> +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
>> +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
>> +	}
>>   	mutex_unlock(&mdsc->mutex);
>>   
>>   	mutex_lock(&session->s_mutex);
>> @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>>   	strscpy(mdsc->nodename, utsname()->nodename,
>>   		sizeof(mdsc->nodename));
>>   
>> +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
>> +
>>   	fsc->mdsc = mdsc;
>>   	return 0;
>>   
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index ca32f26f5eed..3db777df6d88 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -29,8 +29,11 @@ enum ceph_feature_type {
>>   	CEPHFS_FEATURE_MULTI_RECONNECT,
>>   	CEPHFS_FEATURE_DELEG_INO,
>>   	CEPHFS_FEATURE_METRIC_COLLECT,
>> +	CEPHFS_FEATURE_ALTERNATE_NAME,
>> +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
>> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
> Having to make this feature-bit-dependent kind of sucks. I wonder if it
> could be avoided...
>
> A question:
>
> How do the MDS's discover this setting? Do they get it from the mons? If
> so, I wonder if there is a way for the clients to query the mon for this
> instead of having to extend the MDS protocol?

It sounds like what "max_file_size" does, which is recorded in
the 'mdsmap'.

Currently, however, "max_xattr_pairs_size" is a per-daemon MDS option, so
each MDS could be set to a different value.

-- Xiubo

>>   
>> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
>> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>>   };
>>   
>>   /*
>> @@ -45,9 +48,16 @@ enum ceph_feature_type {
>>   	CEPHFS_FEATURE_MULTI_RECONNECT,		\
>>   	CEPHFS_FEATURE_DELEG_INO,		\
>>   	CEPHFS_FEATURE_METRIC_COLLECT,		\
>> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
>>   }
>>   #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
>>   
>> +/*
>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>> + */
>> +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
>> +
>>   /*
>>    * Some lock dependencies:
>>    *
>> @@ -404,6 +414,9 @@ struct ceph_mds_client {
>>   	struct rb_root		quotarealms_inodes;
>>   	struct mutex		quotarealms_inodes_mutex;
>>   
>> +	/* maximum aggregate size of extended attributes on a file */
>> +	u64			max_xattr_pairs_size;
>> +
>>   	/*
>>   	 * snap_rwsem will cover cap linkage into snaprealms, and
>>   	 * realm snap contexts.  (later, we can do per-realm snap
>> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
>> index 8c2dc2c762a4..175a8c1449aa 100644
>> --- a/fs/ceph/xattr.c
>> +++ b/fs/ceph/xattr.c
>> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>>   			flags |= CEPH_XATTR_REMOVE;
>>   	}
>>   
>> -	dout("setxattr value=%.*s\n", (int)size, value);
>> +	dout("setxattr value size: %ld\n", size);
>>   
>>   	/* do request */
>>   	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>   	spin_lock(&ci->i_ceph_lock);
>>   retry:
>>   	issued = __ceph_caps_issued(ci, NULL);
>> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
>> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
>> +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
>> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
>> +		     __func__, ci->i_xattrs.version, required_blob_size,
>> +		     mdsc->max_xattr_pairs_size);
>>   		goto do_sync;
>> +	}
>>   
>>   	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>>   		lock_snap_rwsem = true;
>> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>   	     ceph_cap_string(issued));
>>   	__build_xattrs(inode);
>>   
>> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>> -
>>   	if (!ci->i_xattrs.prealloc_blob ||
>>   	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>>   		struct ceph_buffer *blob;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-27  0:36   ` Xiubo Li
@ 2022-05-27  0:44     ` Jeff Layton
  2022-05-27  1:08       ` Xiubo Li
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2022-05-27  0:44 UTC (permalink / raw)
  To: Xiubo Li, Luís Henriques, Ilya Dryomov; +Cc: ceph-devel, linux-kernel

On Fri, 2022-05-27 at 08:36 +0800, Xiubo Li wrote:
> On 5/27/22 2:39 AM, Jeff Layton wrote:
> > On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:
> > > The MDS tries to enforce a limit on the total key/values in extended
> > > attributes.  However, this limit is enforced only if doing a synchronous
> > > operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
> > > doesn't have a chance to enforce these limits.
> > > 
> > > This patch adds support for an extra feature bit that will allow the
> > > client to get the MDS max_xattr_pairs_size setting in the session message.
> > > Then, when setting an xattr, the kernel will revert to do a synchronous
> > > operation if that maximum size is exceeded.
> > > 
> > > While there, fix a dout() that would trigger a printk warning:
> > > 
> > > [   98.718078] ------------[ cut here ]------------
> > > [   98.719012] precision 65536 too large
> > > [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
> > > ...
> > > 
> > > URL: https://tracker.ceph.com/issues/55725
> > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > ---
> > >   fs/ceph/mds_client.c | 12 ++++++++++++
> > >   fs/ceph/mds_client.h | 15 ++++++++++++++-
> > >   fs/ceph/xattr.c      | 12 ++++++++----
> > >   3 files changed, 34 insertions(+), 5 deletions(-)
> > > 
> > > * Changes since v1
> > > 
> > > Added support for new feature bit to get the MDS max_xattr_pairs_size
> > > setting.
> > > 
> > > Also note that this patch relies on a patch that hasn't been merged yet
> > > ("ceph: use correct index when encoding client supported features"),
> > > otherwise the new feature bit won't be correctly encoded.
> > > 
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index 35597fafb48c..87a25b7cf496 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
> > >   	struct ceph_mds_session_head *h;
> > >   	u32 op;
> > >   	u64 seq, features = 0;
> > > +	u64 max_xattr_pairs_size = 0;
> > >   	int wake = 0;
> > >   	bool blocklisted = false;
> > >   
> > > @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
> > >   		}
> > >   	}
> > >   
> > > +	if (msg_version >= 6)
> > > +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
> > > +
> > >   	mutex_lock(&mdsc->mutex);
> > >   	if (op == CEPH_SESSION_CLOSE) {
> > >   		ceph_get_mds_session(session);
> > > @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
> > >   	}
> > >   	/* FIXME: this ttl calculation is generous */
> > >   	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
> > > +
> > > +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
> > > +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
> > > +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
> > > +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
> > > +	}
> > >   	mutex_unlock(&mdsc->mutex);
> > >   
> > >   	mutex_lock(&session->s_mutex);
> > > @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
> > >   	strscpy(mdsc->nodename, utsname()->nodename,
> > >   		sizeof(mdsc->nodename));
> > >   
> > > +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
> > > +
> > >   	fsc->mdsc = mdsc;
> > >   	return 0;
> > >   
> > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > index ca32f26f5eed..3db777df6d88 100644
> > > --- a/fs/ceph/mds_client.h
> > > +++ b/fs/ceph/mds_client.h
> > > @@ -29,8 +29,11 @@ enum ceph_feature_type {
> > >   	CEPHFS_FEATURE_MULTI_RECONNECT,
> > >   	CEPHFS_FEATURE_DELEG_INO,
> > >   	CEPHFS_FEATURE_METRIC_COLLECT,
> > > +	CEPHFS_FEATURE_ALTERNATE_NAME,
> > > +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
> > > +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
> > Having to make this feature-bit-dependent kind of sucks. I wonder if it
> > could be avoided...
> > 
> > A question:
> > 
> > How do the MDS's discover this setting? Do they get it from the mons? If
> > so, I wonder if there is a way for the clients to query the mon for this
> > instead of having to extend the MDS protocol?
> 
> It sounds like what the "max_file_size" does, which will be recorded in 
> the 'mdsmap'.
> 
> While currently the "max_xattr_pairs_size" is one MDS's option for each 
> daemon and could set different values for each MDS.
> 
> 

Right, but the MDSes in general don't use local config files. Where are
these settings stored? Could the client (potentially) query for them?

I'm pretty sure the client does fetch and parse the mdsmap. If it's
there then it could grab the setting for all of the MDS's at mount time
and settle on the lowest one.

I think a solution like that might be more resilient than having to
fiddle with feature bits and such...

> > >   
> > > -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
> > > +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
> > >   };
> > >   
> > >   /*
> > > @@ -45,9 +48,16 @@ enum ceph_feature_type {
> > >   	CEPHFS_FEATURE_MULTI_RECONNECT,		\
> > >   	CEPHFS_FEATURE_DELEG_INO,		\
> > >   	CEPHFS_FEATURE_METRIC_COLLECT,		\
> > > +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
> > >   }
> > >   #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
> > >   
> > > +/*
> > > + * Maximum size of xattrs the MDS can handle per inode by default.  This
> > > + * includes the attribute name and 4+4 bytes for the key/value sizes.
> > > + */
> > > +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
> > > +
> > >   /*
> > >    * Some lock dependencies:
> > >    *
> > > @@ -404,6 +414,9 @@ struct ceph_mds_client {
> > >   	struct rb_root		quotarealms_inodes;
> > >   	struct mutex		quotarealms_inodes_mutex;
> > >   
> > > +	/* maximum aggregate size of extended attributes on a file */
> > > +	u64			max_xattr_pairs_size;
> > > +
> > >   	/*
> > >   	 * snap_rwsem will cover cap linkage into snaprealms, and
> > >   	 * realm snap contexts.  (later, we can do per-realm snap
> > > diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> > > index 8c2dc2c762a4..175a8c1449aa 100644
> > > --- a/fs/ceph/xattr.c
> > > +++ b/fs/ceph/xattr.c
> > > @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
> > >   			flags |= CEPH_XATTR_REMOVE;
> > >   	}
> > >   
> > > -	dout("setxattr value=%.*s\n", (int)size, value);
> > > +	dout("setxattr value size: %ld\n", size);
> > >   
> > >   	/* do request */
> > >   	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> > > @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
> > >   	spin_lock(&ci->i_ceph_lock);
> > >   retry:
> > >   	issued = __ceph_caps_issued(ci, NULL);
> > > -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> > > +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> > > +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
> > > +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
> > > +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
> > > +		     __func__, ci->i_xattrs.version, required_blob_size,
> > > +		     mdsc->max_xattr_pairs_size);
> > >   		goto do_sync;
> > > +	}
> > >   
> > >   	if (!lock_snap_rwsem && !ci->i_head_snapc) {
> > >   		lock_snap_rwsem = true;
> > > @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
> > >   	     ceph_cap_string(issued));
> > >   	__build_xattrs(inode);
> > >   
> > > -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> > > -
> > >   	if (!ci->i_xattrs.prealloc_blob ||
> > >   	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
> > >   		struct ceph_buffer *blob;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-27  0:44     ` Jeff Layton
@ 2022-05-27  1:08       ` Xiubo Li
  2022-05-27  3:23         ` Gregory Farnum
  0 siblings, 1 reply; 9+ messages in thread
From: Xiubo Li @ 2022-05-27  1:08 UTC (permalink / raw)
  To: Jeff Layton, Luís Henriques, Ilya Dryomov; +Cc: ceph-devel, linux-kernel


On 5/27/22 8:44 AM, Jeff Layton wrote:
> On Fri, 2022-05-27 at 08:36 +0800, Xiubo Li wrote:
>> On 5/27/22 2:39 AM, Jeff Layton wrote:
>>> On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:
>>>> The MDS tries to enforce a limit on the total key/values in extended
>>>> attributes.  However, this limit is enforced only if doing a synchronous
>>>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>>>> doesn't have a chance to enforce these limits.
>>>>
>>>> This patch adds support for an extra feature bit that will allow the
>>>> client to get the MDS max_xattr_pairs_size setting in the session message.
>>>> Then, when setting an xattr, the kernel will revert to do a synchronous
>>>> operation if that maximum size is exceeded.
>>>>
>>>> While there, fix a dout() that would trigger a printk warning:
>>>>
>>>> [   98.718078] ------------[ cut here ]------------
>>>> [   98.719012] precision 65536 too large
>>>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>>>> ...
>>>>
>>>> URL: https://tracker.ceph.com/issues/55725
>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>> ---
>>>>    fs/ceph/mds_client.c | 12 ++++++++++++
>>>>    fs/ceph/mds_client.h | 15 ++++++++++++++-
>>>>    fs/ceph/xattr.c      | 12 ++++++++----
>>>>    3 files changed, 34 insertions(+), 5 deletions(-)
>>>>
>>>> * Changes since v1
>>>>
>>>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>>>> setting.
>>>>
>>>> Also note that this patch relies on a patch that hasn't been merged yet
>>>> ("ceph: use correct index when encoding client supported features"),
>>>> otherwise the new feature bit won't be correctly encoded.
>>>>
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 35597fafb48c..87a25b7cf496 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    	struct ceph_mds_session_head *h;
>>>>    	u32 op;
>>>>    	u64 seq, features = 0;
>>>> +	u64 max_xattr_pairs_size = 0;
>>>>    	int wake = 0;
>>>>    	bool blocklisted = false;
>>>>    
>>>> @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    		}
>>>>    	}
>>>>    
>>>> +	if (msg_version >= 6)
>>>> +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
>>>> +
>>>>    	mutex_lock(&mdsc->mutex);
>>>>    	if (op == CEPH_SESSION_CLOSE) {
>>>>    		ceph_get_mds_session(session);
>>>> @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
>>>>    	}
>>>>    	/* FIXME: this ttl calculation is generous */
>>>>    	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
>>>> +
>>>> +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
>>>> +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
>>>> +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
>>>> +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
>>>> +	}
>>>>    	mutex_unlock(&mdsc->mutex);
>>>>    
>>>>    	mutex_lock(&session->s_mutex);
>>>> @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>>>>    	strscpy(mdsc->nodename, utsname()->nodename,
>>>>    		sizeof(mdsc->nodename));
>>>>    
>>>> +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
>>>> +
>>>>    	fsc->mdsc = mdsc;
>>>>    	return 0;
>>>>    
>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>> index ca32f26f5eed..3db777df6d88 100644
>>>> --- a/fs/ceph/mds_client.h
>>>> +++ b/fs/ceph/mds_client.h
>>>> @@ -29,8 +29,11 @@ enum ceph_feature_type {
>>>>    	CEPHFS_FEATURE_MULTI_RECONNECT,
>>>>    	CEPHFS_FEATURE_DELEG_INO,
>>>>    	CEPHFS_FEATURE_METRIC_COLLECT,
>>>> +	CEPHFS_FEATURE_ALTERNATE_NAME,
>>>> +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
>>>> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>>> Having to make this feature-bit-dependent kind of sucks. I wonder if it
>>> could be avoided...
>>>
>>> A question:
>>>
>>> How do the MDS's discover this setting? Do they get it from the mons? If
>>> so, I wonder if there is a way for the clients to query the mon for this
>>> instead of having to extend the MDS protocol?
>> It sounds like what the "max_file_size" does, which will be recorded in
>> the 'mdsmap'.
>>
>> While currently the "max_xattr_pairs_size" is one MDS's option for each
>> daemon and could set different values for each MDS.
>>
>>
> Right, but the MDS's in general don't use local config files. Where are
> these settings stored? Could the client (potentially) query for them?

AFAIK, each process in ceph has its own copy of the
"CephContext". I don't know how to query all of them, but I know there
are some APIs, such as "rados_conf_set/get", that could do similar things.

Not sure whether it will work in our case.

>
> I'm pretty sure the client does fetch and parse the mdsmap. If it's
> there then it could grab the setting for all of the MDS's at mount time
> and settle on the lowest one.
>
> I think a solution like that might be more resilient than having to
> fiddle with feature bits and such...

Yeah, IMO just making this option behave like "max_file_size" is more
appropriate.

-- Xiubo

>
>>>>    
>>>> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
>>>> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>>>>    };
>>>>    
>>>>    /*
>>>> @@ -45,9 +48,16 @@ enum ceph_feature_type {
>>>>    	CEPHFS_FEATURE_MULTI_RECONNECT,		\
>>>>    	CEPHFS_FEATURE_DELEG_INO,		\
>>>>    	CEPHFS_FEATURE_METRIC_COLLECT,		\
>>>> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
>>>>    }
>>>>    #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
>>>>    
>>>> +/*
>>>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>>>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>>>> + */
>>>> +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
>>>> +
>>>>    /*
>>>>     * Some lock dependencies:
>>>>     *
>>>> @@ -404,6 +414,9 @@ struct ceph_mds_client {
>>>>    	struct rb_root		quotarealms_inodes;
>>>>    	struct mutex		quotarealms_inodes_mutex;
>>>>    
>>>> +	/* maximum aggregate size of extended attributes on a file */
>>>> +	u64			max_xattr_pairs_size;
>>>> +
>>>>    	/*
>>>>    	 * snap_rwsem will cover cap linkage into snaprealms, and
>>>>    	 * realm snap contexts.  (later, we can do per-realm snap
>>>> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
>>>> index 8c2dc2c762a4..175a8c1449aa 100644
>>>> --- a/fs/ceph/xattr.c
>>>> +++ b/fs/ceph/xattr.c
>>>> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>>>>    			flags |= CEPH_XATTR_REMOVE;
>>>>    	}
>>>>    
>>>> -	dout("setxattr value=%.*s\n", (int)size, value);
>>>> +	dout("setxattr value size: %ld\n", size);
>>>>    
>>>>    	/* do request */
>>>>    	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>>>> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>>>    	spin_lock(&ci->i_ceph_lock);
>>>>    retry:
>>>>    	issued = __ceph_caps_issued(ci, NULL);
>>>> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
>>>> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>>>> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
>>>> +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
>>>> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
>>>> +		     __func__, ci->i_xattrs.version, required_blob_size,
>>>> +		     mdsc->max_xattr_pairs_size);
>>>>    		goto do_sync;
>>>> +	}
>>>>    
>>>>    	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>>>>    		lock_snap_rwsem = true;
>>>> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>>>    	     ceph_cap_string(issued));
>>>>    	__build_xattrs(inode);
>>>>    
>>>> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>>>> -
>>>>    	if (!ci->i_xattrs.prealloc_blob ||
>>>>    	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>>>>    		struct ceph_buffer *blob;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-27  1:08       ` Xiubo Li
@ 2022-05-27  3:23         ` Gregory Farnum
  2022-05-27  9:14           ` Luís Henriques
  0 siblings, 1 reply; 9+ messages in thread
From: Gregory Farnum @ 2022-05-27  3:23 UTC (permalink / raw)
  To: Xiubo Li
  Cc: Jeff Layton, Luís Henriques, Ilya Dryomov, ceph-devel, linux-kernel

On Thu, May 26, 2022 at 6:10 PM Xiubo Li <xiubli@redhat.com> wrote:
>
>
> On 5/27/22 8:44 AM, Jeff Layton wrote:
> > On Fri, 2022-05-27 at 08:36 +0800, Xiubo Li wrote:
> >> On 5/27/22 2:39 AM, Jeff Layton wrote:
> >>> A question:
> >>>
> >>> How do the MDS's discover this setting? Do they get it from the mons? If
> >>> so, I wonder if there is a way for the clients to query the mon for this
> >>> instead of having to extend the MDS protocol?
> >> It sounds like what the "max_file_size" does, which will be recorded in
> >> the 'mdsmap'.
> >>
> >> While currently the "max_xattr_pairs_size" is one MDS's option for each
> >> daemon and could set different values for each MDS.
> >>
> >>
> > Right, but the MDS's in general don't use local config files. Where are
> > these settings stored? Could the client (potentially) query for them?
>
> AFAIK, each process in ceph it will have its own copy of the
> "CephContext". I don't know how to query all of them but I know there
> have some API such as "rados_conf_set/get" could do similar things.
>
> Not sure whether will it work in our case.
>
> >
> > I'm pretty sure the client does fetch and parse the mdsmap. If it's
> > there then it could grab the setting for all of the MDS's at mount time
> > and settle on the lowest one.
> >
> > I think a solution like that might be more resilient than having to
> > fiddle with feature bits and such...
>
> Yeah, IMO just making this option to be like the "max_file_size" is more
> appropriate.

Makes sense to me — this is really a property of the filesystem, not a
daemon, so it should be propagated through common filesystem state.
I guess Luis' https://github.com/ceph/ceph/pull/46357 should be
updated to do it that way? I see some discussion there about handling
old clients which don't recognize these limits as well.
-Greg


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-27  3:23         ` Gregory Farnum
@ 2022-05-27  9:14           ` Luís Henriques
  2022-05-27  9:28             ` Xiubo Li
  0 siblings, 1 reply; 9+ messages in thread
From: Luís Henriques @ 2022-05-27  9:14 UTC (permalink / raw)
  To: Gregory Farnum
  Cc: Xiubo Li, Jeff Layton, Ilya Dryomov, ceph-devel, linux-kernel

Gregory Farnum <gfarnum@redhat.com> writes:

> On Thu, May 26, 2022 at 6:10 PM Xiubo Li <xiubli@redhat.com> wrote:
>>
>>
>> On 5/27/22 8:44 AM, Jeff Layton wrote:
>> > On Fri, 2022-05-27 at 08:36 +0800, Xiubo Li wrote:
>> >> On 5/27/22 2:39 AM, Jeff Layton wrote:
>> >>> A question:
>> >>>
>> >>> How do the MDS's discover this setting? Do they get it from the mons? If
>> >>> so, I wonder if there is a way for the clients to query the mon for this
>> >>> instead of having to extend the MDS protocol?
>> >> It sounds like what the "max_file_size" does, which will be recorded in
>> >> the 'mdsmap'.
>> >>
>> >> While currently the "max_xattr_pairs_size" is one MDS's option for each
>> >> daemon and could set different values for each MDS.
>> >>
>> >>
>> > Right, but the MDS's in general don't use local config files. Where are
>> > these settings stored? Could the client (potentially) query for them?
>>
>> AFAIK, each process in ceph it will have its own copy of the
>> "CephContext". I don't know how to query all of them but I know there
>> have some API such as "rados_conf_set/get" could do similar things.
>>
>> Not sure whether will it work in our case.
>>
>> >
>> > I'm pretty sure the client does fetch and parse the mdsmap. If it's
>> > there then it could grab the setting for all of the MDS's at mount time
>> > and settle on the lowest one.
>> >
>> > I think a solution like that might be more resilient than having to
>> > fiddle with feature bits and such...
>>
>> Yeah, IMO just making this option to be like the "max_file_size" is more
>> appropriate.
>
> Makes sense to me — this is really a property of the filesystem, not a
> daemon, so it should be propagated through common filesystem state.

Right now the max_xattr_pairs_size seems to be something that can be set
on each MDS, so definitely not a filesystem property.  To be honest, I
think it's nasty to have this knob in the first place because it will
allow an admin to set it to a value that will allow clients to blow up the
MDS cluster.

> I guess Luis' https://github.com/ceph/ceph/pull/46357 should be
> updated to do it that way?

Just to confirm, by "to do it that way" you mean to move that setting into
the mdsmap, right?

> I see some discussion there about handling
> old clients which don't recognize these limits as well.

Yeah, this is where the feature bit came from.  This would allow old
clients to be identified so that the MDS would not give them 'Xx'
capabilities.  Old clients would be able to set xattrs but not to buffer
them, i.e. they'd be forced to do the SETXATTR synchronously.

Cheers,
-- 
Luís

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-05-27  9:14           ` Luís Henriques
@ 2022-05-27  9:28             ` Xiubo Li
  0 siblings, 0 replies; 9+ messages in thread
From: Xiubo Li @ 2022-05-27  9:28 UTC (permalink / raw)
  To: Luís Henriques, Gregory Farnum
  Cc: Jeff Layton, Ilya Dryomov, ceph-devel, linux-kernel


On 5/27/22 5:14 PM, Luís Henriques wrote:
> Gregory Farnum <gfarnum@redhat.com> writes:
>
>> On Thu, May 26, 2022 at 6:10 PM Xiubo Li <xiubli@redhat.com> wrote:
>>>
>>> On 5/27/22 8:44 AM, Jeff Layton wrote:
>>>> On Fri, 2022-05-27 at 08:36 +0800, Xiubo Li wrote:
>>>>> On 5/27/22 2:39 AM, Jeff Layton wrote:
>>>>>> A question:
>>>>>>
>>>>>> How do the MDS's discover this setting? Do they get it from the mons? If
>>>>>> so, I wonder if there is a way for the clients to query the mon for this
>>>>>> instead of having to extend the MDS protocol?
>>>>> It sounds like what the "max_file_size" does, which will be recorded in
>>>>> the 'mdsmap'.
>>>>>
>>>>> While currently the "max_xattr_pairs_size" is one MDS's option for each
>>>>> daemon and could set different values for each MDS.
>>>>>
>>>>>
>>>> Right, but the MDS's in general don't use local config files. Where are
>>>> these settings stored? Could the client (potentially) query for them?
>>> AFAIK, each process in ceph it will have its own copy of the
>>> "CephContext". I don't know how to query all of them but I know there
>>> have some API such as "rados_conf_set/get" could do similar things.
>>>
>>> Not sure whether will it work in our case.
>>>
>>>> I'm pretty sure the client does fetch and parse the mdsmap. If it's
>>>> there then it could grab the setting for all of the MDS's at mount time
>>>> and settle on the lowest one.
>>>>
>>>> I think a solution like that might be more resilient than having to
>>>> fiddle with feature bits and such...
>>> Yeah, IMO just making this option to be like the "max_file_size" is more
>>> appropriate.
>> Makes sense to me — this is really a property of the filesystem, not a
>> daemon, so it should be propagated through common filesystem state.
> Right now the max_xattr_pairs_size seems to be something that can be set
> on each MDS, so definitely not a filesystem property.  To be honest, I
> think it's nasty to have this knob in the first place because it will
> allow an admin to set it to a value that will allow clients to blowup the
> MDS cluster.
>
>> I guess Luis' https://github.com/ceph/ceph/pull/46357 should be
>> updated to do it that way?
> Just to confirm, by "to do it that way" you mean to move that setting into
> the mdsmap, right?

Yeah, I think so.

-- Xiubo

>
>> I see some discussion there about handling
>> old clients which don't recognize these limits as well.
> Yeah, this is where the feature bit came from.  This would allow old
> clients to be identified so that the MDS would not give them 'Xx'
> capabilities.  Old clients would be able to set xattrs but not to buffer
> them, i.e. they'd be forced to do the SETXATTR synchronously.
>
> Cheers,


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-05-27  9:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-25 17:24 [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
2022-05-26  4:52 ` Xiubo Li
2022-05-26 18:39 ` Jeff Layton
2022-05-27  0:36   ` Xiubo Li
2022-05-27  0:44     ` Jeff Layton
2022-05-27  1:08       ` Xiubo Li
2022-05-27  3:23         ` Gregory Farnum
2022-05-27  9:14           ` Luís Henriques
2022-05-27  9:28             ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.