All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
@ 2022-06-01 16:29 Luís Henriques
  2022-06-01 20:27 ` kernel test robot
  2022-06-02  2:33 ` Xiubo Li
  0 siblings, 2 replies; 7+ messages in thread
From: Luís Henriques @ 2022-06-01 16:29 UTC (permalink / raw)
  To: Jeff Layton, Xiubo Li, Ilya Dryomov, Gregory Farnum
  Cc: ceph-devel, linux-kernel, Luís Henriques

The MDS tries to enforce a limit on the total key/values in extended
attributes.  However, this limit is enforced only if doing a synchronous
operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
doesn't have a chance to enforce these limits.

This patch adds support for decoding the xattrs maximum size setting that is
distributed in the mdsmap.  Then, when setting an xattr, the kernel client
will revert to do a synchronous operation if that maximum size is exceeded.

While there, fix a dout() that would trigger a printk warning:

[   98.718078] ------------[ cut here ]------------
[   98.719012] precision 65536 too large
[   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
...

URL: https://tracker.ceph.com/issues/55725
Signed-off-by: Luís Henriques <lhenriques@suse.de>
---
 fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
 fs/ceph/xattr.c             | 12 ++++++++----
 include/linux/ceph/mdsmap.h |  1 +
 3 files changed, 32 insertions(+), 8 deletions(-)

* Changes since v2

Well, a lot has changed since v2!  Now the xattr max value setting is
obtained through the mdsmap, which needs to be decoded, and the feature
that was used in the previous revision was dropped.  The drawback is that
the MDS isn't able to know in advance if a client is aware of this xattr
max value.

* Changes since v1

Added support for new feature bit to get the MDS max_xattr_pairs_size
setting.

Also note that this patch relies on a patch that hasn't been merged yet
("ceph: use correct index when encoding client supported features"),
otherwise the new feature bit won't be correctly encoded.

diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 30387733765d..36b2bc18ca2a 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -13,6 +13,12 @@
 
 #include "super.h"
 
+/*
+ * Maximum size of xattrs the MDS can handle per inode by default.  This
+ * includes the attribute name and 4+4 bytes for the key/value sizes.
+ */
+#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
+
 #define CEPH_MDS_IS_READY(i, ignore_laggy) \
 	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
 
@@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
 		__decode_and_drop_type(p, end, u8, bad_ext);
 	}
 	if (mdsmap_ev >= 8) {
-		u32 name_len;
 		/* enabled */
 		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
-		ceph_decode_32_safe(p, end, name_len, bad_ext);
-		ceph_decode_need(p, end, name_len, bad_ext);
-		*p += name_len;
+		/* fs_name */
+		ceph_decode_skip_string(p, end, bad_ext);
 	}
 	/* damaged */
 	if (mdsmap_ev >= 9) {
@@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
 	} else {
 		m->m_damaged = false;
 	}
+	if (mdsmap_ev >= 17) {
+		/* balancer */
+		ceph_decode_skip_string(p, end, bad_ext);
+		/* standby_count_wanted */
+		ceph_decode_skip_32(p, end, bad_ext);
+		/* old_max_mds */
+		ceph_decode_skip_32(p, end, bad_ext);
+		/* min_compat_client */
+		ceph_decode_skip_8(p, end, bad_ext);
+		/* required_client_features */
+		ceph_decode_skip_set(p, end, 64, bad_ext);
+		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
+	} else {
+		m->m_max_xattr_size = MDS_MAX_XATTR_SIZE;
+	}
 bad_ext:
 	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
 	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c2dc2c762a4..67f046dac35c 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 			flags |= CEPH_XATTR_REMOVE;
 	}
 
-	dout("setxattr value=%.*s\n", (int)size, value);
+	dout("setxattr value size: %ld\n", size);
 
 	/* do request */
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	spin_lock(&ci->i_ceph_lock);
 retry:
 	issued = __ceph_caps_issued(ci, NULL);
-	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
+	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
+	    (required_blob_size >= mdsc->mdsmap->m_max_xattr_size)) {
+		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
+		     __func__, ci->i_xattrs.version, required_blob_size,
+		     mdsc->mdsmap->m_max_xattr_size);
 		goto do_sync;
+	}
 
 	if (!lock_snap_rwsem && !ci->i_head_snapc) {
 		lock_snap_rwsem = true;
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	     ceph_cap_string(issued));
 	__build_xattrs(inode);
 
-	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
 	if (!ci->i_xattrs.prealloc_blob ||
 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 		struct ceph_buffer *blob;
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 523fd0452856..4c3e0648dc27 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -25,6 +25,7 @@ struct ceph_mdsmap {
 	u32 m_session_timeout;          /* seconds */
 	u32 m_session_autoclose;        /* seconds */
 	u64 m_max_file_size;
+	u64 m_max_xattr_size;		/* maximum size for xattrs blob */
 	u32 m_max_mds;			/* expected up:active mds number */
 	u32 m_num_active_mds;		/* actual up:active mds number */
 	u32 possible_max_rank;		/* possible max rank index */

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-01 16:29 [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
@ 2022-06-01 20:27 ` kernel test robot
  2022-06-02  2:33 ` Xiubo Li
  1 sibling, 0 replies; 7+ messages in thread
From: kernel test robot @ 2022-06-01 20:27 UTC (permalink / raw)
  To: Luís Henriques; +Cc: llvm, kbuild-all

Hi Luís,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on ceph-client/for-linus]
[also build test WARNING on v5.18 next-20220601]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Lu-s-Henriques/ceph-prevent-a-client-from-exceeding-the-MDS-maximum-xattr-size/20220602-002950
base:   https://github.com/ceph/ceph-client.git for-linus
config: i386-randconfig-a006 (https://download.01.org/0day-ci/archive/20220602/202206020411.AHeREyCE-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project c825abd6b0198fb088d9752f556a70705bc99dfd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/052d43ff323eb7d212ed64d7cd3e2af4589f4596
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Lu-s-Henriques/ceph-prevent-a-client-from-exceeding-the-MDS-maximum-xattr-size/20220602-002950
        git checkout 052d43ff323eb7d212ed64d7cd3e2af4589f4596
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash fs/ceph/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> fs/ceph/xattr.c:1081:37: warning: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Wformat]
           dout("setxattr value size: %ld\n", size);
                                      ~~~     ^~~~
                                      %zu
   include/linux/ceph/ceph_debug.h:35:45: note: expanded from macro 'dout'
   # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
                                        ~~~    ^~~~~~~~~~~
   include/linux/printk.h:576:38: note: expanded from macro 'pr_debug'
           no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
                                       ~~~     ^~~~~~~~~~~
   include/linux/printk.h:132:17: note: expanded from macro 'no_printk'
                   printk(fmt, ##__VA_ARGS__);             \
                          ~~~    ^~~~~~~~~~~
   include/linux/printk.h:446:60: note: expanded from macro 'printk'
   #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
                                                       ~~~    ^~~~~~~~~~~
   include/linux/printk.h:418:19: note: expanded from macro 'printk_index_wrap'
                   _p_func(_fmt, ##__VA_ARGS__);                           \
                           ~~~~    ^~~~~~~~~~~
   1 warning generated.


vim +1081 fs/ceph/xattr.c

  1052	
  1053	static int ceph_sync_setxattr(struct inode *inode, const char *name,
  1054				      const char *value, size_t size, int flags)
  1055	{
  1056		struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
  1057		struct ceph_inode_info *ci = ceph_inode(inode);
  1058		struct ceph_mds_request *req;
  1059		struct ceph_mds_client *mdsc = fsc->mdsc;
  1060		struct ceph_osd_client *osdc = &fsc->client->osdc;
  1061		struct ceph_pagelist *pagelist = NULL;
  1062		int op = CEPH_MDS_OP_SETXATTR;
  1063		int err;
  1064	
  1065		if (size > 0) {
  1066			/* copy value into pagelist */
  1067			pagelist = ceph_pagelist_alloc(GFP_NOFS);
  1068			if (!pagelist)
  1069				return -ENOMEM;
  1070	
  1071			err = ceph_pagelist_append(pagelist, value, size);
  1072			if (err)
  1073				goto out;
  1074		} else if (!value) {
  1075			if (flags & CEPH_XATTR_REPLACE)
  1076				op = CEPH_MDS_OP_RMXATTR;
  1077			else
  1078				flags |= CEPH_XATTR_REMOVE;
  1079		}
  1080	
> 1081		dout("setxattr value size: %ld\n", size);
  1082	
  1083		/* do request */
  1084		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  1085		if (IS_ERR(req)) {
  1086			err = PTR_ERR(req);
  1087			goto out;
  1088		}
  1089	
  1090		req->r_path2 = kstrdup(name, GFP_NOFS);
  1091		if (!req->r_path2) {
  1092			ceph_mdsc_put_request(req);
  1093			err = -ENOMEM;
  1094			goto out;
  1095		}
  1096	
  1097		if (op == CEPH_MDS_OP_SETXATTR) {
  1098			req->r_args.setxattr.flags = cpu_to_le32(flags);
  1099			req->r_args.setxattr.osdmap_epoch =
  1100				cpu_to_le32(osdc->osdmap->epoch);
  1101			req->r_pagelist = pagelist;
  1102			pagelist = NULL;
  1103		}
  1104	
  1105		req->r_inode = inode;
  1106		ihold(inode);
  1107		req->r_num_caps = 1;
  1108		req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  1109	
  1110		dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
  1111		err = ceph_mdsc_do_request(mdsc, NULL, req);
  1112		ceph_mdsc_put_request(req);
  1113		dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
  1114	
  1115	out:
  1116		if (pagelist)
  1117			ceph_pagelist_release(pagelist);
  1118		return err;
  1119	}
  1120	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-01 16:29 [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
  2022-06-01 20:27 ` kernel test robot
@ 2022-06-02  2:33 ` Xiubo Li
  2022-06-02  9:26   ` Luís Henriques
  1 sibling, 1 reply; 7+ messages in thread
From: Xiubo Li @ 2022-06-02  2:33 UTC (permalink / raw)
  To: Luís Henriques, Jeff Layton, Ilya Dryomov, Gregory Farnum
  Cc: ceph-devel, linux-kernel


On 6/2/22 12:29 AM, Luís Henriques wrote:
> The MDS tries to enforce a limit on the total key/values in extended
> attributes.  However, this limit is enforced only if doing a synchronous
> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
> doesn't have a chance to enforce these limits.
>
> This patch adds support for decoding the xattrs maximum size setting that is
> distributed in the mdsmap.  Then, when setting an xattr, the kernel client
> will revert to do a synchronous operation if that maximum size is exceeded.
>
> While there, fix a dout() that would trigger a printk warning:
>
> [   98.718078] ------------[ cut here ]------------
> [   98.719012] precision 65536 too large
> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
> ...
>
> URL: https://tracker.ceph.com/issues/55725
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
>   fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
>   fs/ceph/xattr.c             | 12 ++++++++----
>   include/linux/ceph/mdsmap.h |  1 +
>   3 files changed, 32 insertions(+), 8 deletions(-)
>
> * Changes since v2
>
> Well, a lot has changed since v2!  Now the xattr max value setting is
> obtained through the mdsmap, which needs to be decoded, and the feature
> that was used in the previous revision was dropped.  The drawback is that
> the MDS isn't unable to know in advance if a client is aware of this xattr
> max value.
>
> * Changes since v1
>
> Added support for new feature bit to get the MDS max_xattr_pairs_size
> setting.
>
> Also note that this patch relies on a patch that hasn't been merged yet
> ("ceph: use correct index when encoding client supported features"),
> otherwise the new feature bit won't be correctly encoded.
>
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 30387733765d..36b2bc18ca2a 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -13,6 +13,12 @@
>   
>   #include "super.h"
>   
> +/*
> + * Maximum size of xattrs the MDS can handle per inode by default.  This
> + * includes the attribute name and 4+4 bytes for the key/value sizes.
> + */
> +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
> +
>   #define CEPH_MDS_IS_READY(i, ignore_laggy) \
>   	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
>   
> @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>   		__decode_and_drop_type(p, end, u8, bad_ext);
>   	}
>   	if (mdsmap_ev >= 8) {
> -		u32 name_len;
>   		/* enabled */
>   		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
> -		ceph_decode_32_safe(p, end, name_len, bad_ext);
> -		ceph_decode_need(p, end, name_len, bad_ext);
> -		*p += name_len;
> +		/* fs_name */
> +		ceph_decode_skip_string(p, end, bad_ext);
>   	}
>   	/* damaged */
>   	if (mdsmap_ev >= 9) {
> @@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>   	} else {
>   		m->m_damaged = false;
>   	}
> +	if (mdsmap_ev >= 17) {
> +		/* balancer */
> +		ceph_decode_skip_string(p, end, bad_ext);
> +		/* standby_count_wanted */
> +		ceph_decode_skip_32(p, end, bad_ext);
> +		/* old_max_mds */
> +		ceph_decode_skip_32(p, end, bad_ext);
> +		/* min_compat_client */
> +		ceph_decode_skip_8(p, end, bad_ext);

This is incorrect.

If mdsmap_ev == 15 the min_compat_client will be a feature_bitset_t 
instead of int8_t.


> +		/* required_client_features */
> +		ceph_decode_skip_set(p, end, 64, bad_ext);
> +		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
> +	} else {
> +		m->m_max_xattr_size = MDS_MAX_XATTR_SIZE;
> +	}
>   bad_ext:
>   	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
>   	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 8c2dc2c762a4..67f046dac35c 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>   			flags |= CEPH_XATTR_REMOVE;
>   	}
>   
> -	dout("setxattr value=%.*s\n", (int)size, value);
> +	dout("setxattr value size: %ld\n", size);
>   
>   	/* do request */
>   	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>   	spin_lock(&ci->i_ceph_lock);
>   retry:
>   	issued = __ceph_caps_issued(ci, NULL);
> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
> +	    (required_blob_size >= mdsc->mdsmap->m_max_xattr_size)) {

Shouldn't it be '>' instead ?

We'd better always force a sync request with old ceph: just check
if mdsmap_ev < 17. It's not safe to buffer it because it may be
discarded, as your ceph PR does.

-- Xiubo

> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
> +		     __func__, ci->i_xattrs.version, required_blob_size,
> +		     mdsc->mdsmap->m_max_xattr_size);
>   		goto do_sync;
> +	}
>   
>   	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>   		lock_snap_rwsem = true;
> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>   	     ceph_cap_string(issued));
>   	__build_xattrs(inode);
>   
> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> -
>   	if (!ci->i_xattrs.prealloc_blob ||
>   	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>   		struct ceph_buffer *blob;
> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> index 523fd0452856..4c3e0648dc27 100644
> --- a/include/linux/ceph/mdsmap.h
> +++ b/include/linux/ceph/mdsmap.h
> @@ -25,6 +25,7 @@ struct ceph_mdsmap {
>   	u32 m_session_timeout;          /* seconds */
>   	u32 m_session_autoclose;        /* seconds */
>   	u64 m_max_file_size;
> +	u64 m_max_xattr_size;		/* maximum size for xattrs blob */
>   	u32 m_max_mds;			/* expected up:active mds number */
>   	u32 m_num_active_mds;		/* actual up:active mds number */
>   	u32 possible_max_rank;		/* possible max rank index */
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-02  2:33 ` Xiubo Li
@ 2022-06-02  9:26   ` Luís Henriques
  2022-06-02  9:42     ` Xiubo Li
  0 siblings, 1 reply; 7+ messages in thread
From: Luís Henriques @ 2022-06-02  9:26 UTC (permalink / raw)
  To: Xiubo Li
  Cc: Jeff Layton, Ilya Dryomov, Gregory Farnum, ceph-devel, linux-kernel

Xiubo Li <xiubli@redhat.com> writes:

> On 6/2/22 12:29 AM, Luís Henriques wrote:
>> The MDS tries to enforce a limit on the total key/values in extended
>> attributes.  However, this limit is enforced only if doing a synchronous
>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>> doesn't have a chance to enforce these limits.
>>
>> This patch adds support for decoding the xattrs maximum size setting that is
>> distributed in the mdsmap.  Then, when setting an xattr, the kernel client
>> will revert to do a synchronous operation if that maximum size is exceeded.
>>
>> While there, fix a dout() that would trigger a printk warning:
>>
>> [   98.718078] ------------[ cut here ]------------
>> [   98.719012] precision 65536 too large
>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>> ...
>>
>> URL: https://tracker.ceph.com/issues/55725
>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>> ---
>>   fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
>>   fs/ceph/xattr.c             | 12 ++++++++----
>>   include/linux/ceph/mdsmap.h |  1 +
>>   3 files changed, 32 insertions(+), 8 deletions(-)
>>
>> * Changes since v2
>>
>> Well, a lot has changed since v2!  Now the xattr max value setting is
>> obtained through the mdsmap, which needs to be decoded, and the feature
>> that was used in the previous revision was dropped.  The drawback is that
>> the MDS isn't unable to know in advance if a client is aware of this xattr
>> max value.
>>
>> * Changes since v1
>>
>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>> setting.
>>
>> Also note that this patch relies on a patch that hasn't been merged yet
>> ("ceph: use correct index when encoding client supported features"),
>> otherwise the new feature bit won't be correctly encoded.
>>
>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>> index 30387733765d..36b2bc18ca2a 100644
>> --- a/fs/ceph/mdsmap.c
>> +++ b/fs/ceph/mdsmap.c
>> @@ -13,6 +13,12 @@
>>     #include "super.h"
>>   +/*
>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>> + */
>> +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
>> +
>>   #define CEPH_MDS_IS_READY(i, ignore_laggy) \
>>   	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
>>   @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>> *end, bool msgr2)
>>   		__decode_and_drop_type(p, end, u8, bad_ext);
>>   	}
>>   	if (mdsmap_ev >= 8) {
>> -		u32 name_len;
>>   		/* enabled */
>>   		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
>> -		ceph_decode_32_safe(p, end, name_len, bad_ext);
>> -		ceph_decode_need(p, end, name_len, bad_ext);
>> -		*p += name_len;
>> +		/* fs_name */
>> +		ceph_decode_skip_string(p, end, bad_ext);
>>   	}
>>   	/* damaged */
>>   	if (mdsmap_ev >= 9) {
>> @@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>>   	} else {
>>   		m->m_damaged = false;
>>   	}
>> +	if (mdsmap_ev >= 17) {
>> +		/* balancer */
>> +		ceph_decode_skip_string(p, end, bad_ext);
>> +		/* standby_count_wanted */
>> +		ceph_decode_skip_32(p, end, bad_ext);
>> +		/* old_max_mds */
>> +		ceph_decode_skip_32(p, end, bad_ext);
>> +		/* min_compat_client */
>> +		ceph_decode_skip_8(p, end, bad_ext);
>
> This is incorrect.
>
> If mdsmap_ev == 15 the min_compat_client will be a feature_bitset_t instead of
> int8_t.

Hmm... can you point me at where that's done in the code?  As usual, I'm
confused with that code and simply can't see that.

Also, if that happens only when mdsmap_ev == 15, then there's no problem
because that branch is only taken if it's >= 17.

>
>
>> +		/* required_client_features */
>> +		ceph_decode_skip_set(p, end, 64, bad_ext);
>> +		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
>> +	} else {
>> +		m->m_max_xattr_size = MDS_MAX_XATTR_SIZE;
>> +	}
>>   bad_ext:
>>   	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
>>   	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
>> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
>> index 8c2dc2c762a4..67f046dac35c 100644
>> --- a/fs/ceph/xattr.c
>> +++ b/fs/ceph/xattr.c
>> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>>   			flags |= CEPH_XATTR_REMOVE;
>>   	}
>>   -	dout("setxattr value=%.*s\n", (int)size, value);
>> +	dout("setxattr value size: %ld\n", size);
>>     	/* do request */
>>   	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>   	spin_lock(&ci->i_ceph_lock);
>>   retry:
>>   	issued = __ceph_caps_issued(ci, NULL);
>> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
>> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
>> +	    (required_blob_size >= mdsc->mdsmap->m_max_xattr_size)) {
>
> Shouldn't it be '>' instead ?

Ok, I'll fix that.

> We'd better always force to do a sync request with old ceph. Just check if the
> mdsmap_ev < 17. It's not safe to buffer it because it maybe discarded as your
> ceph PR does.

Right, that can be done.  So, I can simply set the m_max_xattr_size to '0'
if mdsmap_ev < 17.  Then, this 'if' condition will always be evaluated to
true because required_blob_size will be > 0.  Does that sound OK?

Cheers,
-- 
Luís


> -- Xiubo
>
>> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
>> +		     __func__, ci->i_xattrs.version, required_blob_size,
>> +		     mdsc->mdsmap->m_max_xattr_size);
>>   		goto do_sync;
>> +	}
>>     	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>>   		lock_snap_rwsem = true;
>> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>   	     ceph_cap_string(issued));
>>   	__build_xattrs(inode);
>>   -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>> -
>>   	if (!ci->i_xattrs.prealloc_blob ||
>>   	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>>   		struct ceph_buffer *blob;
>> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
>> index 523fd0452856..4c3e0648dc27 100644
>> --- a/include/linux/ceph/mdsmap.h
>> +++ b/include/linux/ceph/mdsmap.h
>> @@ -25,6 +25,7 @@ struct ceph_mdsmap {
>>   	u32 m_session_timeout;          /* seconds */
>>   	u32 m_session_autoclose;        /* seconds */
>>   	u64 m_max_file_size;
>> +	u64 m_max_xattr_size;		/* maximum size for xattrs blob */
>>   	u32 m_max_mds;			/* expected up:active mds number */
>>   	u32 m_num_active_mds;		/* actual up:active mds number */
>>   	u32 possible_max_rank;		/* possible max rank index */
>>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-02  9:26   ` Luís Henriques
@ 2022-06-02  9:42     ` Xiubo Li
  2022-06-02 10:28       ` Luís Henriques
  0 siblings, 1 reply; 7+ messages in thread
From: Xiubo Li @ 2022-06-02  9:42 UTC (permalink / raw)
  To: Luís Henriques
  Cc: Jeff Layton, Ilya Dryomov, Gregory Farnum, ceph-devel, linux-kernel


On 6/2/22 5:26 PM, Luís Henriques wrote:
> Xiubo Li <xiubli@redhat.com> writes:
>
>> On 6/2/22 12:29 AM, Luís Henriques wrote:
>>> The MDS tries to enforce a limit on the total key/values in extended
>>> attributes.  However, this limit is enforced only if doing a synchronous
>>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>>> doesn't have a chance to enforce these limits.
>>>
>>> This patch adds support for decoding the xattrs maximum size setting that is
>>> distributed in the mdsmap.  Then, when setting an xattr, the kernel client
>>> will revert to do a synchronous operation if that maximum size is exceeded.
>>>
>>> While there, fix a dout() that would trigger a printk warning:
>>>
>>> [   98.718078] ------------[ cut here ]------------
>>> [   98.719012] precision 65536 too large
>>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>>> ...
>>>
>>> URL: https://tracker.ceph.com/issues/55725
>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>> ---
>>>    fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
>>>    fs/ceph/xattr.c             | 12 ++++++++----
>>>    include/linux/ceph/mdsmap.h |  1 +
>>>    3 files changed, 32 insertions(+), 8 deletions(-)
>>>
>>> * Changes since v2
>>>
>>> Well, a lot has changed since v2!  Now the xattr max value setting is
>>> obtained through the mdsmap, which needs to be decoded, and the feature
>>> that was used in the previous revision was dropped.  The drawback is that
>>> the MDS isn't unable to know in advance if a client is aware of this xattr
>>> max value.
>>>
>>> * Changes since v1
>>>
>>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>>> setting.
>>>
>>> Also note that this patch relies on a patch that hasn't been merged yet
>>> ("ceph: use correct index when encoding client supported features"),
>>> otherwise the new feature bit won't be correctly encoded.
>>>
>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>> index 30387733765d..36b2bc18ca2a 100644
>>> --- a/fs/ceph/mdsmap.c
>>> +++ b/fs/ceph/mdsmap.c
>>> @@ -13,6 +13,12 @@
>>>      #include "super.h"
>>>    +/*
>>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>>> + */
>>> +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
>>> +
>>>    #define CEPH_MDS_IS_READY(i, ignore_laggy) \
>>>    	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
>>>    @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>>> *end, bool msgr2)
>>>    		__decode_and_drop_type(p, end, u8, bad_ext);
>>>    	}
>>>    	if (mdsmap_ev >= 8) {
>>> -		u32 name_len;
>>>    		/* enabled */
>>>    		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
>>> -		ceph_decode_32_safe(p, end, name_len, bad_ext);
>>> -		ceph_decode_need(p, end, name_len, bad_ext);
>>> -		*p += name_len;
>>> +		/* fs_name */
>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>>    	}
>>>    	/* damaged */
>>>    	if (mdsmap_ev >= 9) {
>>> @@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>>>    	} else {
>>>    		m->m_damaged = false;
>>>    	}
>>> +	if (mdsmap_ev >= 17) {
>>> +		/* balancer */
>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>> +		/* standby_count_wanted */
>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>> +		/* old_max_mds */
>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>> +		/* min_compat_client */
>>> +		ceph_decode_skip_8(p, end, bad_ext);
>> This is incorrect.
>>
>> If mdsmap_ev == 15 the min_compat_client will be a feature_bitset_t instead of
>> int8_t.
> Hmm... can you point me at where that's done in the code?  As usual, I'm
> confused with that code and simply can't see that.
>
> Also, if that happens only when mdsmap_ev == 15, then there's no problem
> because that branch is only taken if it's >= 17.

Yeah, so you should skip 32 or 32+64 bits instead here, just like:

3536                 /* version >= 3, feature bits */
3537                 ceph_decode_32_safe(&p, end, len, bad);
3538                 if (len) {
3539                         ceph_decode_64_safe(&p, end, features, bad);
3540                         p += len - sizeof(features);
3541                 }

For the ceph code, please see:

https://github.com/ceph/ceph/blob/main/src/mds/MDSMap.cc#L925

>>
>>> +		/* required_client_features */
>>> +		ceph_decode_skip_set(p, end, 64, bad_ext);
>>> +		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
>>> +	} else {
>>> +		m->m_max_xattr_size = MDS_MAX_XATTR_SIZE;
>>> +	}
>>>    bad_ext:
>>>    	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
>>>    	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
>>> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
>>> index 8c2dc2c762a4..67f046dac35c 100644
>>> --- a/fs/ceph/xattr.c
>>> +++ b/fs/ceph/xattr.c
>>> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>>>    			flags |= CEPH_XATTR_REMOVE;
>>>    	}
>>>    -	dout("setxattr value=%.*s\n", (int)size, value);
>>> +	dout("setxattr value size: %ld\n", size);
>>>      	/* do request */
>>>    	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>>> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>>    	spin_lock(&ci->i_ceph_lock);
>>>    retry:
>>>    	issued = __ceph_caps_issued(ci, NULL);
>>> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
>>> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>>> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
>>> +	    (required_blob_size >= mdsc->mdsmap->m_max_xattr_size)) {
>> Shouldn't it be '>' instead ?
> Ok, I'll fix that.
>
>> We'd better always force to do a sync request with old ceph. Just check if the
>> mdsmap_ev < 17. It's not safe to buffer it because it maybe discarded as your
>> ceph PR does.
> Right, that can be done.  So, I can simply set the m_max_xattr_size to '0'
> if mdsmap_ev < 17.  Then, this 'if' condition will always be evaluated to
> true because required_blob_size will be > 0.  Does that sound OK?

Yeah, sounds good.

-- Xiubo


>
> Cheers,


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-02  9:42     ` Xiubo Li
@ 2022-06-02 10:28       ` Luís Henriques
  2022-06-02 10:57         ` Xiubo Li
  0 siblings, 1 reply; 7+ messages in thread
From: Luís Henriques @ 2022-06-02 10:28 UTC (permalink / raw)
  To: Xiubo Li
  Cc: Jeff Layton, Ilya Dryomov, Gregory Farnum, ceph-devel, linux-kernel

Xiubo Li <xiubli@redhat.com> writes:

> On 6/2/22 5:26 PM, Luís Henriques wrote:
>> Xiubo Li <xiubli@redhat.com> writes:
>>
>>> On 6/2/22 12:29 AM, Luís Henriques wrote:
>>>> The MDS tries to enforce a limit on the total key/values in extended
>>>> attributes.  However, this limit is enforced only if doing a synchronous
>>>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>>>> doesn't have a chance to enforce these limits.
>>>>
>>>> This patch adds support for decoding the xattrs maximum size setting that is
>>>> distributed in the mdsmap.  Then, when setting an xattr, the kernel client
>>>> will revert to do a synchronous operation if that maximum size is exceeded.
>>>>
>>>> While there, fix a dout() that would trigger a printk warning:
>>>>
>>>> [   98.718078] ------------[ cut here ]------------
>>>> [   98.719012] precision 65536 too large
>>>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>>>> ...
>>>>
>>>> URL: https://tracker.ceph.com/issues/55725
>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>> ---
>>>>    fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
>>>>    fs/ceph/xattr.c             | 12 ++++++++----
>>>>    include/linux/ceph/mdsmap.h |  1 +
>>>>    3 files changed, 32 insertions(+), 8 deletions(-)
>>>>
>>>> * Changes since v2
>>>>
>>>> Well, a lot has changed since v2!  Now the xattr max value setting is
>>>> obtained through the mdsmap, which needs to be decoded, and the feature
>>>> that was used in the previous revision was dropped.  The drawback is that
>>>> the MDS isn't able to know in advance if a client is aware of this xattr
>>>> max value.
>>>>
>>>> * Changes since v1
>>>>
>>>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>>>> setting.
>>>>
>>>> Also note that this patch relies on a patch that hasn't been merged yet
>>>> ("ceph: use correct index when encoding client supported features"),
>>>> otherwise the new feature bit won't be correctly encoded.
>>>>
>>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>>> index 30387733765d..36b2bc18ca2a 100644
>>>> --- a/fs/ceph/mdsmap.c
>>>> +++ b/fs/ceph/mdsmap.c
>>>> @@ -13,6 +13,12 @@
>>>>      #include "super.h"
>>>>    +/*
>>>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>>>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>>>> + */
>>>> +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
>>>> +
>>>>    #define CEPH_MDS_IS_READY(i, ignore_laggy) \
>>>>    	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
>>>>    @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>>>> *end, bool msgr2)
>>>>    		__decode_and_drop_type(p, end, u8, bad_ext);
>>>>    	}
>>>>    	if (mdsmap_ev >= 8) {
>>>> -		u32 name_len;
>>>>    		/* enabled */
>>>>    		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
>>>> -		ceph_decode_32_safe(p, end, name_len, bad_ext);
>>>> -		ceph_decode_need(p, end, name_len, bad_ext);
>>>> -		*p += name_len;
>>>> +		/* fs_name */
>>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>>>    	}
>>>>    	/* damaged */
>>>>    	if (mdsmap_ev >= 9) {
>>>> @@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>>>>    	} else {
>>>>    		m->m_damaged = false;
>>>>    	}
>>>> +	if (mdsmap_ev >= 17) {
>>>> +		/* balancer */
>>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>>> +		/* standby_count_wanted */
>>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>>> +		/* old_max_mds */
>>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>>> +		/* min_compat_client */
>>>> +		ceph_decode_skip_8(p, end, bad_ext);
>>> This is incorrect.
>>>
>>> If mdsmap_ev == 15 the min_compat_client will be a feature_bitset_t instead of
>>> int8_t.
>> Hmm... can you point me at where that's done in the code?  As usual, I'm
>> confused with that code and simply can't see that.
>>
>> Also, if that happens only when mdsmap_ev == 15, then there's no problem
>> because that branch is only taken if it's >= 17.
>
> Yeah, so you should skip 32 or 32+64 bits instead here, just like:
>
> 3536                 /* version >= 3, feature bits */
> 3537                 ceph_decode_32_safe(&p, end, len, bad);
> 3538                 if (len) {
> 3539                         ceph_decode_64_safe(&p, end, features, bad);
> 3540                         p += len - sizeof(features);
> 3541                 }
>
> For the ceph code please see:
>
> Please see https://github.com/ceph/ceph/blob/main/src/mds/MDSMap.cc#L925.

I still don't see what you're saying.  From what I understand, with <= 15 we
used to have 'min_compat_client', which is of type 'ceph_release_t',
defined in src/common/ceph_releases.h:

enum class ceph_release_t : std::uint8_t {
...
}

Then, starting with >= 16 the MDS ignores this 'min_compat_client' field
(but still encodes/decodes it), and it *adds* 'required_client_features',
which is a 'feature_bitset_t' and that is decoded immediately after (see
below, the ceph_decode_skip_set() call).

Cheers,
-- 
Luís

>>>
>>>> +		/* required_client_features */
>>>> +		ceph_decode_skip_set(p, end, 64, bad_ext);
>>>> +		ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
>>>> +	} else {
>>>> +		m->m_max_xattr_size = MDS_MAX_XATTR_SIZE;
>>>> +	}
>>>>    bad_ext:
>>>>    	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
>>>>    	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
>>>> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
>>>> index 8c2dc2c762a4..67f046dac35c 100644
>>>> --- a/fs/ceph/xattr.c
>>>> +++ b/fs/ceph/xattr.c
>>>> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>>>>    			flags |= CEPH_XATTR_REMOVE;
>>>>    	}
>>>>    -	dout("setxattr value=%.*s\n", (int)size, value);
>>>> +	dout("setxattr value size: %ld\n", size);
>>>>      	/* do request */
>>>>    	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>>>> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>>>>    	spin_lock(&ci->i_ceph_lock);
>>>>    retry:
>>>>    	issued = __ceph_caps_issued(ci, NULL);
>>>> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
>>>> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
>>>> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
>>>> +	    (required_blob_size >= mdsc->mdsmap->m_max_xattr_size)) {
>>> Shouldn't it be '>' instead ?
>> Ok, I'll fix that.
>>
>>> We'd better always force to do a sync request with old ceph. Just check if the
>>> mdsmap_ev < 17. It's not safe to buffer it because it maybe discarded as your
>>> ceph PR does.
>> Right, that can be done.  So, I can simply set the m_max_xattr_size to '0'
>> if mdsmap_ev < 17.  Then, this 'if' condition will always be evaluated to
>> true because required_blob_size will be > 0.  Does that sound OK?
>
> Yeah, sounds good.
>
> -- Xiubo
>
>
>>
>> Cheers,
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size
  2022-06-02 10:28       ` Luís Henriques
@ 2022-06-02 10:57         ` Xiubo Li
  0 siblings, 0 replies; 7+ messages in thread
From: Xiubo Li @ 2022-06-02 10:57 UTC (permalink / raw)
  To: Luís Henriques
  Cc: Jeff Layton, Ilya Dryomov, Gregory Farnum, ceph-devel, linux-kernel


On 6/2/22 6:28 PM, Luís Henriques wrote:
> Xiubo Li <xiubli@redhat.com> writes:
>
>> On 6/2/22 5:26 PM, Luís Henriques wrote:
>>> Xiubo Li <xiubli@redhat.com> writes:
>>>
>>>> On 6/2/22 12:29 AM, Luís Henriques wrote:
>>>>> The MDS tries to enforce a limit on the total key/values in extended
>>>>> attributes.  However, this limit is enforced only if doing a synchronous
>>>>> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
>>>>> doesn't have a chance to enforce these limits.
>>>>>
>>>>> This patch adds support for decoding the xattrs maximum size setting that is
>>>>> distributed in the mdsmap.  Then, when setting an xattr, the kernel client
>>>>> will revert to do a synchronous operation if that maximum size is exceeded.
>>>>>
>>>>> While there, fix a dout() that would trigger a printk warning:
>>>>>
>>>>> [   98.718078] ------------[ cut here ]------------
>>>>> [   98.719012] precision 65536 too large
>>>>> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
>>>>> ...
>>>>>
>>>>> URL: https://tracker.ceph.com/issues/55725
>>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>>> ---
>>>>>     fs/ceph/mdsmap.c            | 27 +++++++++++++++++++++++----
>>>>>     fs/ceph/xattr.c             | 12 ++++++++----
>>>>>     include/linux/ceph/mdsmap.h |  1 +
>>>>>     3 files changed, 32 insertions(+), 8 deletions(-)
>>>>>
>>>>> * Changes since v2
>>>>>
>>>>> Well, a lot has changed since v2!  Now the xattr max value setting is
>>>>> obtained through the mdsmap, which needs to be decoded, and the feature
>>>>> that was used in the previous revision was dropped.  The drawback is that
>>>>> the MDS isn't able to know in advance if a client is aware of this xattr
>>>>> max value.
>>>>>
>>>>> * Changes since v1
>>>>>
>>>>> Added support for new feature bit to get the MDS max_xattr_pairs_size
>>>>> setting.
>>>>>
>>>>> Also note that this patch relies on a patch that hasn't been merged yet
>>>>> ("ceph: use correct index when encoding client supported features"),
>>>>> otherwise the new feature bit won't be correctly encoded.
>>>>>
>>>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>>>> index 30387733765d..36b2bc18ca2a 100644
>>>>> --- a/fs/ceph/mdsmap.c
>>>>> +++ b/fs/ceph/mdsmap.c
>>>>> @@ -13,6 +13,12 @@
>>>>>       #include "super.h"
>>>>>     +/*
>>>>> + * Maximum size of xattrs the MDS can handle per inode by default.  This
>>>>> + * includes the attribute name and 4+4 bytes for the key/value sizes.
>>>>> + */
>>>>> +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */
>>>>> +
>>>>>     #define CEPH_MDS_IS_READY(i, ignore_laggy) \
>>>>>     	(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
>>>>>     @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>>>>> *end, bool msgr2)
>>>>>     		__decode_and_drop_type(p, end, u8, bad_ext);
>>>>>     	}
>>>>>     	if (mdsmap_ev >= 8) {
>>>>> -		u32 name_len;
>>>>>     		/* enabled */
>>>>>     		ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
>>>>> -		ceph_decode_32_safe(p, end, name_len, bad_ext);
>>>>> -		ceph_decode_need(p, end, name_len, bad_ext);
>>>>> -		*p += name_len;
>>>>> +		/* fs_name */
>>>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>>>>     	}
>>>>>     	/* damaged */
>>>>>     	if (mdsmap_ev >= 9) {
>>>>> @@ -370,6 +374,21 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
>>>>>     	} else {
>>>>>     		m->m_damaged = false;
>>>>>     	}
>>>>> +	if (mdsmap_ev >= 17) {
>>>>> +		/* balancer */
>>>>> +		ceph_decode_skip_string(p, end, bad_ext);
>>>>> +		/* standby_count_wanted */
>>>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>>>> +		/* old_max_mds */
>>>>> +		ceph_decode_skip_32(p, end, bad_ext);
>>>>> +		/* min_compat_client */
>>>>> +		ceph_decode_skip_8(p, end, bad_ext);
>>>> This is incorrect.
>>>>
>>>> If mdsmap_ev == 15 the min_compat_client will be a feature_bitset_t instead of
>>>> int8_t.
>>> Hmm... can you point me at where that's done in the code?  As usual, I'm
>>> confused with that code and simply can't see that.
>>>
>>> Also, if that happens only when mdsmap_ev == 15, then there's no problem
>>> because that branch is only taken if it's >= 17.
>> Yeah, so you should skip 32 or 32+64 bits instead here, just like:
>>
>> 3536                 /* version >= 3, feature bits */
>> 3537                 ceph_decode_32_safe(&p, end, len, bad);
>> 3538                 if (len) {
>> 3539                         ceph_decode_64_safe(&p, end, features, bad);
>> 3540                         p += len - sizeof(features);
>> 3541                 }
>>
>> For the ceph code please see:
>>
>> Please see https://github.com/ceph/ceph/blob/main/src/mds/MDSMap.cc#L925.
> I still don't see what you're saying.  From what I understand, with <= 15 we
> used to have 'min_compat_client', which is of type 'ceph_release_t',
> defined in src/common/ceph_releases.h:
>
> enum class ceph_release_t : std::uint8_t {
> ...
> }

Okay, you are right.

I misread that code.

-- Xiubo


> Then, starting with >= 16 the MDS ignores this 'min_compat_client' field
> (but still encodes/decodes it), and it *adds* 'required_client_features',
> which is a 'feature_bitset_t' and that is decoded immediately after (see
> below, the ceph_decode_skip_set() call).
>
> Cheers,


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-06-02 10:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-01 16:29 [RFC PATCH v3] ceph: prevent a client from exceeding the MDS maximum xattr size Luís Henriques
2022-06-01 20:27 ` kernel test robot
2022-06-02  2:33 ` Xiubo Li
2022-06-02  9:26   ` Luís Henriques
2022-06-02  9:42     ` Xiubo Li
2022-06-02 10:28       ` Luís Henriques
2022-06-02 10:57         ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.