All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
@ 2022-03-09 13:59 xiubli
  2022-03-09 14:50 ` Jeff Layton
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: xiubli @ 2022-03-09 13:59 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, vshankar, lhenriques, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

For the readdir request the dentries will be pasred and dencrypted
in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
get the dentry name from the dentry cache instead of parsing and
dencrypting them again. This could improve performance.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V7:
- Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.

V6:
- Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
  instead, since we are limiting the max length of snapshot name to
  240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).

V5:
- fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
- release the rde->dentry in destroy_reply_info



 fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
 fs/ceph/inode.c      |  7 ++++++
 fs/ceph/mds_client.c |  1 +
 fs/ceph/mds_client.h |  1 +
 4 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6df2a91af236..2397c34e9173 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	int err;
 	unsigned frag = -1;
 	struct ceph_mds_reply_info_parsed *rinfo;
-	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
-	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+	char *dentry_name = NULL;
 
 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
 	if (dfi->file_info.flags & CEPH_F_ATEND)
@@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 		spin_unlock(&ci->i_ceph_lock);
 	}
 
-	err = ceph_fname_alloc_buffer(inode, &tname);
-	if (err < 0)
-		goto out;
-
-	err = ceph_fname_alloc_buffer(inode, &oname);
-	if (err < 0)
-		goto out;
-
 	/* proceed with a normal readdir */
 more:
 	/* do we have the correct frag content buffered? */
@@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 			}
 		}
 	}
+
+	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
+	if (!dentry_name) {
+		err = -ENOMEM;
+		ceph_mdsc_put_request(dfi->last_readdir);
+		dfi->last_readdir = NULL;
+		goto out;
+	}
+
 	for (; i < rinfo->dir_nr; i++) {
 		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
-		struct ceph_fname fname = { .dir	= inode,
-					    .name	= rde->name,
-					    .name_len	= rde->name_len,
-					    .ctext	= rde->altname,
-					    .ctext_len	= rde->altname_len };
-		u32 olen = oname.len;
-
-		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
-		if (err) {
-			pr_err("%s unable to decode %.*s, got %d\n", __func__,
-			       rde->name_len, rde->name, err);
-			goto out;
-		}
+		struct dentry *dn = rde->dentry;
+		int name_len;
 
 		BUG_ON(rde->offset < ctx->pos);
 		BUG_ON(!rde->inode.in);
+		BUG_ON(!rde->dentry);
 
 		ctx->pos = rde->offset;
-		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
-		     i, rinfo->dir_nr, ctx->pos,
-		     rde->name_len, rde->name, &rde->inode.in);
 
-		if (!dir_emit(ctx, oname.name, oname.len,
+		spin_lock(&dn->d_lock);
+		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
+		name_len = dn->d_name.len;
+		spin_unlock(&dn->d_lock);
+
+		dentry_name[name_len] = '\0';
+		dout("readdir (%d/%d) -> %llx '%s' %p\n",
+		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
+
+		if (!dir_emit(ctx, dentry_name, name_len,
 			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
 			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
 			/*
@@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 			goto out;
 		}
 
-		/* Reset the lengths to their original allocated vals */
-		oname.len = olen;
 		ctx->pos++;
 	}
 
@@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	err = 0;
 	dout("readdir %p file %p done.\n", inode, file);
 out:
-	ceph_fname_free_buffer(inode, &tname);
-	ceph_fname_free_buffer(inode, &oname);
+	if (dentry_name)
+		kfree(dentry_name);
 	return err;
 }
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b573a0f33450..19e5275eae1c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 			goto out;
 		}
 
+		rde->dentry = NULL;
 		dname.name = oname.name;
 		dname.len = oname.len;
 		dname.hash = full_name_hash(parent, dname.name, dname.len);
@@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 			goto retry_lookup;
 		}
 
+		/*
+		 * ceph_readdir will use the dentry to get the name
+		 * to avoid doing the dencrypt again there.
+		 */
+		rde->dentry = dget(dn);
+
 		/* inode */
 		if (d_really_is_positive(dn)) {
 			in = d_inode(dn);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8d704ddd7291..9e0a51ef1dfa 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
 
 		kfree(rde->inode.fscrypt_auth);
 		kfree(rde->inode.fscrypt_file);
+		dput(rde->dentry);
 	}
 	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
 }
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 0dfe24f94567..663d7754d57d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
 };
 
 struct ceph_mds_reply_dir_entry {
+	struct dentry		      *dentry;
 	char                          *name;
 	u8			      *altname;
 	u32                           name_len;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-09 13:59 [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir xiubli
@ 2022-03-09 14:50 ` Jeff Layton
  2022-03-10  0:20   ` Xiubo Li
  2022-03-10  6:08 ` Xiubo Li
  2022-03-10  8:22 ` Xiubo Li
  2 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-09 14:50 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Wed, 2022-03-09 at 21:59 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> For the readdir request the dentries will be pasred and dencrypted
> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> get the dentry name from the dentry cache instead of parsing and
> dencrypting them again. This could improve performance.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> V7:
> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> 
> V6:
> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>   instead, since we are limiting the max length of snapshot name to
>   240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> 
> V5:
> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> - release the rde->dentry in destroy_reply_info
> 
> 
> 
>  fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>  fs/ceph/inode.c      |  7 ++++++
>  fs/ceph/mds_client.c |  1 +
>  fs/ceph/mds_client.h |  1 +
>  4 files changed, 34 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 6df2a91af236..2397c34e9173 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  	int err;
>  	unsigned frag = -1;
>  	struct ceph_mds_reply_info_parsed *rinfo;
> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> +	char *dentry_name = NULL;
>  
>  	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>  	if (dfi->file_info.flags & CEPH_F_ATEND)
> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  		spin_unlock(&ci->i_ceph_lock);
>  	}
>  
> -	err = ceph_fname_alloc_buffer(inode, &tname);
> -	if (err < 0)
> -		goto out;
> -
> -	err = ceph_fname_alloc_buffer(inode, &oname);
> -	if (err < 0)
> -		goto out;
> -
>  	/* proceed with a normal readdir */
>  more:
>  	/* do we have the correct frag content buffered? */
> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  			}
>  		}
>  	}
> +
> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> +	if (!dentry_name) {
> +		err = -ENOMEM;
> +		ceph_mdsc_put_request(dfi->last_readdir);
> +		dfi->last_readdir = NULL;
> +		goto out;
> +	}
> +
>  	for (; i < rinfo->dir_nr; i++) {
>  		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> -		struct ceph_fname fname = { .dir	= inode,
> -					    .name	= rde->name,
> -					    .name_len	= rde->name_len,
> -					    .ctext	= rde->altname,
> -					    .ctext_len	= rde->altname_len };
> -		u32 olen = oname.len;
> -
> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> -		if (err) {
> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> -			       rde->name_len, rde->name, err);
> -			goto out;
> -		}
> +		struct dentry *dn = rde->dentry;
> +		int name_len;
>  
>  		BUG_ON(rde->offset < ctx->pos);
>  		BUG_ON(!rde->inode.in);
> +		BUG_ON(!rde->dentry);
>  
>  		ctx->pos = rde->offset;
> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> -		     i, rinfo->dir_nr, ctx->pos,
> -		     rde->name_len, rde->name, &rde->inode.in);
>  
> -		if (!dir_emit(ctx, oname.name, oname.len,
> +		spin_lock(&dn->d_lock);
> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> +		name_len = dn->d_name.len;
> +		spin_unlock(&dn->d_lock);
> +
> +		dentry_name[name_len] = '\0';
> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> +
> +		if (!dir_emit(ctx, dentry_name, name_len,
>  			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>  			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>  			/*
> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  			goto out;
>  		}
>  
> -		/* Reset the lengths to their original allocated vals */
> -		oname.len = olen;
>  		ctx->pos++;
>  	}
>  
> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>  	err = 0;
>  	dout("readdir %p file %p done.\n", inode, file);
>  out:
> -	ceph_fname_free_buffer(inode, &tname);
> -	ceph_fname_free_buffer(inode, &oname);
> +	if (dentry_name)
> +		kfree(dentry_name);
>  	return err;
>  }
>  
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index b573a0f33450..19e5275eae1c 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>  			goto out;
>  		}
>  
> +		rde->dentry = NULL;
>  		dname.name = oname.name;
>  		dname.len = oname.len;
>  		dname.hash = full_name_hash(parent, dname.name, dname.len);
> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>  			goto retry_lookup;
>  		}
>  
> +		/*
> +		 * ceph_readdir will use the dentry to get the name
> +		 * to avoid doing the dencrypt again there.
> +		 */
> +		rde->dentry = dget(dn);
> +
>  		/* inode */
>  		if (d_really_is_positive(dn)) {
>  			in = d_inode(dn);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 8d704ddd7291..9e0a51ef1dfa 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>  
>  		kfree(rde->inode.fscrypt_auth);
>  		kfree(rde->inode.fscrypt_file);
> +		dput(rde->dentry);
>  	}
>  	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>  }
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 0dfe24f94567..663d7754d57d 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>  };
>  
>  struct ceph_mds_reply_dir_entry {
> +	struct dentry		      *dentry;
>  	char                          *name;
>  	u8			      *altname;
>  	u32                           name_len;


Still buggy. This time generic/013 triggered this:

[ 1970.839019] run fstests generic/013 at 2022-03-09 09:48:42
[ 2001.133838] ==================================================================
[ 2001.138595] BUG: KASAN: slab-out-of-bounds in ceph_readdir+0x3f4/0x1dd0 [ceph]
[ 2001.141997] Write of size 1 at addr ffff888120070aff by task fsstress/8682
[ 2001.144897] 
[ 2001.145670] CPU: 7 PID: 8682 Comm: fsstress Tainted: G            E     5.17.0-rc6+ #172
[ 2001.149132] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
[ 2001.152477] Call Trace:
[ 2001.153609]  <TASK>
[ 2001.154482]  dump_stack_lvl+0x59/0x73
[ 2001.155697]  print_address_description.constprop.0+0x1f/0x150
[ 2001.158021]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
[ 2001.159654]  kasan_report.cold+0x7f/0x11b
[ 2001.160951]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
[ 2001.168084]  ceph_readdir+0x3f4/0x1dd0 [ceph]
[ 2001.173258]  ? ceph_d_revalidate+0x7e0/0x7e0 [ceph]
[ 2001.178293]  ? down_write_killable+0xc7/0x130
[ 2001.182782]  ? __down_interruptible+0x1d0/0x1d0
[ 2001.187246]  iterate_dir+0x107/0x2e0
[ 2001.192677]  __x64_sys_getdents64+0xe2/0x1b0
[ 2001.197570]  ? filldir+0x270/0x270
[ 2001.202806]  ? __ia32_sys_getdents+0x1a0/0x1a0
[ 2001.207415]  ? lockdep_hardirqs_on_prepare+0x129/0x220
[ 2001.211782]  ? syscall_enter_from_user_mode+0x21/0x70
[ 2001.216466]  do_syscall_64+0x3b/0x90
[ 2001.220674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 2001.225674] RIP: 0033:0x7f159a774fd7
[ 2001.230170] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48
[ 2001.240821] RSP: 002b:00007ffff90c35d8 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9
[ 2001.247670] RAX: ffffffffffffffda RBX: 0000000001186130 RCX: 00007f159a774fd7
[ 2001.255057] RDX: 0000000000010000 RSI: 0000000001186130 RDI: 0000000000000003
[ 2001.262428] RBP: 0000000001186104 R08: 0000000000000003 R09: 0000000000000000
[ 2001.269057] R10: 0000000000000000 R11: 0000000000000293 R12: ffffffffffffff80
[ 2001.275079] R13: 0000000000000019 R14: 0000000001186100 R15: 00007f159a6996c0
[ 2001.282429]  </TASK>
[ 2001.287900] 
[ 2001.291963] Allocated by task 8682:
[ 2001.296303]  kasan_save_stack+0x1e/0x40
[ 2001.300491]  __kasan_kmalloc+0xa9/0xd0
[ 2001.304792]  ceph_readdir+0x2ab/0x1dd0 [ceph]
[ 2001.309093]  iterate_dir+0x107/0x2e0
[ 2001.313291]  __x64_sys_getdents64+0xe2/0x1b0
[ 2001.317705]  do_syscall_64+0x3b/0x90
[ 2001.322210]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 2001.327112] 
[ 2001.331092] The buggy address belongs to the object at ffff888120070a00
[ 2001.331092]  which belongs to the cache kmalloc-256 of size 256
[ 2001.341495] The buggy address is located 255 bytes inside of
[ 2001.341495]  256-byte region [ffff888120070a00, ffff888120070b00)
[ 2001.352252] The buggy address belongs to the page:
[ 2001.357836] page:0000000089466360 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x120070
[ 2001.364225] head:0000000089466360 order:2 compound_mapcount:0 compound_pincount:0
[ 2001.370091] flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
[ 2001.375349] raw: 0017ffffc0010200 ffffea00048d7e00 dead000000000002 ffff888100042b40
[ 2001.380833] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[ 2001.386079] page dumped because: kasan: bad access detected
[ 2001.390919] 
[ 2001.395163] Memory state around the buggy address:
[ 2001.399901]  ffff888120070980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 2001.405479]  ffff888120070a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 2001.410888] >ffff888120070a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 07
[ 2001.416090]                                                                 ^
[ 2001.422577]  ffff888120070b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 2001.430562]  ffff888120070b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 2001.438474] ==================================================================
[ 2001.446565] Disabling lock debugging due to kernel taint

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-09 14:50 ` Jeff Layton
@ 2022-03-10  0:20   ` Xiubo Li
  2022-03-10  0:27     ` Jeff Layton
  0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10  0:20 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/9/22 10:50 PM, Jeff Layton wrote:
> On Wed, 2022-03-09 at 21:59 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> For the readdir request the dentries will be pasred and dencrypted
>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>> get the dentry name from the dentry cache instead of parsing and
>> dencrypting them again. This could improve performance.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V7:
>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>
>> V6:
>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>    instead, since we are limiting the max length of snapshot name to
>>    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>
>> V5:
>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>> - release the rde->dentry in destroy_reply_info
>>
>>
>>
>>   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>   fs/ceph/inode.c      |  7 ++++++
>>   fs/ceph/mds_client.c |  1 +
>>   fs/ceph/mds_client.h |  1 +
>>   4 files changed, 34 insertions(+), 31 deletions(-)
>>
>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>> index 6df2a91af236..2397c34e9173 100644
>> --- a/fs/ceph/dir.c
>> +++ b/fs/ceph/dir.c
>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>   	int err;
>>   	unsigned frag = -1;
>>   	struct ceph_mds_reply_info_parsed *rinfo;
>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>> +	char *dentry_name = NULL;
>>   
>>   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>   	if (dfi->file_info.flags & CEPH_F_ATEND)
>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>   		spin_unlock(&ci->i_ceph_lock);
>>   	}
>>   
>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>> -	if (err < 0)
>> -		goto out;
>> -
>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>> -	if (err < 0)
>> -		goto out;
>> -
>>   	/* proceed with a normal readdir */
>>   more:
>>   	/* do we have the correct frag content buffered? */
>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>   			}
>>   		}
>>   	}
>> +
>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>> +	if (!dentry_name) {
>> +		err = -ENOMEM;
>> +		ceph_mdsc_put_request(dfi->last_readdir);
>> +		dfi->last_readdir = NULL;
>> +		goto out;
>> +	}
>> +
>>   	for (; i < rinfo->dir_nr; i++) {
>>   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>> -		struct ceph_fname fname = { .dir	= inode,
>> -					    .name	= rde->name,
>> -					    .name_len	= rde->name_len,
>> -					    .ctext	= rde->altname,
>> -					    .ctext_len	= rde->altname_len };
>> -		u32 olen = oname.len;
>> -
>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>> -		if (err) {
>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>> -			       rde->name_len, rde->name, err);
>> -			goto out;
>> -		}
>> +		struct dentry *dn = rde->dentry;
>> +		int name_len;
>>   
>>   		BUG_ON(rde->offset < ctx->pos);
>>   		BUG_ON(!rde->inode.in);
>> +		BUG_ON(!rde->dentry);
>>   
>>   		ctx->pos = rde->offset;
>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>> -		     i, rinfo->dir_nr, ctx->pos,
>> -		     rde->name_len, rde->name, &rde->inode.in);
>>   
>> -		if (!dir_emit(ctx, oname.name, oname.len,
>> +		spin_lock(&dn->d_lock);
>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>> +		name_len = dn->d_name.len;
>> +		spin_unlock(&dn->d_lock);
>> +
>> +		dentry_name[name_len] = '\0';
>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>> +
>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>   			/*
>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>   			goto out;
>>   		}
>>   
>> -		/* Reset the lengths to their original allocated vals */
>> -		oname.len = olen;
>>   		ctx->pos++;
>>   	}
>>   
>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>   	err = 0;
>>   	dout("readdir %p file %p done.\n", inode, file);
>>   out:
>> -	ceph_fname_free_buffer(inode, &tname);
>> -	ceph_fname_free_buffer(inode, &oname);
>> +	if (dentry_name)
>> +		kfree(dentry_name);
>>   	return err;
>>   }
>>   
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index b573a0f33450..19e5275eae1c 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>   			goto out;
>>   		}
>>   
>> +		rde->dentry = NULL;
>>   		dname.name = oname.name;
>>   		dname.len = oname.len;
>>   		dname.hash = full_name_hash(parent, dname.name, dname.len);
>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>   			goto retry_lookup;
>>   		}
>>   
>> +		/*
>> +		 * ceph_readdir will use the dentry to get the name
>> +		 * to avoid doing the dencrypt again there.
>> +		 */
>> +		rde->dentry = dget(dn);
>> +
>>   		/* inode */
>>   		if (d_really_is_positive(dn)) {
>>   			in = d_inode(dn);
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 8d704ddd7291..9e0a51ef1dfa 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>   
>>   		kfree(rde->inode.fscrypt_auth);
>>   		kfree(rde->inode.fscrypt_file);
>> +		dput(rde->dentry);
>>   	}
>>   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>   }
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index 0dfe24f94567..663d7754d57d 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>   };
>>   
>>   struct ceph_mds_reply_dir_entry {
>> +	struct dentry		      *dentry;
>>   	char                          *name;
>>   	u8			      *altname;
>>   	u32                           name_len;
>
> Still buggy. This time generic/013 triggered this:
>
> [ 1970.839019] run fstests generic/013 at 2022-03-09 09:48:42
> [ 2001.133838] ==================================================================
> [ 2001.138595] BUG: KASAN: slab-out-of-bounds in ceph_readdir+0x3f4/0x1dd0 [ceph]
> [ 2001.141997] Write of size 1 at addr ffff888120070aff by task fsstress/8682
> [ 2001.144897]
> [ 2001.145670] CPU: 7 PID: 8682 Comm: fsstress Tainted: G            E     5.17.0-rc6+ #172
> [ 2001.149132] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
> [ 2001.152477] Call Trace:
> [ 2001.153609]  <TASK>
> [ 2001.154482]  dump_stack_lvl+0x59/0x73
> [ 2001.155697]  print_address_description.constprop.0+0x1f/0x150
> [ 2001.158021]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
> [ 2001.159654]  kasan_report.cold+0x7f/0x11b
> [ 2001.160951]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
> [ 2001.168084]  ceph_readdir+0x3f4/0x1dd0 [ceph]
> [ 2001.173258]  ? ceph_d_revalidate+0x7e0/0x7e0 [ceph]
> [ 2001.178293]  ? down_write_killable+0xc7/0x130
> [ 2001.182782]  ? __down_interruptible+0x1d0/0x1d0
> [ 2001.187246]  iterate_dir+0x107/0x2e0
> [ 2001.192677]  __x64_sys_getdents64+0xe2/0x1b0
> [ 2001.197570]  ? filldir+0x270/0x270
> [ 2001.202806]  ? __ia32_sys_getdents+0x1a0/0x1a0
> [ 2001.207415]  ? lockdep_hardirqs_on_prepare+0x129/0x220
> [ 2001.211782]  ? syscall_enter_from_user_mode+0x21/0x70
> [ 2001.216466]  do_syscall_64+0x3b/0x90
> [ 2001.220674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [ 2001.225674] RIP: 0033:0x7f159a774fd7
> [ 2001.230170] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48
> [ 2001.240821] RSP: 002b:00007ffff90c35d8 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9
> [ 2001.247670] RAX: ffffffffffffffda RBX: 0000000001186130 RCX: 00007f159a774fd7
> [ 2001.255057] RDX: 0000000000010000 RSI: 0000000001186130 RDI: 0000000000000003
> [ 2001.262428] RBP: 0000000001186104 R08: 0000000000000003 R09: 0000000000000000
> [ 2001.269057] R10: 0000000000000000 R11: 0000000000000293 R12: ffffffffffffff80
> [ 2001.275079] R13: 0000000000000019 R14: 0000000001186100 R15: 00007f159a6996c0
> [ 2001.282429]  </TASK>
> [ 2001.287900]
> [ 2001.291963] Allocated by task 8682:
> [ 2001.296303]  kasan_save_stack+0x1e/0x40
> [ 2001.300491]  __kasan_kmalloc+0xa9/0xd0
> [ 2001.304792]  ceph_readdir+0x2ab/0x1dd0 [ceph]
> [ 2001.309093]  iterate_dir+0x107/0x2e0
> [ 2001.313291]  __x64_sys_getdents64+0xe2/0x1b0
> [ 2001.317705]  do_syscall_64+0x3b/0x90
> [ 2001.322210]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [ 2001.327112]
> [ 2001.331092] The buggy address belongs to the object at ffff888120070a00
> [ 2001.331092]  which belongs to the cache kmalloc-256 of size 256
> [ 2001.341495] The buggy address is located 255 bytes inside of
> [ 2001.341495]  256-byte region [ffff888120070a00, ffff888120070b00)
> [ 2001.352252] The buggy address belongs to the page:
> [ 2001.357836] page:0000000089466360 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x120070
> [ 2001.364225] head:0000000089466360 order:2 compound_mapcount:0 compound_pincount:0
> [ 2001.370091] flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
> [ 2001.375349] raw: 0017ffffc0010200 ffffea00048d7e00 dead000000000002 ffff888100042b40
> [ 2001.380833] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
> [ 2001.386079] page dumped because: kasan: bad access detected
> [ 2001.390919]
> [ 2001.395163] Memory state around the buggy address:
> [ 2001.399901]  ffff888120070980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 2001.405479]  ffff888120070a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> [ 2001.410888] >ffff888120070a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 07
> [ 2001.416090]                                                                 ^
> [ 2001.422577]  ffff888120070b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 2001.430562]  ffff888120070b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 2001.438474] ==================================================================
> [ 2001.446565] Disabling lock debugging due to kernel taint
>
Hi Jeff,

Yesterday I also test this and others, and I couldn't reproduce it. Just 
now I tried several time still worked well.

[root@lxbceph1 xfstests]# ./check generic/013
FSTYP         -- ceph
PLATFORM      -- Linux/x86_64 lxbceph1 5.17.0-rc6+
MKFS_OPTIONS  -- 10.72.47.117:40437:/testB
MOUNT_OPTIONS -- -o 
test_dummy_encryption,name=admin,nowsync,copyfrom,rasize=4096 -o 
context=system_u:object_r:root_t:s0 10.72.47.117:40437:/testB /mnt/kcephfs.B

generic/013 102s ... 99s
Ran: generic/013
Passed all 1 tests

I there anything different with yours ? Such as the options, etc.

- Xiubo



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10  0:20   ` Xiubo Li
@ 2022-03-10  0:27     ` Jeff Layton
  2022-03-10  0:53       ` Xiubo Li
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-10  0:27 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 08:20 +0800, Xiubo Li wrote:
> On 3/9/22 10:50 PM, Jeff Layton wrote:
> > On Wed, 2022-03-09 at 21:59 +0800, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > For the readdir request the dentries will be pasred and dencrypted
> > > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > > get the dentry name from the dentry cache instead of parsing and
> > > dencrypting them again. This could improve performance.
> > > 
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > > 
> > > V7:
> > > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > > 
> > > V6:
> > > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> > >    instead, since we are limiting the max length of snapshot name to
> > >    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > > 
> > > V5:
> > > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > > - release the rde->dentry in destroy_reply_info
> > > 
> > > 
> > > 
> > >   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> > >   fs/ceph/inode.c      |  7 ++++++
> > >   fs/ceph/mds_client.c |  1 +
> > >   fs/ceph/mds_client.h |  1 +
> > >   4 files changed, 34 insertions(+), 31 deletions(-)
> > > 
> > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > index 6df2a91af236..2397c34e9173 100644
> > > --- a/fs/ceph/dir.c
> > > +++ b/fs/ceph/dir.c
> > > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > >   	int err;
> > >   	unsigned frag = -1;
> > >   	struct ceph_mds_reply_info_parsed *rinfo;
> > > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > > +	char *dentry_name = NULL;
> > >   
> > >   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> > >   	if (dfi->file_info.flags & CEPH_F_ATEND)
> > > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > >   		spin_unlock(&ci->i_ceph_lock);
> > >   	}
> > >   
> > > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > > -	if (err < 0)
> > > -		goto out;
> > > -
> > > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > > -	if (err < 0)
> > > -		goto out;
> > > -
> > >   	/* proceed with a normal readdir */
> > >   more:
> > >   	/* do we have the correct frag content buffered? */
> > > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > >   			}
> > >   		}
> > >   	}
> > > +
> > > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > > +	if (!dentry_name) {
> > > +		err = -ENOMEM;
> > > +		ceph_mdsc_put_request(dfi->last_readdir);
> > > +		dfi->last_readdir = NULL;
> > > +		goto out;
> > > +	}
> > > +
> > >   	for (; i < rinfo->dir_nr; i++) {
> > >   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > > -		struct ceph_fname fname = { .dir	= inode,
> > > -					    .name	= rde->name,
> > > -					    .name_len	= rde->name_len,
> > > -					    .ctext	= rde->altname,
> > > -					    .ctext_len	= rde->altname_len };
> > > -		u32 olen = oname.len;
> > > -
> > > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > > -		if (err) {
> > > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > > -			       rde->name_len, rde->name, err);
> > > -			goto out;
> > > -		}
> > > +		struct dentry *dn = rde->dentry;
> > > +		int name_len;
> > >   
> > >   		BUG_ON(rde->offset < ctx->pos);
> > >   		BUG_ON(!rde->inode.in);
> > > +		BUG_ON(!rde->dentry);
> > >   
> > >   		ctx->pos = rde->offset;
> > > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > > -		     i, rinfo->dir_nr, ctx->pos,
> > > -		     rde->name_len, rde->name, &rde->inode.in);
> > >   
> > > -		if (!dir_emit(ctx, oname.name, oname.len,
> > > +		spin_lock(&dn->d_lock);
> > > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > > +		name_len = dn->d_name.len;
> > > +		spin_unlock(&dn->d_lock);
> > > +
> > > +		dentry_name[name_len] = '\0';
> > > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > > +
> > > +		if (!dir_emit(ctx, dentry_name, name_len,
> > >   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> > >   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> > >   			/*
> > > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > >   			goto out;
> > >   		}
> > >   
> > > -		/* Reset the lengths to their original allocated vals */
> > > -		oname.len = olen;
> > >   		ctx->pos++;
> > >   	}
> > >   
> > > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > >   	err = 0;
> > >   	dout("readdir %p file %p done.\n", inode, file);
> > >   out:
> > > -	ceph_fname_free_buffer(inode, &tname);
> > > -	ceph_fname_free_buffer(inode, &oname);
> > > +	if (dentry_name)
> > > +		kfree(dentry_name);
> > >   	return err;
> > >   }
> > >   
> > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > index b573a0f33450..19e5275eae1c 100644
> > > --- a/fs/ceph/inode.c
> > > +++ b/fs/ceph/inode.c
> > > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > >   			goto out;
> > >   		}
> > >   
> > > +		rde->dentry = NULL;
> > >   		dname.name = oname.name;
> > >   		dname.len = oname.len;
> > >   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > >   			goto retry_lookup;
> > >   		}
> > >   
> > > +		/*
> > > +		 * ceph_readdir will use the dentry to get the name
> > > +		 * to avoid doing the dencrypt again there.
> > > +		 */
> > > +		rde->dentry = dget(dn);
> > > +
> > >   		/* inode */
> > >   		if (d_really_is_positive(dn)) {
> > >   			in = d_inode(dn);
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index 8d704ddd7291..9e0a51ef1dfa 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> > >   
> > >   		kfree(rde->inode.fscrypt_auth);
> > >   		kfree(rde->inode.fscrypt_file);
> > > +		dput(rde->dentry);
> > >   	}
> > >   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> > >   }
> > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > index 0dfe24f94567..663d7754d57d 100644
> > > --- a/fs/ceph/mds_client.h
> > > +++ b/fs/ceph/mds_client.h
> > > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> > >   };
> > >   
> > >   struct ceph_mds_reply_dir_entry {
> > > +	struct dentry		      *dentry;
> > >   	char                          *name;
> > >   	u8			      *altname;
> > >   	u32                           name_len;
> > 
> > Still buggy. This time generic/013 triggered this:
> > 
> > [ 1970.839019] run fstests generic/013 at 2022-03-09 09:48:42
> > [ 2001.133838] ==================================================================
> > [ 2001.138595] BUG: KASAN: slab-out-of-bounds in ceph_readdir+0x3f4/0x1dd0 [ceph]
> > [ 2001.141997] Write of size 1 at addr ffff888120070aff by task fsstress/8682
> > [ 2001.144897]
> > [ 2001.145670] CPU: 7 PID: 8682 Comm: fsstress Tainted: G            E     5.17.0-rc6+ #172
> > [ 2001.149132] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
> > [ 2001.152477] Call Trace:
> > [ 2001.153609]  <TASK>
> > [ 2001.154482]  dump_stack_lvl+0x59/0x73
> > [ 2001.155697]  print_address_description.constprop.0+0x1f/0x150
> > [ 2001.158021]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
> > [ 2001.159654]  kasan_report.cold+0x7f/0x11b
> > [ 2001.160951]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
> > [ 2001.168084]  ceph_readdir+0x3f4/0x1dd0 [ceph]
> > [ 2001.173258]  ? ceph_d_revalidate+0x7e0/0x7e0 [ceph]
> > [ 2001.178293]  ? down_write_killable+0xc7/0x130
> > [ 2001.182782]  ? __down_interruptible+0x1d0/0x1d0
> > [ 2001.187246]  iterate_dir+0x107/0x2e0
> > [ 2001.192677]  __x64_sys_getdents64+0xe2/0x1b0
> > [ 2001.197570]  ? filldir+0x270/0x270
> > [ 2001.202806]  ? __ia32_sys_getdents+0x1a0/0x1a0
> > [ 2001.207415]  ? lockdep_hardirqs_on_prepare+0x129/0x220
> > [ 2001.211782]  ? syscall_enter_from_user_mode+0x21/0x70
> > [ 2001.216466]  do_syscall_64+0x3b/0x90
> > [ 2001.220674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> > [ 2001.225674] RIP: 0033:0x7f159a774fd7
> > [ 2001.230170] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48
> > [ 2001.240821] RSP: 002b:00007ffff90c35d8 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9
> > [ 2001.247670] RAX: ffffffffffffffda RBX: 0000000001186130 RCX: 00007f159a774fd7
> > [ 2001.255057] RDX: 0000000000010000 RSI: 0000000001186130 RDI: 0000000000000003
> > [ 2001.262428] RBP: 0000000001186104 R08: 0000000000000003 R09: 0000000000000000
> > [ 2001.269057] R10: 0000000000000000 R11: 0000000000000293 R12: ffffffffffffff80
> > [ 2001.275079] R13: 0000000000000019 R14: 0000000001186100 R15: 00007f159a6996c0
> > [ 2001.282429]  </TASK>
> > [ 2001.287900]
> > [ 2001.291963] Allocated by task 8682:
> > [ 2001.296303]  kasan_save_stack+0x1e/0x40
> > [ 2001.300491]  __kasan_kmalloc+0xa9/0xd0
> > [ 2001.304792]  ceph_readdir+0x2ab/0x1dd0 [ceph]
> > [ 2001.309093]  iterate_dir+0x107/0x2e0
> > [ 2001.313291]  __x64_sys_getdents64+0xe2/0x1b0
> > [ 2001.317705]  do_syscall_64+0x3b/0x90
> > [ 2001.322210]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> > [ 2001.327112]
> > [ 2001.331092] The buggy address belongs to the object at ffff888120070a00
> > [ 2001.331092]  which belongs to the cache kmalloc-256 of size 256
> > [ 2001.341495] The buggy address is located 255 bytes inside of
> > [ 2001.341495]  256-byte region [ffff888120070a00, ffff888120070b00)
> > [ 2001.352252] The buggy address belongs to the page:
> > [ 2001.357836] page:0000000089466360 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x120070
> > [ 2001.364225] head:0000000089466360 order:2 compound_mapcount:0 compound_pincount:0
> > [ 2001.370091] flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
> > [ 2001.375349] raw: 0017ffffc0010200 ffffea00048d7e00 dead000000000002 ffff888100042b40
> > [ 2001.380833] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
> > [ 2001.386079] page dumped because: kasan: bad access detected
> > [ 2001.390919]
> > [ 2001.395163] Memory state around the buggy address:
> > [ 2001.399901]  ffff888120070980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> > [ 2001.405479]  ffff888120070a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> > [ 2001.410888] >ffff888120070a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 07
> > [ 2001.416090]                                                                 ^
> > [ 2001.422577]  ffff888120070b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> > [ 2001.430562]  ffff888120070b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> > [ 2001.438474] ==================================================================
> > [ 2001.446565] Disabling lock debugging due to kernel taint
> > 
> Hi Jeff,
> 
> Yesterday I also test this and others, and I couldn't reproduce it. Just 
> now I tried several time still worked well.
> 
> [root@lxbceph1 xfstests]# ./check generic/013
> FSTYP         -- ceph
> PLATFORM      -- Linux/x86_64 lxbceph1 5.17.0-rc6+
> MKFS_OPTIONS  -- 10.72.47.117:40437:/testB
> MOUNT_OPTIONS -- -o 
> test_dummy_encryption,name=admin,nowsync,copyfrom,rasize=4096 -o 
> context=system_u:object_r:root_t:s0 10.72.47.117:40437:/testB /mnt/kcephfs.B
> 
> generic/013 102s ... 99s
> Ran: generic/013
> Passed all 1 tests
> 
> I there anything different with yours ? Such as the options, etc.
> 

Do you have KASAN turned on? I generally run with that enabled for most
of my testing (for this reason).

Also, I was testing with '-o test_dummy_encryption' enabled as well.
That may be a factor here.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10  0:27     ` Jeff Layton
@ 2022-03-10  0:53       ` Xiubo Li
  0 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2022-03-10  0:53 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 8:27 AM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 08:20 +0800, Xiubo Li wrote:
>> On 3/9/22 10:50 PM, Jeff Layton wrote:
>>> On Wed, 2022-03-09 at 21:59 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> For the readdir request the dentries will be pasred and dencrypted
>>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>>> get the dentry name from the dentry cache instead of parsing and
>>>> dencrypting them again. This could improve performance.
>>>>
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> V7:
>>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>>
>>>> V6:
>>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>>     instead, since we are limiting the max length of snapshot name to
>>>>     240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>>
>>>> V5:
>>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>>> - release the rde->dentry in destroy_reply_info
>>>>
>>>>
>>>>
>>>>    fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>>    fs/ceph/inode.c      |  7 ++++++
>>>>    fs/ceph/mds_client.c |  1 +
>>>>    fs/ceph/mds_client.h |  1 +
>>>>    4 files changed, 34 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>> index 6df2a91af236..2397c34e9173 100644
>>>> --- a/fs/ceph/dir.c
>>>> +++ b/fs/ceph/dir.c
>>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>    	int err;
>>>>    	unsigned frag = -1;
>>>>    	struct ceph_mds_reply_info_parsed *rinfo;
>>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>>> +	char *dentry_name = NULL;
>>>>    
>>>>    	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>>    	if (dfi->file_info.flags & CEPH_F_ATEND)
>>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>    		spin_unlock(&ci->i_ceph_lock);
>>>>    	}
>>>>    
>>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>>> -	if (err < 0)
>>>> -		goto out;
>>>> -
>>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>>> -	if (err < 0)
>>>> -		goto out;
>>>> -
>>>>    	/* proceed with a normal readdir */
>>>>    more:
>>>>    	/* do we have the correct frag content buffered? */
>>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>    			}
>>>>    		}
>>>>    	}
>>>> +
>>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>>> +	if (!dentry_name) {
>>>> +		err = -ENOMEM;
>>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>>> +		dfi->last_readdir = NULL;
>>>> +		goto out;
>>>> +	}
>>>> +
>>>>    	for (; i < rinfo->dir_nr; i++) {
>>>>    		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>>> -		struct ceph_fname fname = { .dir	= inode,
>>>> -					    .name	= rde->name,
>>>> -					    .name_len	= rde->name_len,
>>>> -					    .ctext	= rde->altname,
>>>> -					    .ctext_len	= rde->altname_len };
>>>> -		u32 olen = oname.len;
>>>> -
>>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>>> -		if (err) {
>>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>>> -			       rde->name_len, rde->name, err);
>>>> -			goto out;
>>>> -		}
>>>> +		struct dentry *dn = rde->dentry;
>>>> +		int name_len;
>>>>    
>>>>    		BUG_ON(rde->offset < ctx->pos);
>>>>    		BUG_ON(!rde->inode.in);
>>>> +		BUG_ON(!rde->dentry);
>>>>    
>>>>    		ctx->pos = rde->offset;
>>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>>> -		     i, rinfo->dir_nr, ctx->pos,
>>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>>    
>>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>>> +		spin_lock(&dn->d_lock);
>>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>>> +		name_len = dn->d_name.len;
>>>> +		spin_unlock(&dn->d_lock);
>>>> +
>>>> +		dentry_name[name_len] = '\0';
>>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>>> +
>>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>>    			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>>    			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>>    			/*
>>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>    			goto out;
>>>>    		}
>>>>    
>>>> -		/* Reset the lengths to their original allocated vals */
>>>> -		oname.len = olen;
>>>>    		ctx->pos++;
>>>>    	}
>>>>    
>>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>    	err = 0;
>>>>    	dout("readdir %p file %p done.\n", inode, file);
>>>>    out:
>>>> -	ceph_fname_free_buffer(inode, &tname);
>>>> -	ceph_fname_free_buffer(inode, &oname);
>>>> +	if (dentry_name)
>>>> +		kfree(dentry_name);
>>>>    	return err;
>>>>    }
>>>>    
>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>> index b573a0f33450..19e5275eae1c 100644
>>>> --- a/fs/ceph/inode.c
>>>> +++ b/fs/ceph/inode.c
>>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>    			goto out;
>>>>    		}
>>>>    
>>>> +		rde->dentry = NULL;
>>>>    		dname.name = oname.name;
>>>>    		dname.len = oname.len;
>>>>    		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>    			goto retry_lookup;
>>>>    		}
>>>>    
>>>> +		/*
>>>> +		 * ceph_readdir will use the dentry to get the name
>>>> +		 * to avoid doing the dencrypt again there.
>>>> +		 */
>>>> +		rde->dentry = dget(dn);
>>>> +
>>>>    		/* inode */
>>>>    		if (d_really_is_positive(dn)) {
>>>>    			in = d_inode(dn);
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>>    
>>>>    		kfree(rde->inode.fscrypt_auth);
>>>>    		kfree(rde->inode.fscrypt_file);
>>>> +		dput(rde->dentry);
>>>>    	}
>>>>    	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>>    }
>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>> index 0dfe24f94567..663d7754d57d 100644
>>>> --- a/fs/ceph/mds_client.h
>>>> +++ b/fs/ceph/mds_client.h
>>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>>    };
>>>>    
>>>>    struct ceph_mds_reply_dir_entry {
>>>> +	struct dentry		      *dentry;
>>>>    	char                          *name;
>>>>    	u8			      *altname;
>>>>    	u32                           name_len;
>>> Still buggy. This time generic/013 triggered this:
>>>
>>> [ 1970.839019] run fstests generic/013 at 2022-03-09 09:48:42
>>> [ 2001.133838] ==================================================================
>>> [ 2001.138595] BUG: KASAN: slab-out-of-bounds in ceph_readdir+0x3f4/0x1dd0 [ceph]
>>> [ 2001.141997] Write of size 1 at addr ffff888120070aff by task fsstress/8682
>>> [ 2001.144897]
>>> [ 2001.145670] CPU: 7 PID: 8682 Comm: fsstress Tainted: G            E     5.17.0-rc6+ #172
>>> [ 2001.149132] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014
>>> [ 2001.152477] Call Trace:
>>> [ 2001.153609]  <TASK>
>>> [ 2001.154482]  dump_stack_lvl+0x59/0x73
>>> [ 2001.155697]  print_address_description.constprop.0+0x1f/0x150
>>> [ 2001.158021]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
>>> [ 2001.159654]  kasan_report.cold+0x7f/0x11b
>>> [ 2001.160951]  ? ceph_readdir+0x3f4/0x1dd0 [ceph]
>>> [ 2001.168084]  ceph_readdir+0x3f4/0x1dd0 [ceph]
>>> [ 2001.173258]  ? ceph_d_revalidate+0x7e0/0x7e0 [ceph]
>>> [ 2001.178293]  ? down_write_killable+0xc7/0x130
>>> [ 2001.182782]  ? __down_interruptible+0x1d0/0x1d0
>>> [ 2001.187246]  iterate_dir+0x107/0x2e0
>>> [ 2001.192677]  __x64_sys_getdents64+0xe2/0x1b0
>>> [ 2001.197570]  ? filldir+0x270/0x270
>>> [ 2001.202806]  ? __ia32_sys_getdents+0x1a0/0x1a0
>>> [ 2001.207415]  ? lockdep_hardirqs_on_prepare+0x129/0x220
>>> [ 2001.211782]  ? syscall_enter_from_user_mode+0x21/0x70
>>> [ 2001.216466]  do_syscall_64+0x3b/0x90
>>> [ 2001.220674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
>>> [ 2001.225674] RIP: 0033:0x7f159a774fd7
>>> [ 2001.230170] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48
>>> [ 2001.240821] RSP: 002b:00007ffff90c35d8 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9
>>> [ 2001.247670] RAX: ffffffffffffffda RBX: 0000000001186130 RCX: 00007f159a774fd7
>>> [ 2001.255057] RDX: 0000000000010000 RSI: 0000000001186130 RDI: 0000000000000003
>>> [ 2001.262428] RBP: 0000000001186104 R08: 0000000000000003 R09: 0000000000000000
>>> [ 2001.269057] R10: 0000000000000000 R11: 0000000000000293 R12: ffffffffffffff80
>>> [ 2001.275079] R13: 0000000000000019 R14: 0000000001186100 R15: 00007f159a6996c0
>>> [ 2001.282429]  </TASK>
>>> [ 2001.287900]
>>> [ 2001.291963] Allocated by task 8682:
>>> [ 2001.296303]  kasan_save_stack+0x1e/0x40
>>> [ 2001.300491]  __kasan_kmalloc+0xa9/0xd0
>>> [ 2001.304792]  ceph_readdir+0x2ab/0x1dd0 [ceph]
>>> [ 2001.309093]  iterate_dir+0x107/0x2e0
>>> [ 2001.313291]  __x64_sys_getdents64+0xe2/0x1b0
>>> [ 2001.317705]  do_syscall_64+0x3b/0x90
>>> [ 2001.322210]  entry_SYSCALL_64_after_hwframe+0x44/0xae
>>> [ 2001.327112]
>>> [ 2001.331092] The buggy address belongs to the object at ffff888120070a00
>>> [ 2001.331092]  which belongs to the cache kmalloc-256 of size 256
>>> [ 2001.341495] The buggy address is located 255 bytes inside of
>>> [ 2001.341495]  256-byte region [ffff888120070a00, ffff888120070b00)
>>> [ 2001.352252] The buggy address belongs to the page:
>>> [ 2001.357836] page:0000000089466360 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x120070
>>> [ 2001.364225] head:0000000089466360 order:2 compound_mapcount:0 compound_pincount:0
>>> [ 2001.370091] flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
>>> [ 2001.375349] raw: 0017ffffc0010200 ffffea00048d7e00 dead000000000002 ffff888100042b40
>>> [ 2001.380833] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
>>> [ 2001.386079] page dumped because: kasan: bad access detected
>>> [ 2001.390919]
>>> [ 2001.395163] Memory state around the buggy address:
>>> [ 2001.399901]  ffff888120070980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>>> [ 2001.405479]  ffff888120070a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>>> [ 2001.410888] >ffff888120070a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 07
>>> [ 2001.416090]                                                                 ^
>>> [ 2001.422577]  ffff888120070b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>>> [ 2001.430562]  ffff888120070b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>>> [ 2001.438474] ==================================================================
>>> [ 2001.446565] Disabling lock debugging due to kernel taint
>>>
>> Hi Jeff,
>>
>> Yesterday I also test this and others, and I couldn't reproduce it. Just
>> now I tried several time still worked well.
>>
>> [root@lxbceph1 xfstests]# ./check generic/013
>> FSTYP         -- ceph
>> PLATFORM      -- Linux/x86_64 lxbceph1 5.17.0-rc6+
>> MKFS_OPTIONS  -- 10.72.47.117:40437:/testB
>> MOUNT_OPTIONS -- -o
>> test_dummy_encryption,name=admin,nowsync,copyfrom,rasize=4096 -o
>> context=system_u:object_r:root_t:s0 10.72.47.117:40437:/testB /mnt/kcephfs.B
>>
>> generic/013 102s ... 99s
>> Ran: generic/013
>> Passed all 1 tests
>>
>> I there anything different with yours ? Such as the options, etc.
>>
> Do you have KASAN turned on? I generally run with that enabled for most
> of my testing (for this reason).

Yeah, me too. The config is:


8443 CONFIG_KASAN=y
8444 CONFIG_KASAN_GENERIC=y
8445 CONFIG_KASAN_OUTLINE=y
8446 # CONFIG_KASAN_INLINE is not set
8447 CONFIG_KASAN_STACK=y
8448 # CONFIG_KASAN_VMALLOC is not set
8449 # CONFIG_KASAN_MODULE_TEST is not set
8450 CONFIG_HAVE_ARCH_KFENCE=y
8451 # CONFIG_KFENCE is not set
8452 # end of Memory Debugging
8453
8454 CONFIG_DEBUG_SHIRQ=y



>
> Also, I was testing with '-o test_dummy_encryption' enabled as well.
> That may be a factor here.

Please see my above test I also added this.

BTW, could you attach your objdump for fs/ceph/dir.o ? Mine seem 
different with the dump stack you past.

[ 2001.168084]  ceph_readdir+0x3f4/0x1dd0 [ceph]

The totoal length is 0x1dd0 ? But mine is larger than this. Not sure 
whether there has other config is different with yours.

I base the latest wip-fscrypt branch.

- XIubo


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-09 13:59 [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir xiubli
  2022-03-09 14:50 ` Jeff Layton
@ 2022-03-10  6:08 ` Xiubo Li
  2022-03-10 10:36   ` Jeff Layton
  2022-03-10  8:22 ` Xiubo Li
  2 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10  6:08 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> For the readdir request the dentries will be pasred and dencrypted
> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> get the dentry name from the dentry cache instead of parsing and
> dencrypting them again. This could improve performance.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V7:
> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>
> V6:
> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>    instead, since we are limiting the max length of snapshot name to
>    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>
> V5:
> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> - release the rde->dentry in destroy_reply_info
>
>
>
>   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>   fs/ceph/inode.c      |  7 ++++++
>   fs/ceph/mds_client.c |  1 +
>   fs/ceph/mds_client.h |  1 +
>   4 files changed, 34 insertions(+), 31 deletions(-)
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 6df2a91af236..2397c34e9173 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   	int err;
>   	unsigned frag = -1;
>   	struct ceph_mds_reply_info_parsed *rinfo;
> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> +	char *dentry_name = NULL;
>   
>   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>   	if (dfi->file_info.flags & CEPH_F_ATEND)
> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   		spin_unlock(&ci->i_ceph_lock);
>   	}
>   
> -	err = ceph_fname_alloc_buffer(inode, &tname);
> -	if (err < 0)
> -		goto out;
> -
> -	err = ceph_fname_alloc_buffer(inode, &oname);
> -	if (err < 0)
> -		goto out;
> -
>   	/* proceed with a normal readdir */
>   more:
>   	/* do we have the correct frag content buffered? */
> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   			}
>   		}
>   	}
> +
> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> +	if (!dentry_name) {
> +		err = -ENOMEM;
> +		ceph_mdsc_put_request(dfi->last_readdir);
> +		dfi->last_readdir = NULL;
> +		goto out;
> +	}
> +
>   	for (; i < rinfo->dir_nr; i++) {
>   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> -		struct ceph_fname fname = { .dir	= inode,
> -					    .name	= rde->name,
> -					    .name_len	= rde->name_len,
> -					    .ctext	= rde->altname,
> -					    .ctext_len	= rde->altname_len };
> -		u32 olen = oname.len;
> -
> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> -		if (err) {
> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> -			       rde->name_len, rde->name, err);
> -			goto out;
> -		}
> +		struct dentry *dn = rde->dentry;
> +		int name_len;
>   
>   		BUG_ON(rde->offset < ctx->pos);
>   		BUG_ON(!rde->inode.in);
> +		BUG_ON(!rde->dentry);
>   
>   		ctx->pos = rde->offset;
> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> -		     i, rinfo->dir_nr, ctx->pos,
> -		     rde->name_len, rde->name, &rde->inode.in);
>   
> -		if (!dir_emit(ctx, oname.name, oname.len,
> +		spin_lock(&dn->d_lock);
> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> +		name_len = dn->d_name.len;
> +		spin_unlock(&dn->d_lock);
> +

Hi Jeff,

BTW, does the dn->d_lock is must here ? From the code comments, the 
d_lock intends to protect the 'd_flag' and 'd_lockref.count'.

In ceph code I found some places are using the d_lock when accessing the 
d_name, some are not. And in none ceph code, they will almost never use 
the d_lock to protect the d_name.

- Xiubo


> +		dentry_name[name_len] = '\0';
> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> +
> +		if (!dir_emit(ctx, dentry_name, name_len,
>   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>   			/*
> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   			goto out;
>   		}
>   
> -		/* Reset the lengths to their original allocated vals */
> -		oname.len = olen;
>   		ctx->pos++;
>   	}
>   
> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   	err = 0;
>   	dout("readdir %p file %p done.\n", inode, file);
>   out:
> -	ceph_fname_free_buffer(inode, &tname);
> -	ceph_fname_free_buffer(inode, &oname);
> +	if (dentry_name)
> +		kfree(dentry_name);
>   	return err;
>   }
>   
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index b573a0f33450..19e5275eae1c 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>   			goto out;
>   		}
>   
> +		rde->dentry = NULL;
>   		dname.name = oname.name;
>   		dname.len = oname.len;
>   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>   			goto retry_lookup;
>   		}
>   
> +		/*
> +		 * ceph_readdir will use the dentry to get the name
> +		 * to avoid doing the dencrypt again there.
> +		 */
> +		rde->dentry = dget(dn);
> +
>   		/* inode */
>   		if (d_really_is_positive(dn)) {
>   			in = d_inode(dn);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 8d704ddd7291..9e0a51ef1dfa 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>   
>   		kfree(rde->inode.fscrypt_auth);
>   		kfree(rde->inode.fscrypt_file);
> +		dput(rde->dentry);
>   	}
>   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>   }
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 0dfe24f94567..663d7754d57d 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>   };
>   
>   struct ceph_mds_reply_dir_entry {
> +	struct dentry		      *dentry;
>   	char                          *name;
>   	u8			      *altname;
>   	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-09 13:59 [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir xiubli
  2022-03-09 14:50 ` Jeff Layton
  2022-03-10  6:08 ` Xiubo Li
@ 2022-03-10  8:22 ` Xiubo Li
  2022-03-10 10:38   ` Jeff Layton
  2 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10  8:22 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> For the readdir request the dentries will be pasred and dencrypted
> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> get the dentry name from the dentry cache instead of parsing and
> dencrypting them again. This could improve performance.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V7:
> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>
> V6:
> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>    instead, since we are limiting the max length of snapshot name to
>    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>
> V5:
> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> - release the rde->dentry in destroy_reply_info
>
>
>
>   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>   fs/ceph/inode.c      |  7 ++++++
>   fs/ceph/mds_client.c |  1 +
>   fs/ceph/mds_client.h |  1 +
>   4 files changed, 34 insertions(+), 31 deletions(-)
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 6df2a91af236..2397c34e9173 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   	int err;
>   	unsigned frag = -1;
>   	struct ceph_mds_reply_info_parsed *rinfo;
> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> +	char *dentry_name = NULL;
>   
>   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>   	if (dfi->file_info.flags & CEPH_F_ATEND)
> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   		spin_unlock(&ci->i_ceph_lock);
>   	}
>   
> -	err = ceph_fname_alloc_buffer(inode, &tname);
> -	if (err < 0)
> -		goto out;
> -
> -	err = ceph_fname_alloc_buffer(inode, &oname);
> -	if (err < 0)
> -		goto out;
> -
>   	/* proceed with a normal readdir */
>   more:
>   	/* do we have the correct frag content buffered? */
> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   			}
>   		}
>   	}
> +
> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> +	if (!dentry_name) {
> +		err = -ENOMEM;
> +		ceph_mdsc_put_request(dfi->last_readdir);
> +		dfi->last_readdir = NULL;
> +		goto out;
> +	}
> +

This should move up before the 'more' tag.


>   	for (; i < rinfo->dir_nr; i++) {
>   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> -		struct ceph_fname fname = { .dir	= inode,
> -					    .name	= rde->name,
> -					    .name_len	= rde->name_len,
> -					    .ctext	= rde->altname,
> -					    .ctext_len	= rde->altname_len };
> -		u32 olen = oname.len;
> -
> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> -		if (err) {
> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> -			       rde->name_len, rde->name, err);
> -			goto out;
> -		}
> +		struct dentry *dn = rde->dentry;
> +		int name_len;
>   
>   		BUG_ON(rde->offset < ctx->pos);
>   		BUG_ON(!rde->inode.in);
> +		BUG_ON(!rde->dentry);
>   
>   		ctx->pos = rde->offset;
> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> -		     i, rinfo->dir_nr, ctx->pos,
> -		     rde->name_len, rde->name, &rde->inode.in);
>   
> -		if (!dir_emit(ctx, oname.name, oname.len,
> +		spin_lock(&dn->d_lock);
> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> +		name_len = dn->d_name.len;
> +		spin_unlock(&dn->d_lock);
> +
> +		dentry_name[name_len] = '\0';

Possibly caused by this. Since it useless here and I will remove it.


> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> +
> +		if (!dir_emit(ctx, dentry_name, name_len,
>   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>   			/*
> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   			goto out;
>   		}
>   
> -		/* Reset the lengths to their original allocated vals */
> -		oname.len = olen;
>   		ctx->pos++;
>   	}
>   
> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>   	err = 0;
>   	dout("readdir %p file %p done.\n", inode, file);
>   out:
> -	ceph_fname_free_buffer(inode, &tname);
> -	ceph_fname_free_buffer(inode, &oname);
> +	if (dentry_name)
> +		kfree(dentry_name);
>   	return err;
>   }
>   
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index b573a0f33450..19e5275eae1c 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>   			goto out;
>   		}
>   
> +		rde->dentry = NULL;
>   		dname.name = oname.name;
>   		dname.len = oname.len;
>   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>   			goto retry_lookup;
>   		}
>   
> +		/*
> +		 * ceph_readdir will use the dentry to get the name
> +		 * to avoid doing the dencrypt again there.
> +		 */
> +		rde->dentry = dget(dn);
> +
>   		/* inode */
>   		if (d_really_is_positive(dn)) {
>   			in = d_inode(dn);
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 8d704ddd7291..9e0a51ef1dfa 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>   
>   		kfree(rde->inode.fscrypt_auth);
>   		kfree(rde->inode.fscrypt_file);
> +		dput(rde->dentry);
>   	}
>   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>   }
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 0dfe24f94567..663d7754d57d 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>   };
>   
>   struct ceph_mds_reply_dir_entry {
> +	struct dentry		      *dentry;
>   	char                          *name;
>   	u8			      *altname;
>   	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10  6:08 ` Xiubo Li
@ 2022-03-10 10:36   ` Jeff Layton
  2022-03-10 10:54     ` Xiubo Li
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-10 10:36 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> > From: Xiubo Li <xiubli@redhat.com>
> > 
> > For the readdir request the dentries will be pasred and dencrypted
> > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > get the dentry name from the dentry cache instead of parsing and
> > dencrypting them again. This could improve performance.
> > 
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> > 
> > V7:
> > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > 
> > V6:
> > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> >    instead, since we are limiting the max length of snapshot name to
> >    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > 
> > V5:
> > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > - release the rde->dentry in destroy_reply_info
> > 
> > 
> > 
> >   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> >   fs/ceph/inode.c      |  7 ++++++
> >   fs/ceph/mds_client.c |  1 +
> >   fs/ceph/mds_client.h |  1 +
> >   4 files changed, 34 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 6df2a91af236..2397c34e9173 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	int err;
> >   	unsigned frag = -1;
> >   	struct ceph_mds_reply_info_parsed *rinfo;
> > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > +	char *dentry_name = NULL;
> >   
> >   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> >   	if (dfi->file_info.flags & CEPH_F_ATEND)
> > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   		spin_unlock(&ci->i_ceph_lock);
> >   	}
> >   
> > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > -	if (err < 0)
> > -		goto out;
> > -
> > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > -	if (err < 0)
> > -		goto out;
> > -
> >   	/* proceed with a normal readdir */
> >   more:
> >   	/* do we have the correct frag content buffered? */
> > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			}
> >   		}
> >   	}
> > +
> > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > +	if (!dentry_name) {
> > +		err = -ENOMEM;
> > +		ceph_mdsc_put_request(dfi->last_readdir);
> > +		dfi->last_readdir = NULL;
> > +		goto out;
> > +	}
> > +
> >   	for (; i < rinfo->dir_nr; i++) {
> >   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > -		struct ceph_fname fname = { .dir	= inode,
> > -					    .name	= rde->name,
> > -					    .name_len	= rde->name_len,
> > -					    .ctext	= rde->altname,
> > -					    .ctext_len	= rde->altname_len };
> > -		u32 olen = oname.len;
> > -
> > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > -		if (err) {
> > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > -			       rde->name_len, rde->name, err);
> > -			goto out;
> > -		}
> > +		struct dentry *dn = rde->dentry;
> > +		int name_len;
> >   
> >   		BUG_ON(rde->offset < ctx->pos);
> >   		BUG_ON(!rde->inode.in);
> > +		BUG_ON(!rde->dentry);
> >   
> >   		ctx->pos = rde->offset;
> > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > -		     i, rinfo->dir_nr, ctx->pos,
> > -		     rde->name_len, rde->name, &rde->inode.in);
> >   
> > -		if (!dir_emit(ctx, oname.name, oname.len,
> > +		spin_lock(&dn->d_lock);
> > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > +		name_len = dn->d_name.len;
> > +		spin_unlock(&dn->d_lock);
> > +
> 
> Hi Jeff,
> 
> BTW, does the dn->d_lock is must here ? From the code comments, the 
> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
> 
> In ceph code I found some places are using the d_lock when accessing the 
> d_name, some are not. And in none ceph code, they will almost never use 
> the d_lock to protect the d_name.
> 

It's probably not needed. The d_name can only change if there are no
other outstanding references to the dentry.

> 
> 
> > +		dentry_name[name_len] = '\0';
> > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > +
> > +		if (!dir_emit(ctx, dentry_name, name_len,
> >   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> >   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> >   			/*
> > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			goto out;
> >   		}
> >   
> > -		/* Reset the lengths to their original allocated vals */
> > -		oname.len = olen;
> >   		ctx->pos++;
> >   	}
> >   
> > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	err = 0;
> >   	dout("readdir %p file %p done.\n", inode, file);
> >   out:
> > -	ceph_fname_free_buffer(inode, &tname);
> > -	ceph_fname_free_buffer(inode, &oname);
> > +	if (dentry_name)
> > +		kfree(dentry_name);
> >   	return err;
> >   }
> >   
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index b573a0f33450..19e5275eae1c 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto out;
> >   		}
> >   
> > +		rde->dentry = NULL;
> >   		dname.name = oname.name;
> >   		dname.len = oname.len;
> >   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto retry_lookup;
> >   		}
> >   
> > +		/*
> > +		 * ceph_readdir will use the dentry to get the name
> > +		 * to avoid doing the dencrypt again there.
> > +		 */
> > +		rde->dentry = dget(dn);
> > +
> >   		/* inode */
> >   		if (d_really_is_positive(dn)) {
> >   			in = d_inode(dn);
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 8d704ddd7291..9e0a51ef1dfa 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> >   
> >   		kfree(rde->inode.fscrypt_auth);
> >   		kfree(rde->inode.fscrypt_file);
> > +		dput(rde->dentry);
> >   	}
> >   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> >   }
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index 0dfe24f94567..663d7754d57d 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> >   };
> >   
> >   struct ceph_mds_reply_dir_entry {
> > +	struct dentry		      *dentry;
> >   	char                          *name;
> >   	u8			      *altname;
> >   	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10  8:22 ` Xiubo Li
@ 2022-03-10 10:38   ` Jeff Layton
  2022-03-10 10:49     ` Xiubo Li
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-10 10:38 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 16:22 +0800, Xiubo Li wrote:
> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> > From: Xiubo Li <xiubli@redhat.com>
> > 
> > For the readdir request the dentries will be pasred and dencrypted
> > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > get the dentry name from the dentry cache instead of parsing and
> > dencrypting them again. This could improve performance.
> > 
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> > 
> > V7:
> > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > 
> > V6:
> > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> >    instead, since we are limiting the max length of snapshot name to
> >    240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > 
> > V5:
> > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > - release the rde->dentry in destroy_reply_info
> > 
> > 
> > 
> >   fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> >   fs/ceph/inode.c      |  7 ++++++
> >   fs/ceph/mds_client.c |  1 +
> >   fs/ceph/mds_client.h |  1 +
> >   4 files changed, 34 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 6df2a91af236..2397c34e9173 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	int err;
> >   	unsigned frag = -1;
> >   	struct ceph_mds_reply_info_parsed *rinfo;
> > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > +	char *dentry_name = NULL;
> >   
> >   	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> >   	if (dfi->file_info.flags & CEPH_F_ATEND)
> > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   		spin_unlock(&ci->i_ceph_lock);
> >   	}
> >   
> > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > -	if (err < 0)
> > -		goto out;
> > -
> > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > -	if (err < 0)
> > -		goto out;
> > -
> >   	/* proceed with a normal readdir */
> >   more:
> >   	/* do we have the correct frag content buffered? */
> > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			}
> >   		}
> >   	}
> > +
> > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > +	if (!dentry_name) {
> > +		err = -ENOMEM;
> > +		ceph_mdsc_put_request(dfi->last_readdir);
> > +		dfi->last_readdir = NULL;
> > +		goto out;
> > +	}
> > +
> 
> This should move up before the 'more' tag.
> 

Yep, good catch.

> 
> >   	for (; i < rinfo->dir_nr; i++) {
> >   		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > -		struct ceph_fname fname = { .dir	= inode,
> > -					    .name	= rde->name,
> > -					    .name_len	= rde->name_len,
> > -					    .ctext	= rde->altname,
> > -					    .ctext_len	= rde->altname_len };
> > -		u32 olen = oname.len;
> > -
> > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > -		if (err) {
> > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > -			       rde->name_len, rde->name, err);
> > -			goto out;
> > -		}
> > +		struct dentry *dn = rde->dentry;
> > +		int name_len;
> >   
> >   		BUG_ON(rde->offset < ctx->pos);
> >   		BUG_ON(!rde->inode.in);
> > +		BUG_ON(!rde->dentry);
> >   
> >   		ctx->pos = rde->offset;
> > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > -		     i, rinfo->dir_nr, ctx->pos,
> > -		     rde->name_len, rde->name, &rde->inode.in);
> >   
> > -		if (!dir_emit(ctx, oname.name, oname.len,
> > +		spin_lock(&dn->d_lock);
> > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > +		name_len = dn->d_name.len;
> > +		spin_unlock(&dn->d_lock);
> > +
> > +		dentry_name[name_len] = '\0';
> 
> Possibly caused by this. Since it useless here and I will remove it.
> 
> 

Seems plausible. If you send a v8 patch, I can give it another test and
see if it fixes it.

> > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > +
> > +		if (!dir_emit(ctx, dentry_name, name_len,
> >   			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> >   			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> >   			/*
> > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   			goto out;
> >   		}
> >   
> > -		/* Reset the lengths to their original allocated vals */
> > -		oname.len = olen;
> >   		ctx->pos++;
> >   	}
> >   
> > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >   	err = 0;
> >   	dout("readdir %p file %p done.\n", inode, file);
> >   out:
> > -	ceph_fname_free_buffer(inode, &tname);
> > -	ceph_fname_free_buffer(inode, &oname);
> > +	if (dentry_name)
> > +		kfree(dentry_name);
> >   	return err;
> >   }
> >   
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index b573a0f33450..19e5275eae1c 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto out;
> >   		}
> >   
> > +		rde->dentry = NULL;
> >   		dname.name = oname.name;
> >   		dname.len = oname.len;
> >   		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> >   			goto retry_lookup;
> >   		}
> >   
> > +		/*
> > +		 * ceph_readdir will use the dentry to get the name
> > +		 * to avoid doing the dencrypt again there.
> > +		 */
> > +		rde->dentry = dget(dn);
> > +
> >   		/* inode */
> >   		if (d_really_is_positive(dn)) {
> >   			in = d_inode(dn);
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 8d704ddd7291..9e0a51ef1dfa 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> >   
> >   		kfree(rde->inode.fscrypt_auth);
> >   		kfree(rde->inode.fscrypt_file);
> > +		dput(rde->dentry);
> >   	}
> >   	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> >   }
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index 0dfe24f94567..663d7754d57d 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> >   };
> >   
> >   struct ceph_mds_reply_dir_entry {
> > +	struct dentry		      *dentry;
> >   	char                          *name;
> >   	u8			      *altname;
> >   	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 10:38   ` Jeff Layton
@ 2022-03-10 10:49     ` Xiubo Li
  0 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 10:49 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 6:38 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 16:22 +0800, Xiubo Li wrote:
>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> For the readdir request the dentries will be pasred and dencrypted
>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>> get the dentry name from the dentry cache instead of parsing and
>>> dencrypting them again. This could improve performance.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>
>>> V7:
>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>
>>> V6:
>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>     instead, since we are limiting the max length of snapshot name to
>>>     240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>
>>> V5:
>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>> - release the rde->dentry in destroy_reply_info
>>>
>>>
>>>
>>>    fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>    fs/ceph/inode.c      |  7 ++++++
>>>    fs/ceph/mds_client.c |  1 +
>>>    fs/ceph/mds_client.h |  1 +
>>>    4 files changed, 34 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>> index 6df2a91af236..2397c34e9173 100644
>>> --- a/fs/ceph/dir.c
>>> +++ b/fs/ceph/dir.c
>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    	int err;
>>>    	unsigned frag = -1;
>>>    	struct ceph_mds_reply_info_parsed *rinfo;
>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>> +	char *dentry_name = NULL;
>>>    
>>>    	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>    	if (dfi->file_info.flags & CEPH_F_ATEND)
>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    		spin_unlock(&ci->i_ceph_lock);
>>>    	}
>>>    
>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>> -	if (err < 0)
>>> -		goto out;
>>> -
>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>> -	if (err < 0)
>>> -		goto out;
>>> -
>>>    	/* proceed with a normal readdir */
>>>    more:
>>>    	/* do we have the correct frag content buffered? */
>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    			}
>>>    		}
>>>    	}
>>> +
>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>> +	if (!dentry_name) {
>>> +		err = -ENOMEM;
>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>> +		dfi->last_readdir = NULL;
>>> +		goto out;
>>> +	}
>>> +
>> This should move up before the 'more' tag.
>>
> Yep, good catch.
>
>>>    	for (; i < rinfo->dir_nr; i++) {
>>>    		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>> -		struct ceph_fname fname = { .dir	= inode,
>>> -					    .name	= rde->name,
>>> -					    .name_len	= rde->name_len,
>>> -					    .ctext	= rde->altname,
>>> -					    .ctext_len	= rde->altname_len };
>>> -		u32 olen = oname.len;
>>> -
>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>> -		if (err) {
>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>> -			       rde->name_len, rde->name, err);
>>> -			goto out;
>>> -		}
>>> +		struct dentry *dn = rde->dentry;
>>> +		int name_len;
>>>    
>>>    		BUG_ON(rde->offset < ctx->pos);
>>>    		BUG_ON(!rde->inode.in);
>>> +		BUG_ON(!rde->dentry);
>>>    
>>>    		ctx->pos = rde->offset;
>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>> -		     i, rinfo->dir_nr, ctx->pos,
>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>    
>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>> +		spin_lock(&dn->d_lock);
>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>> +		name_len = dn->d_name.len;
>>> +		spin_unlock(&dn->d_lock);
>>> +
>>> +		dentry_name[name_len] = '\0';
>> Possibly caused by this. Since it useless here and I will remove it.
>>
>>
> Seems plausible. If you send a v8 patch, I can give it another test and
> see if it fixes it.

Sure, will send it soon.

And locally I have test several hours and haven't see any issue yet with V8.

- Xiubo

>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>> +
>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>    			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>    			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>    			/*
>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    			goto out;
>>>    		}
>>>    
>>> -		/* Reset the lengths to their original allocated vals */
>>> -		oname.len = olen;
>>>    		ctx->pos++;
>>>    	}
>>>    
>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    	err = 0;
>>>    	dout("readdir %p file %p done.\n", inode, file);
>>>    out:
>>> -	ceph_fname_free_buffer(inode, &tname);
>>> -	ceph_fname_free_buffer(inode, &oname);
>>> +	if (dentry_name)
>>> +		kfree(dentry_name);
>>>    	return err;
>>>    }
>>>    
>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>> index b573a0f33450..19e5275eae1c 100644
>>> --- a/fs/ceph/inode.c
>>> +++ b/fs/ceph/inode.c
>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>    			goto out;
>>>    		}
>>>    
>>> +		rde->dentry = NULL;
>>>    		dname.name = oname.name;
>>>    		dname.len = oname.len;
>>>    		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>    			goto retry_lookup;
>>>    		}
>>>    
>>> +		/*
>>> +		 * ceph_readdir will use the dentry to get the name
>>> +		 * to avoid doing the dencrypt again there.
>>> +		 */
>>> +		rde->dentry = dget(dn);
>>> +
>>>    		/* inode */
>>>    		if (d_really_is_positive(dn)) {
>>>    			in = d_inode(dn);
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>    
>>>    		kfree(rde->inode.fscrypt_auth);
>>>    		kfree(rde->inode.fscrypt_file);
>>> +		dput(rde->dentry);
>>>    	}
>>>    	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>    }
>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>> index 0dfe24f94567..663d7754d57d 100644
>>> --- a/fs/ceph/mds_client.h
>>> +++ b/fs/ceph/mds_client.h
>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>    };
>>>    
>>>    struct ceph_mds_reply_dir_entry {
>>> +	struct dentry		      *dentry;
>>>    	char                          *name;
>>>    	u8			      *altname;
>>>    	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 10:36   ` Jeff Layton
@ 2022-03-10 10:54     ` Xiubo Li
  2022-03-10 11:21       ` Jeff Layton
  0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 10:54 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 6:36 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> For the readdir request the dentries will be pasred and dencrypted
>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>> get the dentry name from the dentry cache instead of parsing and
>>> dencrypting them again. This could improve performance.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>
>>> V7:
>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>
>>> V6:
>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>     instead, since we are limiting the max length of snapshot name to
>>>     240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>
>>> V5:
>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>> - release the rde->dentry in destroy_reply_info
>>>
>>>
>>>
>>>    fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>    fs/ceph/inode.c      |  7 ++++++
>>>    fs/ceph/mds_client.c |  1 +
>>>    fs/ceph/mds_client.h |  1 +
>>>    4 files changed, 34 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>> index 6df2a91af236..2397c34e9173 100644
>>> --- a/fs/ceph/dir.c
>>> +++ b/fs/ceph/dir.c
>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    	int err;
>>>    	unsigned frag = -1;
>>>    	struct ceph_mds_reply_info_parsed *rinfo;
>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>> +	char *dentry_name = NULL;
>>>    
>>>    	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>    	if (dfi->file_info.flags & CEPH_F_ATEND)
>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    		spin_unlock(&ci->i_ceph_lock);
>>>    	}
>>>    
>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>> -	if (err < 0)
>>> -		goto out;
>>> -
>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>> -	if (err < 0)
>>> -		goto out;
>>> -
>>>    	/* proceed with a normal readdir */
>>>    more:
>>>    	/* do we have the correct frag content buffered? */
>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    			}
>>>    		}
>>>    	}
>>> +
>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>> +	if (!dentry_name) {
>>> +		err = -ENOMEM;
>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>> +		dfi->last_readdir = NULL;
>>> +		goto out;
>>> +	}
>>> +
>>>    	for (; i < rinfo->dir_nr; i++) {
>>>    		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>> -		struct ceph_fname fname = { .dir	= inode,
>>> -					    .name	= rde->name,
>>> -					    .name_len	= rde->name_len,
>>> -					    .ctext	= rde->altname,
>>> -					    .ctext_len	= rde->altname_len };
>>> -		u32 olen = oname.len;
>>> -
>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>> -		if (err) {
>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>> -			       rde->name_len, rde->name, err);
>>> -			goto out;
>>> -		}
>>> +		struct dentry *dn = rde->dentry;
>>> +		int name_len;
>>>    
>>>    		BUG_ON(rde->offset < ctx->pos);
>>>    		BUG_ON(!rde->inode.in);
>>> +		BUG_ON(!rde->dentry);
>>>    
>>>    		ctx->pos = rde->offset;
>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>> -		     i, rinfo->dir_nr, ctx->pos,
>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>    
>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>> +		spin_lock(&dn->d_lock);
>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>> +		name_len = dn->d_name.len;
>>> +		spin_unlock(&dn->d_lock);
>>> +
>> Hi Jeff,
>>
>> BTW, does the dn->d_lock is must here ? From the code comments, the
>> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
>>
>> In ceph code I found some places are using the d_lock when accessing the
>> d_name, some are not. And in none ceph code, they will almost never use
>> the d_lock to protect the d_name.
>>
> It's probably not needed. The d_name can only change if there are no
> other outstanding references to the dentry.

If so, the dentry_name = kmalloc() is not needed any more.

- Xiubo


>>
>>> +		dentry_name[name_len] = '\0';
>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>> +
>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>    			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>    			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>    			/*
>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    			goto out;
>>>    		}
>>>    
>>> -		/* Reset the lengths to their original allocated vals */
>>> -		oname.len = olen;
>>>    		ctx->pos++;
>>>    	}
>>>    
>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>    	err = 0;
>>>    	dout("readdir %p file %p done.\n", inode, file);
>>>    out:
>>> -	ceph_fname_free_buffer(inode, &tname);
>>> -	ceph_fname_free_buffer(inode, &oname);
>>> +	if (dentry_name)
>>> +		kfree(dentry_name);
>>>    	return err;
>>>    }
>>>    
>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>> index b573a0f33450..19e5275eae1c 100644
>>> --- a/fs/ceph/inode.c
>>> +++ b/fs/ceph/inode.c
>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>    			goto out;
>>>    		}
>>>    
>>> +		rde->dentry = NULL;
>>>    		dname.name = oname.name;
>>>    		dname.len = oname.len;
>>>    		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>    			goto retry_lookup;
>>>    		}
>>>    
>>> +		/*
>>> +		 * ceph_readdir will use the dentry to get the name
>>> +		 * to avoid doing the dencrypt again there.
>>> +		 */
>>> +		rde->dentry = dget(dn);
>>> +
>>>    		/* inode */
>>>    		if (d_really_is_positive(dn)) {
>>>    			in = d_inode(dn);
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>    
>>>    		kfree(rde->inode.fscrypt_auth);
>>>    		kfree(rde->inode.fscrypt_file);
>>> +		dput(rde->dentry);
>>>    	}
>>>    	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>    }
>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>> index 0dfe24f94567..663d7754d57d 100644
>>> --- a/fs/ceph/mds_client.h
>>> +++ b/fs/ceph/mds_client.h
>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>    };
>>>    
>>>    struct ceph_mds_reply_dir_entry {
>>> +	struct dentry		      *dentry;
>>>    	char                          *name;
>>>    	u8			      *altname;
>>>    	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 10:54     ` Xiubo Li
@ 2022-03-10 11:21       ` Jeff Layton
  2022-03-10 11:29         ` Xiubo Li
  2022-03-10 12:21         ` Xiubo Li
  0 siblings, 2 replies; 18+ messages in thread
From: Jeff Layton @ 2022-03-10 11:21 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
> On 3/10/22 6:36 PM, Jeff Layton wrote:
> > On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
> > > On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> > > > From: Xiubo Li <xiubli@redhat.com>
> > > > 
> > > > For the readdir request the dentries will be pasred and dencrypted
> > > > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > > > get the dentry name from the dentry cache instead of parsing and
> > > > dencrypting them again. This could improve performance.
> > > > 
> > > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > > ---
> > > > 
> > > > V7:
> > > > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > > > 
> > > > V6:
> > > > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> > > >     instead, since we are limiting the max length of snapshot name to
> > > >     240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > > > 
> > > > V5:
> > > > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > > > - release the rde->dentry in destroy_reply_info
> > > > 
> > > > 
> > > > 
> > > >    fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> > > >    fs/ceph/inode.c      |  7 ++++++
> > > >    fs/ceph/mds_client.c |  1 +
> > > >    fs/ceph/mds_client.h |  1 +
> > > >    4 files changed, 34 insertions(+), 31 deletions(-)
> > > > 
> > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > > index 6df2a91af236..2397c34e9173 100644
> > > > --- a/fs/ceph/dir.c
> > > > +++ b/fs/ceph/dir.c
> > > > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > >    	int err;
> > > >    	unsigned frag = -1;
> > > >    	struct ceph_mds_reply_info_parsed *rinfo;
> > > > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > > > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > > > +	char *dentry_name = NULL;
> > > >    
> > > >    	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> > > >    	if (dfi->file_info.flags & CEPH_F_ATEND)
> > > > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > >    		spin_unlock(&ci->i_ceph_lock);
> > > >    	}
> > > >    
> > > > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > > > -	if (err < 0)
> > > > -		goto out;
> > > > -
> > > > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > > > -	if (err < 0)
> > > > -		goto out;
> > > > -
> > > >    	/* proceed with a normal readdir */
> > > >    more:
> > > >    	/* do we have the correct frag content buffered? */
> > > > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > >    			}
> > > >    		}
> > > >    	}
> > > > +
> > > > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > > > +	if (!dentry_name) {
> > > > +		err = -ENOMEM;
> > > > +		ceph_mdsc_put_request(dfi->last_readdir);
> > > > +		dfi->last_readdir = NULL;
> > > > +		goto out;
> > > > +	}
> > > > +
> > > >    	for (; i < rinfo->dir_nr; i++) {
> > > >    		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > > > -		struct ceph_fname fname = { .dir	= inode,
> > > > -					    .name	= rde->name,
> > > > -					    .name_len	= rde->name_len,
> > > > -					    .ctext	= rde->altname,
> > > > -					    .ctext_len	= rde->altname_len };
> > > > -		u32 olen = oname.len;
> > > > -
> > > > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > > > -		if (err) {
> > > > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > > > -			       rde->name_len, rde->name, err);
> > > > -			goto out;
> > > > -		}
> > > > +		struct dentry *dn = rde->dentry;
> > > > +		int name_len;
> > > >    
> > > >    		BUG_ON(rde->offset < ctx->pos);
> > > >    		BUG_ON(!rde->inode.in);
> > > > +		BUG_ON(!rde->dentry);
> > > >    
> > > >    		ctx->pos = rde->offset;
> > > > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > > > -		     i, rinfo->dir_nr, ctx->pos,
> > > > -		     rde->name_len, rde->name, &rde->inode.in);
> > > >    
> > > > -		if (!dir_emit(ctx, oname.name, oname.len,
> > > > +		spin_lock(&dn->d_lock);
> > > > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > > > +		name_len = dn->d_name.len;
> > > > +		spin_unlock(&dn->d_lock);
> > > > +
> > > Hi Jeff,
> > > 
> > > BTW, does the dn->d_lock is must here ? From the code comments, the
> > > d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
> > > 
> > > In ceph code I found some places are using the d_lock when accessing the
> > > d_name, some are not. And in none ceph code, they will almost never use
> > > the d_lock to protect the d_name.
> > > 
> > It's probably not needed. The d_name can only change if there are no
> > other outstanding references to the dentry.
> 
> If so, the dentry_name = kmalloc() is not needed any more.
> 
> 

I take it back.

I think the d_lock is the only thing that protects this against a
concurrent rename. Have a look at __d_move, and (in particular) the call
to copy_name. The d_lock is what guards against that happening while
you're working with it here.

You might be able to get away without using it, but you'll need to
follow similar rules as RCU walk.

FWIW, the best source for this info is Neil Brown's writeup in
Documentation/filesystems/path_lookup.rst.

> 
> > > 
> > > > +		dentry_name[name_len] = '\0';
> > > > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > > > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > > > +
> > > > +		if (!dir_emit(ctx, dentry_name, name_len,
> > > >    			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> > > >    			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> > > >    			/*
> > > > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > >    			goto out;
> > > >    		}
> > > >    
> > > > -		/* Reset the lengths to their original allocated vals */
> > > > -		oname.len = olen;
> > > >    		ctx->pos++;
> > > >    	}
> > > >    
> > > > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > >    	err = 0;
> > > >    	dout("readdir %p file %p done.\n", inode, file);
> > > >    out:
> > > > -	ceph_fname_free_buffer(inode, &tname);
> > > > -	ceph_fname_free_buffer(inode, &oname);
> > > > +	if (dentry_name)
> > > > +		kfree(dentry_name);
> > > >    	return err;
> > > >    }
> > > >    
> > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > > index b573a0f33450..19e5275eae1c 100644
> > > > --- a/fs/ceph/inode.c
> > > > +++ b/fs/ceph/inode.c
> > > > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > >    			goto out;
> > > >    		}
> > > >    
> > > > +		rde->dentry = NULL;
> > > >    		dname.name = oname.name;
> > > >    		dname.len = oname.len;
> > > >    		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > > > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > >    			goto retry_lookup;
> > > >    		}
> > > >    
> > > > +		/*
> > > > +		 * ceph_readdir will use the dentry to get the name
> > > > +		 * to avoid doing the dencrypt again there.
> > > > +		 */
> > > > +		rde->dentry = dget(dn);
> > > > +
> > > >    		/* inode */
> > > >    		if (d_really_is_positive(dn)) {
> > > >    			in = d_inode(dn);
> > > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > > index 8d704ddd7291..9e0a51ef1dfa 100644
> > > > --- a/fs/ceph/mds_client.c
> > > > +++ b/fs/ceph/mds_client.c
> > > > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> > > >    
> > > >    		kfree(rde->inode.fscrypt_auth);
> > > >    		kfree(rde->inode.fscrypt_file);
> > > > +		dput(rde->dentry);
> > > >    	}
> > > >    	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> > > >    }
> > > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > > index 0dfe24f94567..663d7754d57d 100644
> > > > --- a/fs/ceph/mds_client.h
> > > > +++ b/fs/ceph/mds_client.h
> > > > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> > > >    };
> > > >    
> > > >    struct ceph_mds_reply_dir_entry {
> > > > +	struct dentry		      *dentry;
> > > >    	char                          *name;
> > > >    	u8			      *altname;
> > > >    	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 11:21       ` Jeff Layton
@ 2022-03-10 11:29         ` Xiubo Li
  2022-03-10 12:21         ` Xiubo Li
  1 sibling, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 11:29 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 7:21 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
>> On 3/10/22 6:36 PM, Jeff Layton wrote:
>>> On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
>>>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>
>>>>> For the readdir request the dentries will be pasred and dencrypted
>>>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>>>> get the dentry name from the dentry cache instead of parsing and
>>>>> dencrypting them again. This could improve performance.
>>>>>
>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>> ---
>>>>>
>>>>> V7:
>>>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>>>
>>>>> V6:
>>>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>>>      instead, since we are limiting the max length of snapshot name to
>>>>>      240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>>>
>>>>> V5:
>>>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>>>> - release the rde->dentry in destroy_reply_info
>>>>>
>>>>>
>>>>>
>>>>>     fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>>>     fs/ceph/inode.c      |  7 ++++++
>>>>>     fs/ceph/mds_client.c |  1 +
>>>>>     fs/ceph/mds_client.h |  1 +
>>>>>     4 files changed, 34 insertions(+), 31 deletions(-)
>>>>>
>>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>>> index 6df2a91af236..2397c34e9173 100644
>>>>> --- a/fs/ceph/dir.c
>>>>> +++ b/fs/ceph/dir.c
>>>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     	int err;
>>>>>     	unsigned frag = -1;
>>>>>     	struct ceph_mds_reply_info_parsed *rinfo;
>>>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>>>> +	char *dentry_name = NULL;
>>>>>     
>>>>>     	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>>>     	if (dfi->file_info.flags & CEPH_F_ATEND)
>>>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     		spin_unlock(&ci->i_ceph_lock);
>>>>>     	}
>>>>>     
>>>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>>>> -	if (err < 0)
>>>>> -		goto out;
>>>>> -
>>>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>>>> -	if (err < 0)
>>>>> -		goto out;
>>>>> -
>>>>>     	/* proceed with a normal readdir */
>>>>>     more:
>>>>>     	/* do we have the correct frag content buffered? */
>>>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     			}
>>>>>     		}
>>>>>     	}
>>>>> +
>>>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>>>> +	if (!dentry_name) {
>>>>> +		err = -ENOMEM;
>>>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>>>> +		dfi->last_readdir = NULL;
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>>     	for (; i < rinfo->dir_nr; i++) {
>>>>>     		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>>>> -		struct ceph_fname fname = { .dir	= inode,
>>>>> -					    .name	= rde->name,
>>>>> -					    .name_len	= rde->name_len,
>>>>> -					    .ctext	= rde->altname,
>>>>> -					    .ctext_len	= rde->altname_len };
>>>>> -		u32 olen = oname.len;
>>>>> -
>>>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>>>> -		if (err) {
>>>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>>>> -			       rde->name_len, rde->name, err);
>>>>> -			goto out;
>>>>> -		}
>>>>> +		struct dentry *dn = rde->dentry;
>>>>> +		int name_len;
>>>>>     
>>>>>     		BUG_ON(rde->offset < ctx->pos);
>>>>>     		BUG_ON(!rde->inode.in);
>>>>> +		BUG_ON(!rde->dentry);
>>>>>     
>>>>>     		ctx->pos = rde->offset;
>>>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>>>> -		     i, rinfo->dir_nr, ctx->pos,
>>>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>>>     
>>>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>>>> +		spin_lock(&dn->d_lock);
>>>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>>>> +		name_len = dn->d_name.len;
>>>>> +		spin_unlock(&dn->d_lock);
>>>>> +
>>>> Hi Jeff,
>>>>
>>>> BTW, does the dn->d_lock is must here ? From the code comments, the
>>>> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
>>>>
>>>> In ceph code I found some places are using the d_lock when accessing the
>>>> d_name, some are not. And in none ceph code, they will almost never use
>>>> the d_lock to protect the d_name.
>>>>
>>> It's probably not needed. The d_name can only change if there are no
>>> other outstanding references to the dentry.
>> If so, the dentry_name = kmalloc() is not needed any more.
>>
>>
> I take it back.
>
> I think the d_lock is the only thing that protects this against a
> concurrent rename. Have a look at __d_move, and (in particular) the call
> to copy_name. The d_lock is what guards against that happening while
> you're working with it here.
>
> You might be able to get away without using it, but you'll need to
> follow similar rules as RCU walk.

Yeah, for this I was also checking how the function 'dentry_name()' in 
lib/vsprintf.c is working without the d_lock.

For now to be simple let's keep what it is now.

- Xiubo


> FWIW, the best source for this info is Neil Brown's writeup in
> Documentation/filesystems/path_lookup.rst.
>
>>>>> +		dentry_name[name_len] = '\0';
>>>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>>>> +
>>>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>>>     			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>>>     			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>>>     			/*
>>>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     			goto out;
>>>>>     		}
>>>>>     
>>>>> -		/* Reset the lengths to their original allocated vals */
>>>>> -		oname.len = olen;
>>>>>     		ctx->pos++;
>>>>>     	}
>>>>>     
>>>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     	err = 0;
>>>>>     	dout("readdir %p file %p done.\n", inode, file);
>>>>>     out:
>>>>> -	ceph_fname_free_buffer(inode, &tname);
>>>>> -	ceph_fname_free_buffer(inode, &oname);
>>>>> +	if (dentry_name)
>>>>> +		kfree(dentry_name);
>>>>>     	return err;
>>>>>     }
>>>>>     
>>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>>> index b573a0f33450..19e5275eae1c 100644
>>>>> --- a/fs/ceph/inode.c
>>>>> +++ b/fs/ceph/inode.c
>>>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>     			goto out;
>>>>>     		}
>>>>>     
>>>>> +		rde->dentry = NULL;
>>>>>     		dname.name = oname.name;
>>>>>     		dname.len = oname.len;
>>>>>     		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>     			goto retry_lookup;
>>>>>     		}
>>>>>     
>>>>> +		/*
>>>>> +		 * ceph_readdir will use the dentry to get the name
>>>>> +		 * to avoid doing the dencrypt again there.
>>>>> +		 */
>>>>> +		rde->dentry = dget(dn);
>>>>> +
>>>>>     		/* inode */
>>>>>     		if (d_really_is_positive(dn)) {
>>>>>     			in = d_inode(dn);
>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>>>> --- a/fs/ceph/mds_client.c
>>>>> +++ b/fs/ceph/mds_client.c
>>>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>>>     
>>>>>     		kfree(rde->inode.fscrypt_auth);
>>>>>     		kfree(rde->inode.fscrypt_file);
>>>>> +		dput(rde->dentry);
>>>>>     	}
>>>>>     	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>>>     }
>>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>>> index 0dfe24f94567..663d7754d57d 100644
>>>>> --- a/fs/ceph/mds_client.h
>>>>> +++ b/fs/ceph/mds_client.h
>>>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>>>     };
>>>>>     
>>>>>     struct ceph_mds_reply_dir_entry {
>>>>> +	struct dentry		      *dentry;
>>>>>     	char                          *name;
>>>>>     	u8			      *altname;
>>>>>     	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 11:21       ` Jeff Layton
  2022-03-10 11:29         ` Xiubo Li
@ 2022-03-10 12:21         ` Xiubo Li
  2022-03-10 13:12           ` Jeff Layton
  1 sibling, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 12:21 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 7:21 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
>> On 3/10/22 6:36 PM, Jeff Layton wrote:
>>> On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
>>>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>
>>>>> For the readdir request the dentries will be pasred and dencrypted
>>>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>>>> get the dentry name from the dentry cache instead of parsing and
>>>>> dencrypting them again. This could improve performance.
>>>>>
>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>> ---
>>>>>
>>>>> V7:
>>>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>>>
>>>>> V6:
>>>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>>>      instead, since we are limiting the max length of snapshot name to
>>>>>      240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>>>
>>>>> V5:
>>>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>>>> - release the rde->dentry in destroy_reply_info
>>>>>
>>>>>
>>>>>
>>>>>     fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>>>     fs/ceph/inode.c      |  7 ++++++
>>>>>     fs/ceph/mds_client.c |  1 +
>>>>>     fs/ceph/mds_client.h |  1 +
>>>>>     4 files changed, 34 insertions(+), 31 deletions(-)
>>>>>
>>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>>> index 6df2a91af236..2397c34e9173 100644
>>>>> --- a/fs/ceph/dir.c
>>>>> +++ b/fs/ceph/dir.c
>>>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     	int err;
>>>>>     	unsigned frag = -1;
>>>>>     	struct ceph_mds_reply_info_parsed *rinfo;
>>>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>>>> +	char *dentry_name = NULL;
>>>>>     
>>>>>     	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>>>     	if (dfi->file_info.flags & CEPH_F_ATEND)
>>>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     		spin_unlock(&ci->i_ceph_lock);
>>>>>     	}
>>>>>     
>>>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>>>> -	if (err < 0)
>>>>> -		goto out;
>>>>> -
>>>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>>>> -	if (err < 0)
>>>>> -		goto out;
>>>>> -
>>>>>     	/* proceed with a normal readdir */
>>>>>     more:
>>>>>     	/* do we have the correct frag content buffered? */
>>>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     			}
>>>>>     		}
>>>>>     	}
>>>>> +
>>>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>>>> +	if (!dentry_name) {
>>>>> +		err = -ENOMEM;
>>>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>>>> +		dfi->last_readdir = NULL;
>>>>> +		goto out;
>>>>> +	}
>>>>> +
>>>>>     	for (; i < rinfo->dir_nr; i++) {
>>>>>     		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>>>> -		struct ceph_fname fname = { .dir	= inode,
>>>>> -					    .name	= rde->name,
>>>>> -					    .name_len	= rde->name_len,
>>>>> -					    .ctext	= rde->altname,
>>>>> -					    .ctext_len	= rde->altname_len };
>>>>> -		u32 olen = oname.len;
>>>>> -
>>>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>>>> -		if (err) {
>>>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>>>> -			       rde->name_len, rde->name, err);
>>>>> -			goto out;
>>>>> -		}
>>>>> +		struct dentry *dn = rde->dentry;
>>>>> +		int name_len;
>>>>>     
>>>>>     		BUG_ON(rde->offset < ctx->pos);
>>>>>     		BUG_ON(!rde->inode.in);
>>>>> +		BUG_ON(!rde->dentry);
>>>>>     
>>>>>     		ctx->pos = rde->offset;
>>>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>>>> -		     i, rinfo->dir_nr, ctx->pos,
>>>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>>>     
>>>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>>>> +		spin_lock(&dn->d_lock);
>>>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>>>> +		name_len = dn->d_name.len;
>>>>> +		spin_unlock(&dn->d_lock);
>>>>> +
>>>> Hi Jeff,
>>>>
>>>> BTW, does the dn->d_lock is must here ? From the code comments, the
>>>> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
>>>>
>>>> In ceph code I found some places are using the d_lock when accessing the
>>>> d_name, some are not. And in none ceph code, they will almost never use
>>>> the d_lock to protect the d_name.
>>>>
>>> It's probably not needed. The d_name can only change if there are no
>>> other outstanding references to the dentry.
>> If so, the dentry_name = kmalloc() is not needed any more.
>>
>>
> I take it back.
>
> I think the d_lock is the only thing that protects this against a
> concurrent rename. Have a look at __d_move, and (in particular) the call
> to copy_name. The d_lock is what guards against that happening while
> you're working with it here.
>
> You might be able to get away without using it, but you'll need to
> follow similar rules as RCU walk.
>
> FWIW, the best source for this info is Neil Brown's writeup in
> Documentation/filesystems/path_lookup.rst.

IMO we can get rid of the 'd_lock' by:

char *dentry_name = READ_ONCE(dentry->d_name.name);

if (!dir_emit(ctx, dentry_name, strlen(dentry_name),....

Because both the swap_names() and copy_name() won't release the memory 
of dentry->d_name.name.

But possible we will see a corrupted dentry name if it's doing the 
copy_name() ?

>>>>> +		dentry_name[name_len] = '\0';
>>>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>>>> +
>>>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>>>     			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>>>     			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>>>     			/*
>>>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     			goto out;
>>>>>     		}
>>>>>     
>>>>> -		/* Reset the lengths to their original allocated vals */
>>>>> -		oname.len = olen;
>>>>>     		ctx->pos++;
>>>>>     	}
>>>>>     
>>>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>     	err = 0;
>>>>>     	dout("readdir %p file %p done.\n", inode, file);
>>>>>     out:
>>>>> -	ceph_fname_free_buffer(inode, &tname);
>>>>> -	ceph_fname_free_buffer(inode, &oname);
>>>>> +	if (dentry_name)
>>>>> +		kfree(dentry_name);
>>>>>     	return err;
>>>>>     }
>>>>>     
>>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>>> index b573a0f33450..19e5275eae1c 100644
>>>>> --- a/fs/ceph/inode.c
>>>>> +++ b/fs/ceph/inode.c
>>>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>     			goto out;
>>>>>     		}
>>>>>     
>>>>> +		rde->dentry = NULL;
>>>>>     		dname.name = oname.name;
>>>>>     		dname.len = oname.len;
>>>>>     		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>     			goto retry_lookup;
>>>>>     		}
>>>>>     
>>>>> +		/*
>>>>> +		 * ceph_readdir will use the dentry to get the name
>>>>> +		 * to avoid doing the dencrypt again there.
>>>>> +		 */
>>>>> +		rde->dentry = dget(dn);
>>>>> +
>>>>>     		/* inode */
>>>>>     		if (d_really_is_positive(dn)) {
>>>>>     			in = d_inode(dn);
>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>>>> --- a/fs/ceph/mds_client.c
>>>>> +++ b/fs/ceph/mds_client.c
>>>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>>>     
>>>>>     		kfree(rde->inode.fscrypt_auth);
>>>>>     		kfree(rde->inode.fscrypt_file);
>>>>> +		dput(rde->dentry);
>>>>>     	}
>>>>>     	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>>>     }
>>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>>> index 0dfe24f94567..663d7754d57d 100644
>>>>> --- a/fs/ceph/mds_client.h
>>>>> +++ b/fs/ceph/mds_client.h
>>>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>>>     };
>>>>>     
>>>>>     struct ceph_mds_reply_dir_entry {
>>>>> +	struct dentry		      *dentry;
>>>>>     	char                          *name;
>>>>>     	u8			      *altname;
>>>>>     	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 12:21         ` Xiubo Li
@ 2022-03-10 13:12           ` Jeff Layton
  2022-03-10 14:14             ` Xiubo Li
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-10 13:12 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 20:21 +0800, Xiubo Li wrote:
> On 3/10/22 7:21 PM, Jeff Layton wrote:
> > On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
> > > On 3/10/22 6:36 PM, Jeff Layton wrote:
> > > > On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
> > > > > On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > 
> > > > > > For the readdir request the dentries will be pasred and dencrypted
> > > > > > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > > > > > get the dentry name from the dentry cache instead of parsing and
> > > > > > dencrypting them again. This could improve performance.
> > > > > > 
> > > > > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > > > > ---
> > > > > > 
> > > > > > V7:
> > > > > > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > > > > > 
> > > > > > V6:
> > > > > > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> > > > > >      instead, since we are limiting the max length of snapshot name to
> > > > > >      240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > > > > > 
> > > > > > V5:
> > > > > > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > > > > > - release the rde->dentry in destroy_reply_info
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > >     fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> > > > > >     fs/ceph/inode.c      |  7 ++++++
> > > > > >     fs/ceph/mds_client.c |  1 +
> > > > > >     fs/ceph/mds_client.h |  1 +
> > > > > >     4 files changed, 34 insertions(+), 31 deletions(-)
> > > > > > 
> > > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > > > > index 6df2a91af236..2397c34e9173 100644
> > > > > > --- a/fs/ceph/dir.c
> > > > > > +++ b/fs/ceph/dir.c
> > > > > > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > >     	int err;
> > > > > >     	unsigned frag = -1;
> > > > > >     	struct ceph_mds_reply_info_parsed *rinfo;
> > > > > > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > > > > > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > > > > > +	char *dentry_name = NULL;
> > > > > >     
> > > > > >     	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> > > > > >     	if (dfi->file_info.flags & CEPH_F_ATEND)
> > > > > > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > >     		spin_unlock(&ci->i_ceph_lock);
> > > > > >     	}
> > > > > >     
> > > > > > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > > > > > -	if (err < 0)
> > > > > > -		goto out;
> > > > > > -
> > > > > > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > > > > > -	if (err < 0)
> > > > > > -		goto out;
> > > > > > -
> > > > > >     	/* proceed with a normal readdir */
> > > > > >     more:
> > > > > >     	/* do we have the correct frag content buffered? */
> > > > > > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > >     			}
> > > > > >     		}
> > > > > >     	}
> > > > > > +
> > > > > > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > > > > > +	if (!dentry_name) {
> > > > > > +		err = -ENOMEM;
> > > > > > +		ceph_mdsc_put_request(dfi->last_readdir);
> > > > > > +		dfi->last_readdir = NULL;
> > > > > > +		goto out;
> > > > > > +	}
> > > > > > +
> > > > > >     	for (; i < rinfo->dir_nr; i++) {
> > > > > >     		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > > > > > -		struct ceph_fname fname = { .dir	= inode,
> > > > > > -					    .name	= rde->name,
> > > > > > -					    .name_len	= rde->name_len,
> > > > > > -					    .ctext	= rde->altname,
> > > > > > -					    .ctext_len	= rde->altname_len };
> > > > > > -		u32 olen = oname.len;
> > > > > > -
> > > > > > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > > > > > -		if (err) {
> > > > > > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > > > > > -			       rde->name_len, rde->name, err);
> > > > > > -			goto out;
> > > > > > -		}
> > > > > > +		struct dentry *dn = rde->dentry;
> > > > > > +		int name_len;
> > > > > >     
> > > > > >     		BUG_ON(rde->offset < ctx->pos);
> > > > > >     		BUG_ON(!rde->inode.in);
> > > > > > +		BUG_ON(!rde->dentry);
> > > > > >     
> > > > > >     		ctx->pos = rde->offset;
> > > > > > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > > > > > -		     i, rinfo->dir_nr, ctx->pos,
> > > > > > -		     rde->name_len, rde->name, &rde->inode.in);
> > > > > >     
> > > > > > -		if (!dir_emit(ctx, oname.name, oname.len,
> > > > > > +		spin_lock(&dn->d_lock);
> > > > > > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > > > > > +		name_len = dn->d_name.len;
> > > > > > +		spin_unlock(&dn->d_lock);
> > > > > > +
> > > > > Hi Jeff,
> > > > > 
> > > > > BTW, does the dn->d_lock is must here ? From the code comments, the
> > > > > d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
> > > > > 
> > > > > In ceph code I found some places are using the d_lock when accessing the
> > > > > d_name, some are not. And in none ceph code, they will almost never use
> > > > > the d_lock to protect the d_name.
> > > > > 
> > > > It's probably not needed. The d_name can only change if there are no
> > > > other outstanding references to the dentry.
> > > If so, the dentry_name = kmalloc() is not needed any more.
> > > 
> > > 
> > I take it back.
> > 
> > I think the d_lock is the only thing that protects this against a
> > concurrent rename. Have a look at __d_move, and (in particular) the call
> > to copy_name. The d_lock is what guards against that happening while
> > you're working with it here.
> > 
> > You might be able to get away without using it, but you'll need to
> > follow similar rules as RCU walk.
> > 
> > FWIW, the best source for this info is Neil Brown's writeup in
> > Documentation/filesystems/path_lookup.rst.
> 

That should have been Documentation/filesystems/path-lookup.rst


> IMO we can get rid of the 'd_lock' by:
> 
> char *dentry_name = READ_ONCE(dentry->d_name.name);
> 
> if (!dir_emit(ctx, dentry_name, strlen(dentry_name),....
> 
> Because both the swap_names() and copy_name() won't release the memory 
> of dentry->d_name.name.
> 
> But possible we will see a corrupted dentry name if it's doing the 
> copy_name() ?
> 

ceph already satisfies readdir out of the dcache in some cases. See
__dcache_readdir, which basically walks a list of dentry pointers
stashed in the pagecache and then calls dir_emit on each (without the
dentry->d_lock held!). That's a bit different though since we have some
assurance that things won't change in that case (it may still be racy
too -- not sure).

I don't think you have the same guarantees here. What might be best is
to either allocate (or declare on-stack) a NAME_MAX buffer, and copy
into that from the d_name while you hold the lock. Then drop the lock
and dir_emit.

I wonder if we're going about this the wrong way...

Once we've decrypted the names, we don't need the crypttext anymore at
all. Maybe it would be better to just decrypt the names in-place, or
copy back over the crypttext after decrypting? Then we might not even
need some of these changes in readdir at all.


> > > > > > +		dentry_name[name_len] = '\0';
> > > > > > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > > > > > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > > > > > +
> > > > > > +		if (!dir_emit(ctx, dentry_name, name_len,
> > > > > >     			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> > > > > >     			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> > > > > >     			/*
> > > > > > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > >     			goto out;
> > > > > >     		}
> > > > > >     
> > > > > > -		/* Reset the lengths to their original allocated vals */
> > > > > > -		oname.len = olen;
> > > > > >     		ctx->pos++;
> > > > > >     	}
> > > > > >     
> > > > > > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > >     	err = 0;
> > > > > >     	dout("readdir %p file %p done.\n", inode, file);
> > > > > >     out:
> > > > > > -	ceph_fname_free_buffer(inode, &tname);
> > > > > > -	ceph_fname_free_buffer(inode, &oname);
> > > > > > +	if (dentry_name)
> > > > > > +		kfree(dentry_name);
> > > > > >     	return err;
> > > > > >     }
> > > > > >     
> > > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > > > > index b573a0f33450..19e5275eae1c 100644
> > > > > > --- a/fs/ceph/inode.c
> > > > > > +++ b/fs/ceph/inode.c
> > > > > > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > > > >     			goto out;
> > > > > >     		}
> > > > > >     
> > > > > > +		rde->dentry = NULL;
> > > > > >     		dname.name = oname.name;
> > > > > >     		dname.len = oname.len;
> > > > > >     		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > > > > > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > > > >     			goto retry_lookup;
> > > > > >     		}
> > > > > >     
> > > > > > +		/*
> > > > > > +		 * ceph_readdir will use the dentry to get the name
> > > > > > +		 * to avoid doing the dencrypt again there.
> > > > > > +		 */
> > > > > > +		rde->dentry = dget(dn);
> > > > > > +
> > > > > >     		/* inode */
> > > > > >     		if (d_really_is_positive(dn)) {
> > > > > >     			in = d_inode(dn);
> > > > > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > > > > index 8d704ddd7291..9e0a51ef1dfa 100644
> > > > > > --- a/fs/ceph/mds_client.c
> > > > > > +++ b/fs/ceph/mds_client.c
> > > > > > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> > > > > >     
> > > > > >     		kfree(rde->inode.fscrypt_auth);
> > > > > >     		kfree(rde->inode.fscrypt_file);
> > > > > > +		dput(rde->dentry);
> > > > > >     	}
> > > > > >     	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> > > > > >     }
> > > > > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > > > > index 0dfe24f94567..663d7754d57d 100644
> > > > > > --- a/fs/ceph/mds_client.h
> > > > > > +++ b/fs/ceph/mds_client.h
> > > > > > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> > > > > >     };
> > > > > >     
> > > > > >     struct ceph_mds_reply_dir_entry {
> > > > > > +	struct dentry		      *dentry;
> > > > > >     	char                          *name;
> > > > > >     	u8			      *altname;
> > > > > >     	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 13:12           ` Jeff Layton
@ 2022-03-10 14:14             ` Xiubo Li
  2022-03-10 14:20               ` Jeff Layton
  0 siblings, 1 reply; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 14:14 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 9:12 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 20:21 +0800, Xiubo Li wrote:
>> On 3/10/22 7:21 PM, Jeff Layton wrote:
>>> On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
>>>> On 3/10/22 6:36 PM, Jeff Layton wrote:
>>>>> On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
>>>>>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>>
>>>>>>> For the readdir request the dentries will be pasred and dencrypted
>>>>>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>>>>>> get the dentry name from the dentry cache instead of parsing and
>>>>>>> dencrypting them again. This could improve performance.
>>>>>>>
>>>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>>>> ---
>>>>>>>
>>>>>>> V7:
>>>>>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>>>>>
>>>>>>> V6:
>>>>>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>>>>>       instead, since we are limiting the max length of snapshot name to
>>>>>>>       240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>>>>>
>>>>>>> V5:
>>>>>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>>>>>> - release the rde->dentry in destroy_reply_info
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>      fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>>>>>      fs/ceph/inode.c      |  7 ++++++
>>>>>>>      fs/ceph/mds_client.c |  1 +
>>>>>>>      fs/ceph/mds_client.h |  1 +
>>>>>>>      4 files changed, 34 insertions(+), 31 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>>>>> index 6df2a91af236..2397c34e9173 100644
>>>>>>> --- a/fs/ceph/dir.c
>>>>>>> +++ b/fs/ceph/dir.c
>>>>>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>      	int err;
>>>>>>>      	unsigned frag = -1;
>>>>>>>      	struct ceph_mds_reply_info_parsed *rinfo;
>>>>>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>>>>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>>>>>> +	char *dentry_name = NULL;
>>>>>>>      
>>>>>>>      	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>>>>>      	if (dfi->file_info.flags & CEPH_F_ATEND)
>>>>>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>      		spin_unlock(&ci->i_ceph_lock);
>>>>>>>      	}
>>>>>>>      
>>>>>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>>>>>> -	if (err < 0)
>>>>>>> -		goto out;
>>>>>>> -
>>>>>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>>>>>> -	if (err < 0)
>>>>>>> -		goto out;
>>>>>>> -
>>>>>>>      	/* proceed with a normal readdir */
>>>>>>>      more:
>>>>>>>      	/* do we have the correct frag content buffered? */
>>>>>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>      			}
>>>>>>>      		}
>>>>>>>      	}
>>>>>>> +
>>>>>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>>>>>> +	if (!dentry_name) {
>>>>>>> +		err = -ENOMEM;
>>>>>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>>>>>> +		dfi->last_readdir = NULL;
>>>>>>> +		goto out;
>>>>>>> +	}
>>>>>>> +
>>>>>>>      	for (; i < rinfo->dir_nr; i++) {
>>>>>>>      		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>>>>>> -		struct ceph_fname fname = { .dir	= inode,
>>>>>>> -					    .name	= rde->name,
>>>>>>> -					    .name_len	= rde->name_len,
>>>>>>> -					    .ctext	= rde->altname,
>>>>>>> -					    .ctext_len	= rde->altname_len };
>>>>>>> -		u32 olen = oname.len;
>>>>>>> -
>>>>>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>>>>>> -		if (err) {
>>>>>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>>>>>> -			       rde->name_len, rde->name, err);
>>>>>>> -			goto out;
>>>>>>> -		}
>>>>>>> +		struct dentry *dn = rde->dentry;
>>>>>>> +		int name_len;
>>>>>>>      
>>>>>>>      		BUG_ON(rde->offset < ctx->pos);
>>>>>>>      		BUG_ON(!rde->inode.in);
>>>>>>> +		BUG_ON(!rde->dentry);
>>>>>>>      
>>>>>>>      		ctx->pos = rde->offset;
>>>>>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>>>>>> -		     i, rinfo->dir_nr, ctx->pos,
>>>>>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>>>>>      
>>>>>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>>>>>> +		spin_lock(&dn->d_lock);
>>>>>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>>>>>> +		name_len = dn->d_name.len;
>>>>>>> +		spin_unlock(&dn->d_lock);
>>>>>>> +
>>>>>> Hi Jeff,
>>>>>>
>>>>>> BTW, does the dn->d_lock is must here ? From the code comments, the
>>>>>> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
>>>>>>
>>>>>> In ceph code I found some places are using the d_lock when accessing the
>>>>>> d_name, some are not. And in none ceph code, they will almost never use
>>>>>> the d_lock to protect the d_name.
>>>>>>
>>>>> It's probably not needed. The d_name can only change if there are no
>>>>> other outstanding references to the dentry.
>>>> If so, the dentry_name = kmalloc() is not needed any more.
>>>>
>>>>
>>> I take it back.
>>>
>>> I think the d_lock is the only thing that protects this against a
>>> concurrent rename. Have a look at __d_move, and (in particular) the call
>>> to copy_name. The d_lock is what guards against that happening while
>>> you're working with it here.
>>>
>>> You might be able to get away without using it, but you'll need to
>>> follow similar rules as RCU walk.
>>>
>>> FWIW, the best source for this info is Neil Brown's writeup in
>>> Documentation/filesystems/path_lookup.rst.
> That should have been Documentation/filesystems/path-lookup.rst
>
>
>> IMO we can get rid of the 'd_lock' by:
>>
>> char *dentry_name = READ_ONCE(dentry->d_name.name);
>>
>> if (!dir_emit(ctx, dentry_name, strlen(dentry_name),....
>>
>> Because both the swap_names() and copy_name() won't release the memory
>> of dentry->d_name.name.
>>
>> But possible we will see a corrupted dentry name if it's doing the
>> copy_name() ?
>>
> ceph already satisfies readdir out of the dcache in some cases. See
> __dcache_readdir, which basically walks a list of dentry pointers
> stashed in the pagecache and then calls dir_emit on each (without the
> dentry->d_lock held!). That's a bit different though since we have some
> assurance that things won't change in that case (it may still be racy
> too -- not sure).
>
> I don't think you have the same guarantees here. What might be best is
> to either allocate (or declare on-stack) a NAME_MAX buffer, and copy
> into that from the d_name while you hold the lock. Then drop the lock
> and dir_emit.
>
> I wonder if we're going about this the wrong way...
>
> Once we've decrypted the names, we don't need the crypttext anymore at
> all. Maybe it would be better to just decrypt the names in-place, or
> copy back over the crypttext after decrypting? Then we might not even
> need some of these changes in readdir at all.
>
For this, I think you mean that in ceph_readdir_prepopulate() when the 
first time decrypting the crypttext we will save it in rde->name or 
rde->altername, then in ceph_readdir() to emit it directly or 
base64_encode it again without key.

Right ?


>>>>>>> +		dentry_name[name_len] = '\0';
>>>>>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>>>>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>>>>>> +
>>>>>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>>>>>      			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>>>>>      			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>>>>>      			/*
>>>>>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>      			goto out;
>>>>>>>      		}
>>>>>>>      
>>>>>>> -		/* Reset the lengths to their original allocated vals */
>>>>>>> -		oname.len = olen;
>>>>>>>      		ctx->pos++;
>>>>>>>      	}
>>>>>>>      
>>>>>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>      	err = 0;
>>>>>>>      	dout("readdir %p file %p done.\n", inode, file);
>>>>>>>      out:
>>>>>>> -	ceph_fname_free_buffer(inode, &tname);
>>>>>>> -	ceph_fname_free_buffer(inode, &oname);
>>>>>>> +	if (dentry_name)
>>>>>>> +		kfree(dentry_name);
>>>>>>>      	return err;
>>>>>>>      }
>>>>>>>      
>>>>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>>>>> index b573a0f33450..19e5275eae1c 100644
>>>>>>> --- a/fs/ceph/inode.c
>>>>>>> +++ b/fs/ceph/inode.c
>>>>>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>>>      			goto out;
>>>>>>>      		}
>>>>>>>      
>>>>>>> +		rde->dentry = NULL;
>>>>>>>      		dname.name = oname.name;
>>>>>>>      		dname.len = oname.len;
>>>>>>>      		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>>>>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>>>      			goto retry_lookup;
>>>>>>>      		}
>>>>>>>      
>>>>>>> +		/*
>>>>>>> +		 * ceph_readdir will use the dentry to get the name
>>>>>>> +		 * to avoid doing the dencrypt again there.
>>>>>>> +		 */
>>>>>>> +		rde->dentry = dget(dn);
>>>>>>> +
>>>>>>>      		/* inode */
>>>>>>>      		if (d_really_is_positive(dn)) {
>>>>>>>      			in = d_inode(dn);
>>>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>>>>>> --- a/fs/ceph/mds_client.c
>>>>>>> +++ b/fs/ceph/mds_client.c
>>>>>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>>>>>      
>>>>>>>      		kfree(rde->inode.fscrypt_auth);
>>>>>>>      		kfree(rde->inode.fscrypt_file);
>>>>>>> +		dput(rde->dentry);
>>>>>>>      	}
>>>>>>>      	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>>>>>      }
>>>>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>>>>> index 0dfe24f94567..663d7754d57d 100644
>>>>>>> --- a/fs/ceph/mds_client.h
>>>>>>> +++ b/fs/ceph/mds_client.h
>>>>>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>>>>>      };
>>>>>>>      
>>>>>>>      struct ceph_mds_reply_dir_entry {
>>>>>>> +	struct dentry		      *dentry;
>>>>>>>      	char                          *name;
>>>>>>>      	u8			      *altname;
>>>>>>>      	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 14:14             ` Xiubo Li
@ 2022-03-10 14:20               ` Jeff Layton
  2022-03-10 14:24                 ` Xiubo Li
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff Layton @ 2022-03-10 14:20 UTC (permalink / raw)
  To: Xiubo Li; +Cc: idryomov, vshankar, lhenriques, ceph-devel

On Thu, 2022-03-10 at 22:14 +0800, Xiubo Li wrote:
> On 3/10/22 9:12 PM, Jeff Layton wrote:
> > On Thu, 2022-03-10 at 20:21 +0800, Xiubo Li wrote:
> > > On 3/10/22 7:21 PM, Jeff Layton wrote:
> > > > On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
> > > > > On 3/10/22 6:36 PM, Jeff Layton wrote:
> > > > > > On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
> > > > > > > On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
> > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > 
> > > > > > > > For the readdir request the dentries will be pasred and dencrypted
> > > > > > > > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
> > > > > > > > get the dentry name from the dentry cache instead of parsing and
> > > > > > > > dencrypting them again. This could improve performance.
> > > > > > > > 
> > > > > > > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > > > > > > ---
> > > > > > > > 
> > > > > > > > V7:
> > > > > > > > - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
> > > > > > > > 
> > > > > > > > V6:
> > > > > > > > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
> > > > > > > >       instead, since we are limiting the max length of snapshot name to
> > > > > > > >       240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
> > > > > > > > 
> > > > > > > > V5:
> > > > > > > > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
> > > > > > > > - release the rde->dentry in destroy_reply_info
> > > > > > > > 
> > > > > > > > 
> > > > > > > > 
> > > > > > > >      fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
> > > > > > > >      fs/ceph/inode.c      |  7 ++++++
> > > > > > > >      fs/ceph/mds_client.c |  1 +
> > > > > > > >      fs/ceph/mds_client.h |  1 +
> > > > > > > >      4 files changed, 34 insertions(+), 31 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > > > > > > index 6df2a91af236..2397c34e9173 100644
> > > > > > > > --- a/fs/ceph/dir.c
> > > > > > > > +++ b/fs/ceph/dir.c
> > > > > > > > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > > > >      	int err;
> > > > > > > >      	unsigned frag = -1;
> > > > > > > >      	struct ceph_mds_reply_info_parsed *rinfo;
> > > > > > > > -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
> > > > > > > > -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
> > > > > > > > +	char *dentry_name = NULL;
> > > > > > > >      
> > > > > > > >      	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
> > > > > > > >      	if (dfi->file_info.flags & CEPH_F_ATEND)
> > > > > > > > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > > > >      		spin_unlock(&ci->i_ceph_lock);
> > > > > > > >      	}
> > > > > > > >      
> > > > > > > > -	err = ceph_fname_alloc_buffer(inode, &tname);
> > > > > > > > -	if (err < 0)
> > > > > > > > -		goto out;
> > > > > > > > -
> > > > > > > > -	err = ceph_fname_alloc_buffer(inode, &oname);
> > > > > > > > -	if (err < 0)
> > > > > > > > -		goto out;
> > > > > > > > -
> > > > > > > >      	/* proceed with a normal readdir */
> > > > > > > >      more:
> > > > > > > >      	/* do we have the correct frag content buffered? */
> > > > > > > > @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > > > >      			}
> > > > > > > >      		}
> > > > > > > >      	}
> > > > > > > > +
> > > > > > > > +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
> > > > > > > > +	if (!dentry_name) {
> > > > > > > > +		err = -ENOMEM;
> > > > > > > > +		ceph_mdsc_put_request(dfi->last_readdir);
> > > > > > > > +		dfi->last_readdir = NULL;
> > > > > > > > +		goto out;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > >      	for (; i < rinfo->dir_nr; i++) {
> > > > > > > >      		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
> > > > > > > > -		struct ceph_fname fname = { .dir	= inode,
> > > > > > > > -					    .name	= rde->name,
> > > > > > > > -					    .name_len	= rde->name_len,
> > > > > > > > -					    .ctext	= rde->altname,
> > > > > > > > -					    .ctext_len	= rde->altname_len };
> > > > > > > > -		u32 olen = oname.len;
> > > > > > > > -
> > > > > > > > -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
> > > > > > > > -		if (err) {
> > > > > > > > -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
> > > > > > > > -			       rde->name_len, rde->name, err);
> > > > > > > > -			goto out;
> > > > > > > > -		}
> > > > > > > > +		struct dentry *dn = rde->dentry;
> > > > > > > > +		int name_len;
> > > > > > > >      
> > > > > > > >      		BUG_ON(rde->offset < ctx->pos);
> > > > > > > >      		BUG_ON(!rde->inode.in);
> > > > > > > > +		BUG_ON(!rde->dentry);
> > > > > > > >      
> > > > > > > >      		ctx->pos = rde->offset;
> > > > > > > > -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
> > > > > > > > -		     i, rinfo->dir_nr, ctx->pos,
> > > > > > > > -		     rde->name_len, rde->name, &rde->inode.in);
> > > > > > > >      
> > > > > > > > -		if (!dir_emit(ctx, oname.name, oname.len,
> > > > > > > > +		spin_lock(&dn->d_lock);
> > > > > > > > +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
> > > > > > > > +		name_len = dn->d_name.len;
> > > > > > > > +		spin_unlock(&dn->d_lock);
> > > > > > > > +
> > > > > > > Hi Jeff,
> > > > > > > 
> > > > > > > BTW, does the dn->d_lock is must here ? From the code comments, the
> > > > > > > d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
> > > > > > > 
> > > > > > > In ceph code I found some places are using the d_lock when accessing the
> > > > > > > d_name, some are not. And in none ceph code, they will almost never use
> > > > > > > the d_lock to protect the d_name.
> > > > > > > 
> > > > > > It's probably not needed. The d_name can only change if there are no
> > > > > > other outstanding references to the dentry.
> > > > > If so, the dentry_name = kmalloc() is not needed any more.
> > > > > 
> > > > > 
> > > > I take it back.
> > > > 
> > > > I think the d_lock is the only thing that protects this against a
> > > > concurrent rename. Have a look at __d_move, and (in particular) the call
> > > > to copy_name. The d_lock is what guards against that happening while
> > > > you're working with it here.
> > > > 
> > > > You might be able to get away without using it, but you'll need to
> > > > follow similar rules as RCU walk.
> > > > 
> > > > FWIW, the best source for this info is Neil Brown's writeup in
> > > > Documentation/filesystems/path_lookup.rst.
> > That should have been Documentation/filesystems/path-lookup.rst
> > 
> > 
> > > IMO we can get rid of the 'd_lock' by:
> > > 
> > > char *dentry_name = READ_ONCE(dentry->d_name.name);
> > > 
> > > if (!dir_emit(ctx, dentry_name, strlen(dentry_name),....
> > > 
> > > Because both the swap_names() and copy_name() won't release the memory
> > > of dentry->d_name.name.
> > > 
> > > But possible we will see a corrupted dentry name if it's doing the
> > > copy_name() ?
> > > 
> > ceph already satisfies readdir out of the dcache in some cases. See
> > __dcache_readdir, which basically walks a list of dentry pointers
> > stashed in the pagecache and then calls dir_emit on each (without the
> > dentry->d_lock held!). That's a bit different though since we have some
> > assurance that things won't change in that case (it may still be racy
> > too -- not sure).
> > 
> > I don't think you have the same guarantees here. What might be best is
> > to either allocate (or declare on-stack) a NAME_MAX buffer, and copy
> > into that from the d_name while you hold the lock. Then drop the lock
> > and dir_emit.
> > 
> > I wonder if we're going about this the wrong way...
> > 
> > Once we've decrypted the names, we don't need the crypttext anymore at
> > all. Maybe it would be better to just decrypt the names in-place, or
> > copy back over the crypttext after decrypting? Then we might not even
> > need some of these changes in readdir at all.
> > 
> For this, I think you mean that in ceph_readdir_prepopulate() when the 
> first time decrypting the crypttext we will save it in rde->name or 
> rde->altername, then in ceph_readdir() to emit it directly or 
> base64_encode it again without key.
> 
> Right ?
> 

Right.

Or, possibly even do that in an extra step before
ceph_readdir_prepopulate. Then we might not need as many changes to that
function.

> 
> > > > > > > > +		dentry_name[name_len] = '\0';
> > > > > > > > +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
> > > > > > > > +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
> > > > > > > > +
> > > > > > > > +		if (!dir_emit(ctx, dentry_name, name_len,
> > > > > > > >      			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
> > > > > > > >      			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
> > > > > > > >      			/*
> > > > > > > > @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > > > >      			goto out;
> > > > > > > >      		}
> > > > > > > >      
> > > > > > > > -		/* Reset the lengths to their original allocated vals */
> > > > > > > > -		oname.len = olen;
> > > > > > > >      		ctx->pos++;
> > > > > > > >      	}
> > > > > > > >      
> > > > > > > > @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> > > > > > > >      	err = 0;
> > > > > > > >      	dout("readdir %p file %p done.\n", inode, file);
> > > > > > > >      out:
> > > > > > > > -	ceph_fname_free_buffer(inode, &tname);
> > > > > > > > -	ceph_fname_free_buffer(inode, &oname);
> > > > > > > > +	if (dentry_name)
> > > > > > > > +		kfree(dentry_name);
> > > > > > > >      	return err;
> > > > > > > >      }
> > > > > > > >      
> > > > > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > > > > > > index b573a0f33450..19e5275eae1c 100644
> > > > > > > > --- a/fs/ceph/inode.c
> > > > > > > > +++ b/fs/ceph/inode.c
> > > > > > > > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > > > > > >      			goto out;
> > > > > > > >      		}
> > > > > > > >      
> > > > > > > > +		rde->dentry = NULL;
> > > > > > > >      		dname.name = oname.name;
> > > > > > > >      		dname.len = oname.len;
> > > > > > > >      		dname.hash = full_name_hash(parent, dname.name, dname.len);
> > > > > > > > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
> > > > > > > >      			goto retry_lookup;
> > > > > > > >      		}
> > > > > > > >      
> > > > > > > > +		/*
> > > > > > > > +		 * ceph_readdir will use the dentry to get the name
> > > > > > > > +		 * to avoid doing the dencrypt again there.
> > > > > > > > +		 */
> > > > > > > > +		rde->dentry = dget(dn);
> > > > > > > > +
> > > > > > > >      		/* inode */
> > > > > > > >      		if (d_really_is_positive(dn)) {
> > > > > > > >      			in = d_inode(dn);
> > > > > > > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > > > > > > index 8d704ddd7291..9e0a51ef1dfa 100644
> > > > > > > > --- a/fs/ceph/mds_client.c
> > > > > > > > +++ b/fs/ceph/mds_client.c
> > > > > > > > @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
> > > > > > > >      
> > > > > > > >      		kfree(rde->inode.fscrypt_auth);
> > > > > > > >      		kfree(rde->inode.fscrypt_file);
> > > > > > > > +		dput(rde->dentry);
> > > > > > > >      	}
> > > > > > > >      	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
> > > > > > > >      }
> > > > > > > > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > > > > > > > index 0dfe24f94567..663d7754d57d 100644
> > > > > > > > --- a/fs/ceph/mds_client.h
> > > > > > > > +++ b/fs/ceph/mds_client.h
> > > > > > > > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
> > > > > > > >      };
> > > > > > > >      
> > > > > > > >      struct ceph_mds_reply_dir_entry {
> > > > > > > > +	struct dentry		      *dentry;
> > > > > > > >      	char                          *name;
> > > > > > > >      	u8			      *altname;
> > > > > > > >      	u32                           name_len;
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir
  2022-03-10 14:20               ` Jeff Layton
@ 2022-03-10 14:24                 ` Xiubo Li
  0 siblings, 0 replies; 18+ messages in thread
From: Xiubo Li @ 2022-03-10 14:24 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, vshankar, lhenriques, ceph-devel


On 3/10/22 10:20 PM, Jeff Layton wrote:
> On Thu, 2022-03-10 at 22:14 +0800, Xiubo Li wrote:
>> On 3/10/22 9:12 PM, Jeff Layton wrote:
>>> On Thu, 2022-03-10 at 20:21 +0800, Xiubo Li wrote:
>>>> On 3/10/22 7:21 PM, Jeff Layton wrote:
>>>>> On Thu, 2022-03-10 at 18:54 +0800, Xiubo Li wrote:
>>>>>> On 3/10/22 6:36 PM, Jeff Layton wrote:
>>>>>>> On Thu, 2022-03-10 at 14:08 +0800, Xiubo Li wrote:
>>>>>>>> On 3/9/22 9:59 PM, xiubli@redhat.com wrote:
>>>>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>>>>
>>>>>>>>> For the readdir request the dentries will be pasred and dencrypted
>>>>>>>>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just
>>>>>>>>> get the dentry name from the dentry cache instead of parsing and
>>>>>>>>> dencrypting them again. This could improve performance.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>>>>>> ---
>>>>>>>>>
>>>>>>>>> V7:
>>>>>>>>> - Fix the xfstest generic/006 crash bug about the rde->dentry == NULL.
>>>>>>>>>
>>>>>>>>> V6:
>>>>>>>>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX
>>>>>>>>>        instead, since we are limiting the max length of snapshot name to
>>>>>>>>>        240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>).
>>>>>>>>>
>>>>>>>>> V5:
>>>>>>>>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro
>>>>>>>>> - release the rde->dentry in destroy_reply_info
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>       fs/ceph/dir.c        | 56 ++++++++++++++++++++------------------------
>>>>>>>>>       fs/ceph/inode.c      |  7 ++++++
>>>>>>>>>       fs/ceph/mds_client.c |  1 +
>>>>>>>>>       fs/ceph/mds_client.h |  1 +
>>>>>>>>>       4 files changed, 34 insertions(+), 31 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>>>>>>>> index 6df2a91af236..2397c34e9173 100644
>>>>>>>>> --- a/fs/ceph/dir.c
>>>>>>>>> +++ b/fs/ceph/dir.c
>>>>>>>>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>>>       	int err;
>>>>>>>>>       	unsigned frag = -1;
>>>>>>>>>       	struct ceph_mds_reply_info_parsed *rinfo;
>>>>>>>>> -	struct fscrypt_str tname = FSTR_INIT(NULL, 0);
>>>>>>>>> -	struct fscrypt_str oname = FSTR_INIT(NULL, 0);
>>>>>>>>> +	char *dentry_name = NULL;
>>>>>>>>>       
>>>>>>>>>       	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
>>>>>>>>>       	if (dfi->file_info.flags & CEPH_F_ATEND)
>>>>>>>>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>>>       		spin_unlock(&ci->i_ceph_lock);
>>>>>>>>>       	}
>>>>>>>>>       
>>>>>>>>> -	err = ceph_fname_alloc_buffer(inode, &tname);
>>>>>>>>> -	if (err < 0)
>>>>>>>>> -		goto out;
>>>>>>>>> -
>>>>>>>>> -	err = ceph_fname_alloc_buffer(inode, &oname);
>>>>>>>>> -	if (err < 0)
>>>>>>>>> -		goto out;
>>>>>>>>> -
>>>>>>>>>       	/* proceed with a normal readdir */
>>>>>>>>>       more:
>>>>>>>>>       	/* do we have the correct frag content buffered? */
>>>>>>>>> @@ -528,31 +519,36 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>>>       			}
>>>>>>>>>       		}
>>>>>>>>>       	}
>>>>>>>>> +
>>>>>>>>> +	dentry_name = kmalloc(NAME_MAX, GFP_KERNEL);
>>>>>>>>> +	if (!dentry_name) {
>>>>>>>>> +		err = -ENOMEM;
>>>>>>>>> +		ceph_mdsc_put_request(dfi->last_readdir);
>>>>>>>>> +		dfi->last_readdir = NULL;
>>>>>>>>> +		goto out;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>>       	for (; i < rinfo->dir_nr; i++) {
>>>>>>>>>       		struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
>>>>>>>>> -		struct ceph_fname fname = { .dir	= inode,
>>>>>>>>> -					    .name	= rde->name,
>>>>>>>>> -					    .name_len	= rde->name_len,
>>>>>>>>> -					    .ctext	= rde->altname,
>>>>>>>>> -					    .ctext_len	= rde->altname_len };
>>>>>>>>> -		u32 olen = oname.len;
>>>>>>>>> -
>>>>>>>>> -		err = ceph_fname_to_usr(&fname, &tname, &oname, NULL);
>>>>>>>>> -		if (err) {
>>>>>>>>> -			pr_err("%s unable to decode %.*s, got %d\n", __func__,
>>>>>>>>> -			       rde->name_len, rde->name, err);
>>>>>>>>> -			goto out;
>>>>>>>>> -		}
>>>>>>>>> +		struct dentry *dn = rde->dentry;
>>>>>>>>> +		int name_len;
>>>>>>>>>       
>>>>>>>>>       		BUG_ON(rde->offset < ctx->pos);
>>>>>>>>>       		BUG_ON(!rde->inode.in);
>>>>>>>>> +		BUG_ON(!rde->dentry);
>>>>>>>>>       
>>>>>>>>>       		ctx->pos = rde->offset;
>>>>>>>>> -		dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
>>>>>>>>> -		     i, rinfo->dir_nr, ctx->pos,
>>>>>>>>> -		     rde->name_len, rde->name, &rde->inode.in);
>>>>>>>>>       
>>>>>>>>> -		if (!dir_emit(ctx, oname.name, oname.len,
>>>>>>>>> +		spin_lock(&dn->d_lock);
>>>>>>>>> +		memcpy(dentry_name, dn->d_name.name, dn->d_name.len);
>>>>>>>>> +		name_len = dn->d_name.len;
>>>>>>>>> +		spin_unlock(&dn->d_lock);
>>>>>>>>> +
>>>>>>>> Hi Jeff,
>>>>>>>>
>>>>>>>> BTW, does the dn->d_lock is must here ? From the code comments, the
>>>>>>>> d_lock intends to protect the 'd_flag' and 'd_lockref.count'.
>>>>>>>>
>>>>>>>> In ceph code I found some places are using the d_lock when accessing the
>>>>>>>> d_name, some are not. And in none ceph code, they will almost never use
>>>>>>>> the d_lock to protect the d_name.
>>>>>>>>
>>>>>>> It's probably not needed. The d_name can only change if there are no
>>>>>>> other outstanding references to the dentry.
>>>>>> If so, the dentry_name = kmalloc() is not needed any more.
>>>>>>
>>>>>>
>>>>> I take it back.
>>>>>
>>>>> I think the d_lock is the only thing that protects this against a
>>>>> concurrent rename. Have a look at __d_move, and (in particular) the call
>>>>> to copy_name. The d_lock is what guards against that happening while
>>>>> you're working with it here.
>>>>>
>>>>> You might be able to get away without using it, but you'll need to
>>>>> follow similar rules as RCU walk.
>>>>>
>>>>> FWIW, the best source for this info is Neil Brown's writeup in
>>>>> Documentation/filesystems/path_lookup.rst.
>>> That should have been Documentation/filesystems/path-lookup.rst
>>>
>>>
>>>> IMO we can get rid of the 'd_lock' by:
>>>>
>>>> char *dentry_name = READ_ONCE(dentry->d_name.name);
>>>>
>>>> if (!dir_emit(ctx, dentry_name, strlen(dentry_name),....
>>>>
>>>> Because both the swap_names() and copy_name() won't release the memory
>>>> of dentry->d_name.name.
>>>>
>>>> But possible we will see a corrupted dentry name if it's doing the
>>>> copy_name() ?
>>>>
>>> ceph already satisfies readdir out of the dcache in some cases. See
>>> __dcache_readdir, which basically walks a list of dentry pointers
>>> stashed in the pagecache and then calls dir_emit on each (without the
>>> dentry->d_lock held!). That's a bit different though since we have some
>>> assurance that things won't change in that case (it may still be racy
>>> too -- not sure).
>>>
>>> I don't think you have the same guarantees here. What might be best is
>>> to either allocate (or declare on-stack) a NAME_MAX buffer, and copy
>>> into that from the d_name while you hold the lock. Then drop the lock
>>> and dir_emit.
>>>
>>> I wonder if we're going about this the wrong way...
>>>
>>> Once we've decrypted the names, we don't need the crypttext anymore at
>>> all. Maybe it would be better to just decrypt the names in-place, or
>>> copy back over the crypttext after decrypting? Then we might not even
>>> need some of these changes in readdir at all.
>>>
>> For this, I think you mean that in ceph_readdir_prepopulate() when the
>> first time decrypting the crypttext we will save it in rde->name or
>> rde->altername, then in ceph_readdir() to emit it directly or
>> base64_encode it again without key.
>>
>> Right ?
>>
> Right.
>
> Or, possibly even do that in an extra step before
> ceph_readdir_prepopulate. Then we might not need as many changes to that
> function.

Yeah, this approach sounds good too.

Let me try it.

- Xiubo


>>>>>>>>> +		dentry_name[name_len] = '\0';
>>>>>>>>> +		dout("readdir (%d/%d) -> %llx '%s' %p\n",
>>>>>>>>> +		     i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in);
>>>>>>>>> +
>>>>>>>>> +		if (!dir_emit(ctx, dentry_name, name_len,
>>>>>>>>>       			      ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
>>>>>>>>>       			      le32_to_cpu(rde->inode.in->mode) >> 12)) {
>>>>>>>>>       			/*
>>>>>>>>> @@ -566,8 +562,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>>>       			goto out;
>>>>>>>>>       		}
>>>>>>>>>       
>>>>>>>>> -		/* Reset the lengths to their original allocated vals */
>>>>>>>>> -		oname.len = olen;
>>>>>>>>>       		ctx->pos++;
>>>>>>>>>       	}
>>>>>>>>>       
>>>>>>>>> @@ -625,8 +619,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>>>>>>>       	err = 0;
>>>>>>>>>       	dout("readdir %p file %p done.\n", inode, file);
>>>>>>>>>       out:
>>>>>>>>> -	ceph_fname_free_buffer(inode, &tname);
>>>>>>>>> -	ceph_fname_free_buffer(inode, &oname);
>>>>>>>>> +	if (dentry_name)
>>>>>>>>> +		kfree(dentry_name);
>>>>>>>>>       	return err;
>>>>>>>>>       }
>>>>>>>>>       
>>>>>>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>>>>>>> index b573a0f33450..19e5275eae1c 100644
>>>>>>>>> --- a/fs/ceph/inode.c
>>>>>>>>> +++ b/fs/ceph/inode.c
>>>>>>>>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>>>>>       			goto out;
>>>>>>>>>       		}
>>>>>>>>>       
>>>>>>>>> +		rde->dentry = NULL;
>>>>>>>>>       		dname.name = oname.name;
>>>>>>>>>       		dname.len = oname.len;
>>>>>>>>>       		dname.hash = full_name_hash(parent, dname.name, dname.len);
>>>>>>>>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
>>>>>>>>>       			goto retry_lookup;
>>>>>>>>>       		}
>>>>>>>>>       
>>>>>>>>> +		/*
>>>>>>>>> +		 * ceph_readdir will use the dentry to get the name
>>>>>>>>> +		 * to avoid doing the dencrypt again there.
>>>>>>>>> +		 */
>>>>>>>>> +		rde->dentry = dget(dn);
>>>>>>>>> +
>>>>>>>>>       		/* inode */
>>>>>>>>>       		if (d_really_is_positive(dn)) {
>>>>>>>>>       			in = d_inode(dn);
>>>>>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>>>>>> index 8d704ddd7291..9e0a51ef1dfa 100644
>>>>>>>>> --- a/fs/ceph/mds_client.c
>>>>>>>>> +++ b/fs/ceph/mds_client.c
>>>>>>>>> @@ -733,6 +733,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
>>>>>>>>>       
>>>>>>>>>       		kfree(rde->inode.fscrypt_auth);
>>>>>>>>>       		kfree(rde->inode.fscrypt_file);
>>>>>>>>> +		dput(rde->dentry);
>>>>>>>>>       	}
>>>>>>>>>       	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
>>>>>>>>>       }
>>>>>>>>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>>>>>>>>> index 0dfe24f94567..663d7754d57d 100644
>>>>>>>>> --- a/fs/ceph/mds_client.h
>>>>>>>>> +++ b/fs/ceph/mds_client.h
>>>>>>>>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in {
>>>>>>>>>       };
>>>>>>>>>       
>>>>>>>>>       struct ceph_mds_reply_dir_entry {
>>>>>>>>> +	struct dentry		      *dentry;
>>>>>>>>>       	char                          *name;
>>>>>>>>>       	u8			      *altname;
>>>>>>>>>       	u32                           name_len;


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-03-10 14:33 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-09 13:59 [PATCH V7] ceph: do not dencrypt the dentry name twice for readdir xiubli
2022-03-09 14:50 ` Jeff Layton
2022-03-10  0:20   ` Xiubo Li
2022-03-10  0:27     ` Jeff Layton
2022-03-10  0:53       ` Xiubo Li
2022-03-10  6:08 ` Xiubo Li
2022-03-10 10:36   ` Jeff Layton
2022-03-10 10:54     ` Xiubo Li
2022-03-10 11:21       ` Jeff Layton
2022-03-10 11:29         ` Xiubo Li
2022-03-10 12:21         ` Xiubo Li
2022-03-10 13:12           ` Jeff Layton
2022-03-10 14:14             ` Xiubo Li
2022-03-10 14:20               ` Jeff Layton
2022-03-10 14:24                 ` Xiubo Li
2022-03-10  8:22 ` Xiubo Li
2022-03-10 10:38   ` Jeff Layton
2022-03-10 10:49     ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.