linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ext4: Optimize case-insensitive lookups
@ 2019-06-17 19:02 Gabriel Krisman Bertazi
  2019-06-20  4:01 ` Theodore Ts'o
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriel Krisman Bertazi @ 2019-06-17 19:02 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, Gabriel Krisman Bertazi, kernel

Temporarily cache a casefolded version of the file name under lookup in
ext4_filename, to avoid repeatedly casefolding it.  I got up to 30%
speedup on lookups of large directories (>100k entries), depending on
the length of the string under lookup.

v2:
  - Dinamically allocate space for the casefolded version.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 fs/ext4/dir.c           |  2 +-
 fs/ext4/ext4.h          | 39 ++++++++++++++++++++++++++++++++++---
 fs/ext4/namei.c         | 43 ++++++++++++++++++++++++++++++++++++-----
 fs/unicode/utf8-core.c  | 28 +++++++++++++++++++++++++++
 include/linux/unicode.h |  3 +++
 5 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c7843b149a1e..0a427e18584a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -674,7 +674,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
 		return memcmp(str, name->name, len);
 	}
 
-	return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+	return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
 }
 
 static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb67859e051..c0793d9e5d12 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2077,6 +2077,9 @@ struct ext4_filename {
 #ifdef CONFIG_FS_ENCRYPTION
 	struct fscrypt_str crypto_buf;
 #endif
+#ifdef CONFIG_UNICODE
+	struct fscrypt_str cf_name;
+#endif
 };
 
 #define fname_name(p) ((p)->disk_name.name)
@@ -2302,6 +2305,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
+#ifdef CONFIG_UNICODE
+extern void ext4_fname_setup_ci_filename(struct inode *dir,
+					 const struct qstr *iname,
+					 struct fscrypt_str *fname);
+#endif
+
 #ifdef CONFIG_FS_ENCRYPTION
 static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
 						const struct fscrypt_name *src)
@@ -2328,6 +2337,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
 		return err;
 
 	ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+	ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
 	return 0;
 }
 
@@ -2343,6 +2356,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
 		return err;
 
 	ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+	ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+#endif
 	return 0;
 }
 
@@ -2356,6 +2373,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
 	fname->crypto_buf.name = NULL;
 	fname->usr_fname = NULL;
 	fname->disk_name.name = NULL;
+
+#ifdef CONFIG_UNICODE
+	kfree(fname->cf_name.name);
+	fname->cf_name.name = NULL;
+#endif
 }
 #else /* !CONFIG_FS_ENCRYPTION */
 static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2366,6 +2388,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
 	fname->usr_fname = iname;
 	fname->disk_name.name = (unsigned char *) iname->name;
 	fname->disk_name.len = iname->len;
+
+#ifdef CONFIG_UNICODE
+	ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
+
 	return 0;
 }
 
@@ -2376,7 +2403,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
 	return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
 }
 
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+	kfree(fname->cf_name.name);
+	fname->cf_name.name = NULL;
+#endif
+}
 #endif /* !CONFIG_FS_ENCRYPTION */
 
 /* dir.c */
@@ -3119,8 +3152,8 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle,
 					 struct inode *inode,
 					 struct buffer_head *bh);
 extern int ext4_ci_compare(const struct inode *parent,
-			   const struct qstr *name,
-			   const struct qstr *entry);
+			   const struct qstr *fname,
+			   const struct qstr *entry, bool quick);
 
 #define S_SHIFT 12
 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cd01c4a67ffb..4909ced4e672 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1259,19 +1259,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
 #ifdef CONFIG_UNICODE
 /*
  * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for.  If quick is set, assume the name being looked up
+ * is already in the casefolded form.
  *
  * Returns: 0 if the directory entry matches, more than 0 if it
  * doesn't match or less than zero on error.
  */
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
-		    const struct qstr *entry)
+		    const struct qstr *entry, bool quick)
 {
 	const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
 	const struct unicode_map *um = sbi->s_encoding;
 	int ret;
 
-	ret = utf8_strncasecmp(um, name, entry);
+	if (quick)
+		ret = utf8_strncasecmp_folded(um, name, entry);
+	else
+		ret = utf8_strncasecmp(um, name, entry);
+
 	if (ret < 0) {
 		/* Handle invalid character sequence as either an error
 		 * or as an opaque byte sequence.
@@ -1287,6 +1292,27 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
 
 	return ret;
 }
+
+void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+				  struct fscrypt_str *cf_name)
+{
+	if (!IS_CASEFOLDED(dir)) {
+		cf_name->name = NULL;
+		return;
+	}
+
+	cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+	if (!cf_name->name)
+		return;
+
+	cf_name->len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+				     iname, cf_name->name,
+				     EXT4_NAME_LEN);
+	if (cf_name->len <= 0) {
+		kfree(cf_name->name);
+		cf_name->name = NULL;
+	}
+}
 #endif
 
 /*
@@ -1313,8 +1339,15 @@ static inline bool ext4_match(const struct inode *parent,
 #endif
 
 #ifdef CONFIG_UNICODE
-	if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
-		return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+	if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+		if (fname->cf_name.name) {
+			struct qstr cf = {.name = fname->cf_name.name,
+					  .len = fname->cf_name.len};
+			return !ext4_ci_compare(parent, &cf, &entry, true);
+		}
+		return !ext4_ci_compare(parent, fname->usr_fname, &entry,
+					false);
+	}
 #endif
 
 	return fscrypt_match_name(&f, de->name, de->name_len);
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..71ca4d047d65 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
 }
 EXPORT_SYMBOL(utf8_strncasecmp);
 
+/* String cf is expected to be a valid UTF-8 casefolded
+ * string.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+			    const struct qstr *cf,
+			    const struct qstr *s1)
+{
+	const struct utf8data *data = utf8nfdicf(um->version);
+	struct utf8cursor cur1;
+	int c1, c2;
+	int i = 0;
+
+	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+		return -EINVAL;
+
+	do {
+		c1 = utf8byte(&cur1);
+		c2 = cf->name[i++];
+		if (c1 < 0)
+			return -EINVAL;
+		if (c1 != c2)
+			return 1;
+	} while (c1);
+
+	return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
 		  unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index aec2c6d800aa..990aa97d8049 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um,
 
 int utf8_strncasecmp(const struct unicode_map *um,
 		 const struct qstr *s1, const struct qstr *s2);
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+			    const struct qstr *cf,
+			    const struct qstr *s1);
 
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
 		   unsigned char *dest, size_t dlen);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext4: Optimize case-insensitive lookups
  2019-06-17 19:02 [PATCH] ext4: Optimize case-insensitive lookups Gabriel Krisman Bertazi
@ 2019-06-20  4:01 ` Theodore Ts'o
  0 siblings, 0 replies; 5+ messages in thread
From: Theodore Ts'o @ 2019-06-20  4:01 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi; +Cc: linux-ext4, kernel

On Mon, Jun 17, 2019 at 03:02:40PM -0400, Gabriel Krisman Bertazi wrote:
> Temporarily cache a casefolded version of the file name under lookup in
> ext4_filename, to avoid repeatedly casefolding it.  I got up to 30%
> speedup on lookups of large directories (>100k entries), depending on
> the length of the string under lookup.
> 
> v2:
>   - Dinamically allocate space for the casefolded version.
> 
> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>

Thanks, applied.

						- Ted

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext4: Optimize case-insensitive lookups
  2019-05-30 21:01 ` Theodore Ts'o
@ 2019-05-31 18:29   ` Gabriel Krisman Bertazi
  0 siblings, 0 replies; 5+ messages in thread
From: Gabriel Krisman Bertazi @ 2019-05-31 18:29 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-ext4, linux-fsdevel, linux-fscrypt

"Theodore Ts'o" <tytso@mit.edu> writes:

> On Wed, May 29, 2019 at 02:54:46PM -0400, Gabriel Krisman Bertazi wrote:
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index c18ab748d20d..e3809cfda9f4 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -2078,6 +2078,10 @@ struct ext4_filename {
>>  #ifdef CONFIG_FS_ENCRYPTION
>>  	struct fscrypt_str crypto_buf;
>>  #endif
>> +#ifdef CONFIG_UNICODE
>> +	int cf_len;
>> +	unsigned char cf_name[EXT4_NAME_LEN];
>> +#endif
>>  };
>>  
>>  #define fname_name(p) ((p)->disk_name.name)
>
> EXT4_NAME_LEN is 256, and struct ext4_filename is allocated on the
> stack.  So this is going to increase the stack usage by 258 bytes.
> Perhaps should we just kmalloc the temporary buffer when it's needed?

I wanted to avoid adding an allocation to this path, but maybe that was
misguided, since this is out of the dcache critical path.  I also wanted
to remove the allocation from d_hash, but we'd require a similar size
allocation in the stack. Is that a good idea?

> The other thing that this patch reminds me is that there is great
> interest in supporting case folded directories and fscrypt at the same
> time.  Today fscrypt works by encrypting the filename, and stashes it
> in fname->crypto_buf, and this allows for a byte-for-byte comparison
> of the encrypted name.  To support fscrypt && casefold, what we would
> need to do is to change the htree hash so that the hash is caluclated
> on the normalized form, and then we'll have to decrypt each filename
> in the directory block and then compare it against the normalized form
> that stashed in cf_name.  So that means we'll never need to allocate
> memory for cf_name and crypto_buf at the same time.

fscrypt and case-insensitive is getting to the top of my to-do list,
i'll something there early next week.  Thanks for the explanation on
it.

>
> We can also use struct fscrypt_str for cf_name; it's defined as a
> combined unsighed char *name and u32 len.  We already use fscrypt_str
> even the !CONFIG_FS_ENCRYPTION case, since it's a convenient way of
> handling a non-NULL terminated filename blob.  And this will hopefully
> make it simpler to deal with integrating casefolding and fscrypt in
> the future.

I will send a v2 with this change already, to simplify
fscrypt+casefolding support.
>
> Cheers,
>
> 					- Ted

-- 
Gabriel Krisman Bertazi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext4: Optimize case-insensitive lookups
  2019-05-29 18:54 Gabriel Krisman Bertazi
@ 2019-05-30 21:01 ` Theodore Ts'o
  2019-05-31 18:29   ` Gabriel Krisman Bertazi
  0 siblings, 1 reply; 5+ messages in thread
From: Theodore Ts'o @ 2019-05-30 21:01 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi; +Cc: linux-ext4, linux-fsdevel, linux-fscrypt

On Wed, May 29, 2019 at 02:54:46PM -0400, Gabriel Krisman Bertazi wrote:
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index c18ab748d20d..e3809cfda9f4 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2078,6 +2078,10 @@ struct ext4_filename {
>  #ifdef CONFIG_FS_ENCRYPTION
>  	struct fscrypt_str crypto_buf;
>  #endif
> +#ifdef CONFIG_UNICODE
> +	int cf_len;
> +	unsigned char cf_name[EXT4_NAME_LEN];
> +#endif
>  };
>  
>  #define fname_name(p) ((p)->disk_name.name)

EXT4_NAME_LEN is 256, and struct ext4_filename is allocated on the
stack.  So this is going to increase the stack usage by 258 bytes.
Perhaps should we just kmalloc the temporary buffer when it's needed?

The other thing that this patch reminds me is that there is great
interest in supporting case folded directories and fscrypt at the same
time.  Today fscrypt works by encrypting the filename, and stashes it
in fname->crypto_buf, and this allows for a byte-for-byte comparison
of the encrypted name.  To support fscrypt && casefold, what we would
need to do is to change the htree hash so that the hash is caluclated
on the normalized form, and then we'll have to decrypt each filename
in the directory block and then compare it against the normalized form
that stashed in cf_name.  So that means we'll never need to allocate
memory for cf_name and crypto_buf at the same time.

We can also use struct fscrypt_str for cf_name; it's defined as a
combined unsighed char *name and u32 len.  We already use fscrypt_str
even the !CONFIG_FS_ENCRYPTION case, since it's a convenient way of
handling a non-NULL terminated filename blob.  And this will hopefully
make it simpler to deal with integrating casefolding and fscrypt in
the future.

Cheers,

					- Ted

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] ext4: Optimize case-insensitive lookups
@ 2019-05-29 18:54 Gabriel Krisman Bertazi
  2019-05-30 21:01 ` Theodore Ts'o
  0 siblings, 1 reply; 5+ messages in thread
From: Gabriel Krisman Bertazi @ 2019-05-29 18:54 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, linux-fsdevel, Gabriel Krisman Bertazi

Temporarily cache a casefolded version of the file name under lookup in
ext4_filename, to avoid repeatedly casefolding it.

This gives me some performance gains on the lookup patch, when
performing cold dcache case-insensitive lookups on directories with a
high number of entries.

Below is a measure of average speedup of several runs with the patch
applied; where the baseline is the currently ext4 code, and the rows are
for different number of directory entries/number of lookup performed.
The speedup is not linear with the number of dentries, which I believe
is because of the changing format of the htree.

| entries | avg speedup |
| 1000    |  1.15	|
| 10000   |  1.19	|
| 50000   |  1.25	|
| 70000   |  1.20	|
| 100000  |  1.13	|

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 fs/ext4/dir.c           |  2 +-
 fs/ext4/ext4.h          | 32 +++++++++++++++++++++++++++++---
 fs/ext4/namei.c         | 41 +++++++++++++++++++++++++++++++++++------
 fs/unicode/utf8-core.c  | 25 +++++++++++++++++++++++++
 include/linux/unicode.h |  3 +++
 5 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c7843b149a1e..0a427e18584a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -674,7 +674,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
 		return memcmp(str, name->name, len);
 	}
 
-	return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+	return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
 }
 
 static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c18ab748d20d..e3809cfda9f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2078,6 +2078,10 @@ struct ext4_filename {
 #ifdef CONFIG_FS_ENCRYPTION
 	struct fscrypt_str crypto_buf;
 #endif
+#ifdef CONFIG_UNICODE
+	int cf_len;
+	unsigned char cf_name[EXT4_NAME_LEN];
+#endif
 };
 
 #define fname_name(p) ((p)->disk_name.name)
@@ -2303,6 +2307,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
+#ifdef CONFIG_UNICODE
+extern void ext4_setup_ci_filename(struct inode *dir,
+				   const struct qstr *iname,
+				   struct ext4_filename *fname);
+#endif
+
 #ifdef CONFIG_FS_ENCRYPTION
 static inline int ext4_fname_setup_filename(struct inode *dir,
 			const struct qstr *iname,
@@ -2313,6 +2323,9 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
 
 	memset(fname, 0, sizeof(struct ext4_filename));
 
+#ifdef CONFIG_UNICODE
+	ext4_setup_ci_filename(dir, iname, fname);
+#endif
 	err = fscrypt_setup_filename(dir, iname, lookup, &name);
 
 	fname->usr_fname = name.usr_fname;
@@ -2333,18 +2346,31 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
 	fname->crypto_buf.name = NULL;
 	fname->usr_fname = NULL;
 	fname->disk_name.name = NULL;
+#ifdef CONFIG_UNICODE
+	fname->cf_len = 0;
+#endif
 }
 #else
 static inline int ext4_fname_setup_filename(struct inode *dir,
 		const struct qstr *iname,
 		int lookup, struct ext4_filename *fname)
 {
+
+#ifdef CONFIG_UNICODE
+	ext4_setup_ci_filename(dir, iname, fname);
+#endif
 	fname->usr_fname = iname;
 	fname->disk_name.name = (unsigned char *) iname->name;
 	fname->disk_name.len = iname->len;
 	return 0;
 }
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+	fname->cf_len = 0;
+#endif
+}
 
 #endif
 
@@ -3088,8 +3114,8 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle,
 					 struct inode *inode,
 					 struct buffer_head *bh);
 extern int ext4_ci_compare(const struct inode *parent,
-			   const struct qstr *name,
-			   const struct qstr *entry);
+			   const struct qstr *fname,
+			   const struct qstr *entry, bool quick);
 
 #define S_SHIFT 12
 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ac7457fef9e6..082f941520f3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1259,19 +1259,25 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
 #ifdef CONFIG_UNICODE
 /*
  * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for.  If quick is set, assume the name being looked up
+ * is already in the casefolded form.
  *
  * Returns: 0 if the directory entry matches, more than 0 if it
  * doesn't match or less than zero on error.
  */
-int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
-		    const struct qstr *entry)
+int ext4_ci_compare(const struct inode *parent,
+		    const struct qstr *name,
+		    const struct qstr *entry, bool quick)
 {
 	const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
 	const struct unicode_map *um = sbi->s_encoding;
 	int ret;
 
-	ret = utf8_strncasecmp(um, name, entry);
+	if (quick)
+		ret = utf8_strncasecmp_folded(um, name, entry);
+	else
+		ret = utf8_strncasecmp(um, name, entry);
+
 	if (ret < 0) {
 		/* Handle invalid character sequence as either an error
 		 * or as an opaque byte sequence.
@@ -1287,6 +1293,21 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
 
 	return ret;
 }
+
+void ext4_setup_ci_filename(struct inode *dir,
+			    const struct qstr *iname,
+			    struct ext4_filename *fname)
+{
+	int len = EXT4_NAME_LEN;
+
+	fname->cf_len = 0;
+	if (IS_CASEFOLDED(dir)) {
+		len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+				    iname, fname->cf_name, len);
+		if (len >= 0)
+			fname->cf_len = len;
+	}
+}
 #endif
 
 /*
@@ -1313,8 +1334,16 @@ static inline bool ext4_match(const struct inode *parent,
 #endif
 
 #ifdef CONFIG_UNICODE
-	if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
-		return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+	if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+		if (fname->cf_len) {
+			struct qstr cf = {.name = fname->cf_name,
+					  .len = fname->cf_len};
+			return !ext4_ci_compare(parent, &cf,
+						&entry, true);
+		}
+		return !ext4_ci_compare(parent, fname->usr_fname,
+						&entry, false);
+	}
 #endif
 
 	return fscrypt_match_name(&f, de->name, de->name_len);
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..1d74de2778e9 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,31 @@ int utf8_strncasecmp(const struct unicode_map *um,
 }
 EXPORT_SYMBOL(utf8_strncasecmp);
 
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+			    const struct qstr *normalized,
+			    const struct qstr *s1)
+{
+	const struct utf8data *data = utf8nfdicf(um->version);
+	struct utf8cursor cur1;
+	int c1, c2;
+	int i = 0;
+
+	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+		return -EINVAL;
+
+	do {
+		c1 = utf8byte(&cur1);
+		c2 = normalized->name[i++];
+		if (c1 < 0)
+			return -EINVAL;
+		if (c1 != c2)
+			return 1;
+	} while (c1);
+
+	return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
 		  unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index aec2c6d800aa..e89bf3ecef21 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -17,6 +17,9 @@ int utf8_strncmp(const struct unicode_map *um,
 
 int utf8_strncasecmp(const struct unicode_map *um,
 		 const struct qstr *s1, const struct qstr *s2);
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+			    const struct qstr *normalized,
+			    const struct qstr *s1);
 
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
 		   unsigned char *dest, size_t dlen);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-06-20  4:02 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-17 19:02 [PATCH] ext4: Optimize case-insensitive lookups Gabriel Krisman Bertazi
2019-06-20  4:01 ` Theodore Ts'o
  -- strict thread matches above, loose matches on Subject: below --
2019-05-29 18:54 Gabriel Krisman Bertazi
2019-05-30 21:01 ` Theodore Ts'o
2019-05-31 18:29   ` Gabriel Krisman Bertazi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).