linux-bcachefs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn
@ 2023-08-12 14:47 Joshua Ashton
  2023-08-12 14:47 ` [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes Joshua Ashton
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Joshua Ashton @ 2023-08-12 14:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Joshua Ashton

This will be used when we need to re-hash a directory tree when setting
flags.

It is not possible to have concurrent btree_trans on a thread, so the
callback needs to be handed the caller's existing btree_trans.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
---
 fs/bcachefs/fs-io.c    | 12 ++++++++----
 fs/bcachefs/fs-ioctl.c | 11 +++++++----
 fs/bcachefs/fs.c       |  5 +++--
 fs/bcachefs/fs.h       |  3 ++-
 fs/bcachefs/xattr.c    |  3 ++-
 5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 963c7971d495..4804e5a47ac9 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -109,7 +109,8 @@ struct inode_new_size {
 	unsigned	fields;
 };
 
-static int inode_set_size(struct bch_inode_info *inode,
+static int inode_set_size(struct btree_trans *trans,
+			  struct bch_inode_info *inode,
 			  struct bch_inode_unpacked *bi,
 			  void *p)
 {
@@ -390,7 +391,8 @@ static int bch2_extend(struct mnt_idmap *idmap,
 	return bch2_setattr_nonsize(idmap, inode, iattr);
 }
 
-static int bch2_truncate_finish_fn(struct bch_inode_info *inode,
+static int bch2_truncate_finish_fn(struct btree_trans *trans,
+				   struct bch_inode_info *inode,
 				   struct bch_inode_unpacked *bi,
 				   void *p)
 {
@@ -398,7 +400,8 @@ static int bch2_truncate_finish_fn(struct bch_inode_info *inode,
 	return 0;
 }
 
-static int bch2_truncate_start_fn(struct bch_inode_info *inode,
+static int bch2_truncate_start_fn(struct btree_trans *trans,
+				  struct bch_inode_info *inode,
 				  struct bch_inode_unpacked *bi, void *p)
 {
 	u64 *new_i_size = p;
@@ -519,7 +522,8 @@ int bch2_truncate(struct mnt_idmap *idmap,
 
 /* fallocate: */
 
-static int inode_update_times_fn(struct bch_inode_info *inode,
+static int inode_update_times_fn(struct btree_trans *trans,
+				 struct bch_inode_info *inode,
 				 struct bch_inode_unpacked *bi, void *p)
 {
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index dfa1bf73c854..141bcced031e 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -31,7 +31,8 @@ struct flags_set {
 	bool			projinherit;
 };
 
-static int bch2_inode_flags_set(struct bch_inode_info *inode,
+static int bch2_inode_flags_set(struct btree_trans *trans,
+				struct bch_inode_info *inode,
 				struct bch_inode_unpacked *bi,
 				void *p)
 {
@@ -124,7 +125,8 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
 	return copy_to_user(arg, &fa, sizeof(fa));
 }
 
-static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
+static int fssetxattr_inode_update_fn(struct btree_trans *trans,
+				      struct bch_inode_info *inode,
 				      struct bch_inode_unpacked *bi,
 				      void *p)
 {
@@ -135,7 +137,7 @@ static int fssetxattr_inode_update_fn(struct bch_inode_info *inode,
 		bi->bi_project = s->projid;
 	}
 
-	return bch2_inode_flags_set(inode, bi, p);
+	return bch2_inode_flags_set(trans, inode, bi, p);
 }
 
 static int bch2_ioc_fssetxattr(struct bch_fs *c,
@@ -192,7 +194,8 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
 	return ret;
 }
 
-static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode,
+static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
+				   struct bch_inode_info *inode,
 				   struct bch_inode_unpacked *bi,
 				   void *p)
 {
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 925f5e52029b..eb5da358871b 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -92,7 +92,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
 
 	ret   = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode),
 				BTREE_ITER_INTENT) ?:
-		(set ? set(inode, &inode_u, p) : 0) ?:
+		(set ? set(&trans, inode, &inode_u, p) : 0) ?:
 		bch2_inode_write(&trans, &iter, &inode_u) ?:
 		bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
 
@@ -1414,7 +1414,8 @@ static void bch2_destroy_inode(struct inode *vinode)
 	call_rcu(&vinode->i_rcu, bch2_i_callback);
 }
 
-static int inode_update_times_fn(struct bch_inode_info *inode,
+static int inode_update_times_fn(struct btree_trans *trans,
+				 struct bch_inode_info *inode,
 				 struct bch_inode_unpacked *bi,
 				 void *p)
 {
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index 6170d214d648..10e11119ded2 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -174,7 +174,8 @@ static inline int bch2_set_projid(struct bch_fs *c,
 struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum);
 
 /* returns 0 if we want to do the update, or error is passed up */
-typedef int (*inode_set_fn)(struct bch_inode_info *,
+typedef int (*inode_set_fn)(struct btree_trans *,
+			    struct bch_inode_info *,
 			    struct bch_inode_unpacked *, void *);
 
 void bch2_inode_update_after_write(struct btree_trans *,
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 70f78006daf2..6f6b3caf0607 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -494,7 +494,8 @@ struct inode_opt_set {
 	bool			defined;
 };
 
-static int inode_opt_set_fn(struct bch_inode_info *inode,
+static int inode_opt_set_fn(struct btree_trans *trans,
+			    struct bch_inode_info *inode,
 			    struct bch_inode_unpacked *bi,
 			    void *p)
 {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes
  2023-08-12 14:47 [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Joshua Ashton
@ 2023-08-12 14:47 ` Joshua Ashton
  2023-08-12 16:23   ` Kent Overstreet
  2023-08-12 14:47 ` [PATCH 3/4] bcachefs: Introduce bch2_dirent_get_name Joshua Ashton
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Joshua Ashton @ 2023-08-12 14:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Joshua Ashton

Avoid doing a full strnlen to get the length of the name of a
dirent.

Given the fact that the name of dirents is stored at the end of the
bkey's value, and we know the length of that in u64s, we can find the
last u64 and figure out how many NUL bytes are at the end of the string.

On little endian systems this ends up being the leading zeros of the
last u64, whereas on big endian systems this ends up being the trailing
zeros of the last u64.
We can take that value in bits and divide it by 8 to get the number of
NUL bytes at the end.

There is no endian-fixup or other compatibility here as this is string
data interpreted as a u64.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
---
 fs/bcachefs/dirent.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 065ea59ee9fa..b86c6c27424a 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -15,10 +15,18 @@
 
 unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
-	unsigned len = bkey_val_bytes(d.k) -
-		offsetof(struct bch_dirent, d_name);
-
-	return strnlen(d.v->d_name, len);
+	unsigned bkey_u64s = bkey_val_u64s(d.k);
+	unsigned bkey_bytes = bkey_u64s * sizeof(u64);
+	u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1];
+#if CPU_BIG_ENDIAN
+	unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8;
+#else
+	unsigned trailing_nuls = last_u64 ? __builtin_clzll(last_u64) / 8 : 64 / 8;
+#endif
+
+	return bkey_bytes -
+		offsetof(struct bch_dirent, d_name) -
+		trailing_nuls;
 }
 
 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/4] bcachefs: Introduce bch2_dirent_get_name
  2023-08-12 14:47 [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Joshua Ashton
  2023-08-12 14:47 ` [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes Joshua Ashton
@ 2023-08-12 14:47 ` Joshua Ashton
  2023-08-12 14:47 ` [PATCH 4/4] bcachefs: Implement casefolding Joshua Ashton
  2023-08-12 16:17 ` [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Kent Overstreet
  3 siblings, 0 replies; 8+ messages in thread
From: Joshua Ashton @ 2023-08-12 14:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Joshua Ashton

A nice cleanup that avoids a bunch of open-coded name/string handling
around dirents.

Will be used by casefolding impl in future commits.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
---
 fs/bcachefs/dirent.c | 50 +++++++++++++++++++++++++-------------------
 fs/bcachefs/dirent.h |  1 +
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index b86c6c27424a..49b2f9b330e1 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -29,6 +29,11 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 		trailing_nuls;
 }
 
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
+{
+	return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+}
+
 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
 			    const struct qstr *name)
 {
@@ -49,7 +54,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
 static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 {
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-	struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+	struct qstr name = bch2_dirent_get_name(d);
 
 	return bch2_dirent_hash(info, &name);
 }
@@ -57,20 +62,20 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
 {
 	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
-	int len = bch2_dirent_name_bytes(l);
-	const struct qstr *r = _r;
+	const struct qstr l_name = bch2_dirent_get_name(l);
+	const struct qstr *r_name = _r;
 
-	return len - r->len ?: memcmp(l.v->d_name, r->name, len);
+	return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len);
 }
 
 static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 {
 	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
 	struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
-	int l_len = bch2_dirent_name_bytes(l);
-	int r_len = bch2_dirent_name_bytes(r);
+	const struct qstr l_name = bch2_dirent_get_name(l);
+	const struct qstr r_name = bch2_dirent_get_name(r);
 
-	return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
+	return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len);
 }
 
 static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k)
@@ -97,37 +102,36 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
 			struct printbuf *err)
 {
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
-	unsigned len;
+	struct qstr d_name = bch2_dirent_get_name(d);
 
-	len = bch2_dirent_name_bytes(d);
-	if (!len) {
+	if (!d_name.len) {
 		prt_printf(err, "empty name");
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) {
+	if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) {
 		prt_printf(err, "value too big (%zu > %u)",
-		       bkey_val_u64s(k.k), dirent_val_u64s(len));
+		       bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (len > BCH_NAME_MAX) {
+	if (d_name.len > BCH_NAME_MAX) {
 		prt_printf(err, "dirent name too big (%u > %u)",
-		       len, BCH_NAME_MAX);
+		       d_name.len, BCH_NAME_MAX);
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (len == 1 && !memcmp(d.v->d_name, ".", 1)) {
+	if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) {
 		prt_printf(err, "invalid name");
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (len == 2 && !memcmp(d.v->d_name, "..", 2)) {
+	if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) {
 		prt_printf(err, "invalid name");
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (memchr(d.v->d_name, '/', len)) {
+	if (memchr(d_name.name, '/', d_name.len)) {
 		prt_printf(err, "invalid name");
 		return -BCH_ERR_invalid_bkey;
 	}
@@ -145,10 +149,11 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
 			 struct bkey_s_c k)
 {
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+	struct qstr d_name = bch2_dirent_get_name(d);
 
 	prt_printf(out, "%.*s -> %llu type %s",
-	       bch2_dirent_name_bytes(d),
-	       d.v->d_name,
+	       d_name.len,
+	       d_name.name,
 	       d.v->d_type != DT_SUBVOL
 	       ? le64_to_cpu(d.v->d_inum)
 	       : le32_to_cpu(d.v->d_child_subvol),
@@ -515,6 +520,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
 	subvol_inum target;
 	u32 snapshot;
 	struct bkey_buf sk;
+	struct qstr name;
 	int ret;
 
 	bch2_bkey_buf_init(&sk);
@@ -545,9 +551,11 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
 		dirent = bkey_i_to_s_c_dirent(sk.k);
 		bch2_trans_unlock(&trans);
 
+		name = bch2_dirent_get_name(dirent);
+
 		ctx->pos = dirent.k->p.offset;
-		if (!dir_emit(ctx, dirent.v->d_name,
-			      bch2_dirent_name_bytes(dirent),
+		if (!dir_emit(ctx, name.name,
+			      name.len,
 			      target.inum,
 			      vfs_d_type(dirent.v->d_type)))
 			break;
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index b42f4a13bc55..5dd853d3aa0e 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -25,6 +25,7 @@ struct bch_hash_info;
 struct bch_inode_info;
 
 unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent);
+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d);
 
 static inline unsigned dirent_val_u64s(unsigned len)
 {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/4] bcachefs: Implement casefolding
  2023-08-12 14:47 [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Joshua Ashton
  2023-08-12 14:47 ` [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes Joshua Ashton
  2023-08-12 14:47 ` [PATCH 3/4] bcachefs: Introduce bch2_dirent_get_name Joshua Ashton
@ 2023-08-12 14:47 ` Joshua Ashton
  2023-08-12 22:44   ` Kent Overstreet
  2023-08-12 16:17 ` [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Kent Overstreet
  3 siblings, 1 reply; 8+ messages in thread
From: Joshua Ashton @ 2023-08-12 14:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Joshua Ashton, André Almeida, Gabriel Krisman Bertazi

This patch implements support for case-insensitive file name lookups
in bcachefs.

The implementation uses the same utf8 lowering and normalization that
ext4 and f2fs are currently using.

It uses the regular CASEFOLD attributes and stores the casefolded name
contiguously with the regular name on disk and in memory if space
permits it.

Names that would be too long to fit contiguously are instead compared
using a folding strcmp.

The crux of the implementation is that the casefolded name is expected
to have the same length as the uncasefolded name, so a dirent with a
cached casefolded name stores a name block twice the length of the
uncasefolded name.
In the case that the lengths do not match (which I don't believe is
possible in the current UTF-8 spec for any cased glyphs), it again
falls back to a folding strcmp.

There is currently no option provided for selecting the casefolding
encoding; ext4 and f2fs only support a single encoding per-superblock
(utf8 12.1), but it would be trivial to extend this on bcachefs on a
per-inode level using the opts system, so it is not provided in this
patch.

Signed-off-by: Joshua Ashton <joshua@froggi.es>

Cc: André Almeida <andrealmeid@igalia.com>
Cc: Gabriel Krisman Bertazi <krisman@suse.de>
---
 fs/bcachefs/bcachefs.h        |   8 ++
 fs/bcachefs/bcachefs_format.h |  18 +++-
 fs/bcachefs/dirent.c          | 167 ++++++++++++++++++++++++++++++----
 fs/bcachefs/fs-common.c       |   4 +
 fs/bcachefs/fs-ioctl.c        |  23 +++++
 fs/bcachefs/fs-ioctl.h        |  20 ++--
 fs/bcachefs/fsck.c            |   8 +-
 fs/bcachefs/str_hash.h        |  72 +++++++++++++--
 fs/bcachefs/super.c           |  10 ++
 fs/bcachefs/xattr.c           |   6 +-
 10 files changed, 297 insertions(+), 39 deletions(-)

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 30b3d7b9f9dc..baf45b0e6cb9 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -202,6 +202,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <linux/zstd.h>
+#include <linux/unicode.h>
 
 #include "bcachefs_format.h"
 #include "errcode.h"
@@ -657,6 +658,10 @@ enum bch_write_ref {
 	BCH_WRITE_REF_NR,
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
+#endif
+
 struct bch_fs {
 	struct closure		cl;
 
@@ -723,6 +728,9 @@ struct bch_fs {
 		u64		compat;
 	}			sb;
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	struct unicode_map 	*s_encoding;
+#endif
 
 	struct bch_sb_handle	disk_sb;
 
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 5ec218ee3569..fb846aed8656 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -852,6 +852,8 @@ enum {
 	__BCH_INODE_UNLINKED		= 7,
 	__BCH_INODE_BACKPTR_UNTRUSTED	= 8,
 
+	__BCH_INODE_CASEFOLDED = 9,
+
 	/* bits 20+ reserved for packed fields below: */
 };
 
@@ -864,6 +866,7 @@ enum {
 #define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
 #define BCH_INODE_UNLINKED	(1 << __BCH_INODE_UNLINKED)
 #define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
+#define BCH_INODE_CASEFOLDED (1 << __BCH_INODE_CASEFOLDED)
 
 LE32_BITMASK(INODE_STR_HASH,	struct bch_inode, bi_flags, 20, 24);
 LE32_BITMASK(INODE_NR_FIELDS,	struct bch_inode, bi_flags, 24, 31);
@@ -908,7 +911,15 @@ struct bch_dirent {
 	 * Copy of mode bits 12-15 from the target inode - so userspace can get
 	 * the filetype without having to do a stat()
 	 */
-	__u8			d_type;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8			d_type:5,
+				d_unused:2,
+				d_casefold:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8			d_casefold:1,
+				d_unused:2,
+				d_type:5;
+#endif
 
 	__u8			d_name[];
 } __packed __aligned(8);
@@ -920,6 +931,8 @@ struct bch_dirent {
 			 sizeof(struct bkey) -				\
 			 offsetof(struct bch_dirent, d_name)))
 
+#define BCH_CF_NAME_MAX (BCH_NAME_MAX / 2)
+
 /* Xattrs */
 
 #define KEY_TYPE_XATTR_INDEX_USER			0
@@ -1843,7 +1856,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
 	x(new_varint,			15)	\
 	x(journal_no_flush,		16)	\
 	x(alloc_v2,			17)	\
-	x(extents_across_btree_nodes,	18)
+	x(extents_across_btree_nodes,	18)	\
+	x(casefolding,			19)
 
 #define BCH_SB_FEATURES_ALWAYS				\
 	((1ULL << BCH_FEATURE_new_extent_overwrite)|	\
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 49b2f9b330e1..bd657f680137 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -12,6 +12,7 @@
 #include "subvolume.h"
 
 #include <linux/dcache.h>
+#include <linux/unicode.h>
 
 unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
@@ -31,9 +32,23 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 
 struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
 {
-	return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+	unsigned len = bch2_dirent_name_bytes(d);
+	return (struct qstr) QSTR_INIT(d.v->d_name, d.v->d_casefold ? len / 2 : len);
 }
 
+#if IS_ENABLED(CONFIG_UNICODE)
+struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
+{
+	unsigned len;
+	if (!d.v->d_casefold)
+		return (struct qstr) QSTR_INIT(NULL, 0);
+
+	/* casefolded name is stored contiguously after the regular name */
+	len = bch2_dirent_name_bytes(d) / 2;
+	return (struct qstr) QSTR_INIT(d.v->d_name + len, len);
+}
+#endif
+
 static u64 bch2_dirent_hash(const struct bch_hash_info *info,
 			    const struct qstr *name)
 {
@@ -46,25 +61,65 @@ static u64 bch2_dirent_hash(const struct bch_hash_info *info,
 	return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
 }
 
-static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key, struct bch_cf_lookup_cache *cf_cache)
 {
-	return bch2_dirent_hash(info, key);
+	const struct qstr *name = key;
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (cf_cache) {
+		int casefold_len = utf8_casefold(info->s_encoding, name,
+						 cf_cache->casefold_lookup_buf, BCH_CF_NAME_MAX + 1);
+		if (casefold_len < 0)
+			goto key_hash;
+
+		cf_cache->casefold_lookup = (struct qstr) QSTR_INIT(cf_cache->casefold_lookup_buf, casefold_len);
+		return bch2_dirent_hash(info, &cf_cache->casefold_lookup);
+	}
+key_hash:
+#endif
+	return bch2_dirent_hash(info, name);
 }
 
-static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k, struct bch_cf_lookup_cache *cf_cache)
 {
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 	struct qstr name = bch2_dirent_get_name(d);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (cf_cache) {
+		struct qstr casefold_name = bch2_dirent_get_casefold_name(d);
+		if (casefold_name.len) {
+			cf_cache->casefold_lookup = casefold_name;
+		} else {
+			int casefold_len = utf8_casefold(info->s_encoding, &name,
+							 cf_cache->casefold_lookup_buf, BCH_CF_NAME_MAX + 1);
+			if (casefold_len < 0)
+				goto bkey_hash;
+
+			cf_cache->casefold_lookup = (struct qstr) QSTR_INIT(cf_cache->casefold_lookup_buf, casefold_len);
+		}
+		return bch2_dirent_hash(info, &cf_cache->casefold_lookup);
+	}
+bkey_hash:
+#endif
 	return bch2_dirent_hash(info, &name);
 }
 
-static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
+static bool dirent_cmp_key(const struct bch_hash_info *info, struct bkey_s_c _l, const void *_r, struct bch_cf_lookup_cache *cf_cache)
 {
 	struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
 	const struct qstr l_name = bch2_dirent_get_name(l);
 	const struct qstr *r_name = _r;
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (cf_cache && cf_cache->casefold_lookup.len) {
+		struct qstr l_casefold_name = bch2_dirent_get_casefold_name(l);
+		if (l_casefold_name.len)
+			return l_casefold_name.len - cf_cache->casefold_lookup.len
+				?: memcmp(l_casefold_name.name, cf_cache->casefold_lookup.name, l_casefold_name.len);
+		else
+			return utf8_strncasecmp_folded(info->s_encoding, &cf_cache->casefold_lookup, &l_name);
+	}
+#endif
 	return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len);
 }
 
@@ -75,6 +130,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 	const struct qstr l_name = bch2_dirent_get_name(l);
 	const struct qstr r_name = bch2_dirent_get_name(r);
 
+	/* bkey to bkey comparisons do not need casefolding. */
+
 	return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len);
 }
 
@@ -97,24 +154,62 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
 	.is_visible	= dirent_is_visible,
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+static bool bch2_cf_modify_name_block_len(int *name_len, bool casefold)
+{
+	if (casefold && *name_len && *name_len <= BCH_CF_NAME_MAX) {
+		/*
+		 * Use the remaining space to store the casefolded name,
+		 * which has the same length as the regular name.
+		 */
+		*name_len = *name_len * 2;
+		return true;
+	}
+
+	return false;
+}
+#endif
+
 int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
 			enum bkey_invalid_flags flags,
 			struct printbuf *err)
 {
 	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 	struct qstr d_name = bch2_dirent_get_name(d);
+	int name_block_len = d_name.len;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+	struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
+	bool use_casefold_cache = bch2_cf_modify_name_block_len(&name_block_len,
+								d.v->d_casefold);
+#endif
 
 	if (!d_name.len) {
 		prt_printf(err, "empty name");
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) {
+	if (bkey_val_u64s(k.k) > dirent_val_u64s(name_block_len)) {
 		prt_printf(err, "value too big (%zu > %u)",
-		       bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
+		       bkey_val_u64s(k.k), dirent_val_u64s(name_block_len));
 		return -BCH_ERR_invalid_bkey;
 	}
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (use_casefold_cache) {
+		if (d_name.len > BCH_CF_NAME_MAX) {
+			prt_printf(err, "dirent w/ casefolding cache name too big (%u > %u)",
+			d_name.len, BCH_CF_NAME_MAX);
+			return -BCH_ERR_invalid_bkey;
+		}
+
+		if (d_cf_name.len > BCH_CF_NAME_MAX) {
+			prt_printf(err, "dirent w/ casefolding cache cf name too big (%u > %u)",
+			d_cf_name.len, BCH_CF_NAME_MAX);
+			return -BCH_ERR_invalid_bkey;
+		}
+	}
+#endif
 	if (d_name.len > BCH_NAME_MAX) {
 		prt_printf(err, "dirent name too big (%u > %u)",
 		       d_name.len, BCH_NAME_MAX);
@@ -161,13 +256,38 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
-				subvol_inum dir, u8 type,
+				subvol_inum dir,
+				const struct bch_hash_info *hash_info,
+				u8 type,
 				const struct qstr *name, u64 dst)
 {
 	struct bkey_i_dirent *dirent;
-	unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
+	int name_block_len = name->len;
+	unsigned u64s;
+#if IS_ENABLED(CONFIG_UNICODE)
+	int casefold_len;
+	bool use_casefold_cache = bch2_cf_modify_name_block_len(&name_block_len,
+								hash_info->s_encoding != NULL);
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf) =
+		use_casefold_cache ? bch2_hash_create_cf(hash_info) : NULL;
+
+	if (use_casefold_cache) {
+		casefold_len = utf8_casefold(hash_info->s_encoding, name,
+					     cf_cache->casefold_lookup_buf, BCH_CF_NAME_MAX + 1);
+
+		if (casefold_len != name->len) {
+			/*
+			 * In the event the casefold len does not match the name's
+			 * length, fallback to using casefolding without a cache.
+			 */
+			use_casefold_cache = false;
+			name_block_len = name->len;
+		}
+	}
+#endif
+	u64s = BKEY_U64s + dirent_val_u64s(name_block_len);
 
-	if (name->len > BCH_NAME_MAX)
+	if (name_block_len > BCH_NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
 
 	BUG_ON(u64s > U8_MAX);
@@ -187,14 +307,29 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 	}
 
 	dirent->v.d_type = type;
+	dirent->v.d_unused = 0;
+	dirent->v.d_casefold = 0;
 
 	memcpy(dirent->v.d_name, name->name, name->len);
-	memset(dirent->v.d_name + name->len, 0,
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (use_casefold_cache) {
+		EBUG_ON(casefold_len != name->len);
+		dirent->v.d_casefold = 1;
+		memcpy(&dirent->v.d_name[name->len], cf_cache->casefold_lookup_buf, casefold_len);
+	}
+#endif
+	memset(dirent->v.d_name + name_block_len, 0,
 	       bkey_val_bytes(&dirent->k) -
 	       offsetof(struct bch_dirent, d_name) -
-	       name->len);
+	       name_block_len);
 
-	EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+	EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name_block_len);
+	EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (use_casefold_cache) {
+		EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != name->len);
+	}
+#endif
 
 	return dirent;
 }
@@ -207,7 +342,7 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
 	struct bkey_i_dirent *dirent;
 	int ret;
 
-	dirent = dirent_create_key(trans, dir, type, name, dst_inum);
+	dirent = dirent_create_key(trans, dir, hash_info, type, name, dst_inum);
 	ret = PTR_ERR_OR_ZERO(dirent);
 	if (ret)
 		return ret;
@@ -333,7 +468,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
 		*src_offset = dst_iter.pos.offset;
 
 	/* Create new dst key: */
-	new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
+	new_dst = dirent_create_key(trans, dst_dir, dst_hash, 0, dst_name, 0);
 	ret = PTR_ERR_OR_ZERO(new_dst);
 	if (ret)
 		goto out;
@@ -343,7 +478,7 @@ int bch2_dirent_rename(struct btree_trans *trans,
 
 	/* Create new src key: */
 	if (mode == BCH_RENAME_EXCHANGE) {
-		new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
+		new_src = dirent_create_key(trans, src_dir, src_hash, 0, src_name, 0);
 		ret = PTR_ERR_OR_ZERO(new_src);
 		if (ret)
 			goto out;
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index bb5305441f27..9649872797da 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -46,6 +46,10 @@ int bch2_create_trans(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
+	/* Inherit casefold state from parent. */
+	if (dir_type == DT_DIR && (dir_u->bi_flags & BCH_INODE_CASEFOLDED))
+		new_inode->bi_flags |= BCH_INODE_CASEFOLDED;
+
 	if (!(flags & BCH_CREATE_SNAPSHOT)) {
 		/* Normal create path - allocate a new inode: */
 		bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 141bcced031e..9ead754a24b9 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -6,6 +6,7 @@
 #include "dirent.h"
 #include "fs.h"
 #include "fs-common.h"
+#include "str_hash.h"
 #include "fs-ioctl.h"
 #include "quota.h"
 
@@ -54,6 +55,28 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
 	    (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
 		return -EINVAL;
 
+	if ((newflags ^ oldflags) & BCH_INODE_CASEFOLDED) {
+#if IS_ENABLED(CONFIG_UNICODE)
+		int ret = 0;
+		/* Not supported on individual files. */
+		if (!S_ISDIR(bi->bi_mode))
+			return -EOPNOTSUPP;
+
+		/*
+		 * Make sure the dir is empty, as otherwise we'd need to
+		 * rehash everything and update the dirent keys.
+		 */
+		ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+		if (ret < 0)
+			return ret;
+
+		bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+#else
+		printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
+		return -EINVAL;
+#endif
+	}
+
 	if (s->set_projinherit) {
 		bi->bi_fields_set &= ~(1 << Inode_opt_project);
 		bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h
index f201980ef2c3..2950091b5ac6 100644
--- a/fs/bcachefs/fs-ioctl.h
+++ b/fs/bcachefs/fs-ioctl.h
@@ -6,19 +6,21 @@
 
 /* bcachefs inode flags -> vfs inode flags: */
 static const unsigned bch_flags_to_vfs[] = {
-	[__BCH_INODE_SYNC]	= S_SYNC,
-	[__BCH_INODE_IMMUTABLE]	= S_IMMUTABLE,
-	[__BCH_INODE_APPEND]	= S_APPEND,
-	[__BCH_INODE_NOATIME]	= S_NOATIME,
+	[__BCH_INODE_SYNC]		= S_SYNC,
+	[__BCH_INODE_IMMUTABLE]		= S_IMMUTABLE,
+	[__BCH_INODE_APPEND]		= S_APPEND,
+	[__BCH_INODE_NOATIME]		= S_NOATIME,
+	[__BCH_INODE_CASEFOLDED]	= S_CASEFOLD,
 };
 
 /* bcachefs inode flags -> FS_IOC_GETFLAGS: */
 static const unsigned bch_flags_to_uflags[] = {
-	[__BCH_INODE_SYNC]	= FS_SYNC_FL,
-	[__BCH_INODE_IMMUTABLE]	= FS_IMMUTABLE_FL,
-	[__BCH_INODE_APPEND]	= FS_APPEND_FL,
-	[__BCH_INODE_NODUMP]	= FS_NODUMP_FL,
-	[__BCH_INODE_NOATIME]	= FS_NOATIME_FL,
+	[__BCH_INODE_SYNC]		= FS_SYNC_FL,
+	[__BCH_INODE_IMMUTABLE]		= FS_IMMUTABLE_FL,
+	[__BCH_INODE_APPEND]		= FS_APPEND_FL,
+	[__BCH_INODE_NODUMP]		= FS_NODUMP_FL,
+	[__BCH_INODE_NOATIME]		= FS_NOATIME_FL,
+	[__BCH_INODE_CASEFOLDED]	= FS_CASEFOLD_FL,
 };
 
 /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index d99c04af2c55..d060465ecca0 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -750,6 +750,7 @@ static int hash_check_key(struct btree_trans *trans,
 			  struct bch_hash_info *hash_info,
 			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
 {
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf);
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter = { NULL };
 	struct printbuf buf = PRINTBUF;
@@ -760,7 +761,12 @@ static int hash_check_key(struct btree_trans *trans,
 	if (hash_k.k->type != desc.key_type)
 		return 0;
 
-	hash = desc.hash_bkey(hash_info, hash_k);
+	cf_cache = bch2_hash_create_cf(hash_info);
+	ret = PTR_ERR_OR_ZERO(cf_cache);
+	if (ret < 0)
+		return ret;
+
+	hash = desc.hash_bkey(hash_info, hash_k, cf_cache);
 
 	if (likely(hash == hash_k.k->p.offset))
 		return 0;
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index ae21a8cca1b4..61010405baa5 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -12,6 +12,7 @@
 #include "super.h"
 
 #include <linux/crc32c.h>
+#include <linux/cleanup.h>
 #include <crypto/hash.h>
 #include <crypto/sha2.h>
 
@@ -34,6 +35,9 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
 
 struct bch_hash_info {
 	u8			type;
+#if IS_ENABLED(CONFIG_UNICODE)
+	struct unicode_map 	*s_encoding;
+#endif
 	/*
 	 * For crc32 or crc64 string hashes the first key value of
 	 * the siphash_key (k0) is used as the key.
@@ -48,6 +52,9 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
 	struct bch_hash_info info = {
 		.type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) &
 			~(~0U << INODE_STR_HASH_BITS),
+#if IS_ENABLED(CONFIG_UNICODE)
+		.s_encoding = !!(bi->bi_flags & BCH_INODE_CASEFOLDED) ? c->s_encoding : NULL,
+#endif
 		.siphash_key = { .k0 = bi->bi_hash_seed }
 	};
 
@@ -65,6 +72,31 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
 	return info;
 }
 
+/* Fed back from hashing operations. */
+struct bch_cf_lookup_cache {
+	struct qstr		casefold_lookup;
+	unsigned char		casefold_lookup_buf[BCH_NAME_MAX + 1];
+};
+
+static __always_inline struct bch_cf_lookup_cache *
+bch2_hash_create_cf(const struct bch_hash_info *info)
+{
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (info->s_encoding) {
+		struct bch_cf_lookup_cache *cf_cache =
+			kmalloc(sizeof(struct bch_cf_lookup_cache), GFP_KERNEL);
+		if (!cf_cache)
+			return ERR_PTR(-ENOMEM);
+
+		cf_cache->casefold_lookup = (struct qstr) QSTR_INIT(NULL, 0);
+		return cf_cache;
+	}
+#endif
+	return NULL;
+}
+
+DEFINE_FREE(bch2_hash_free_cf, struct bch_cf_lookup_cache *, if (_T) kfree(_T))
+
 struct bch_str_hash_ctx {
 	union {
 		u32		crc32c;
@@ -134,9 +166,9 @@ struct bch_hash_desc {
 	enum btree_id	btree_id;
 	u8		key_type;
 
-	u64		(*hash_key)(const struct bch_hash_info *, const void *);
-	u64		(*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
-	bool		(*cmp_key)(struct bkey_s_c, const void *);
+	u64		(*hash_key)(const struct bch_hash_info *, const void *, struct bch_cf_lookup_cache *cf_cache);
+	u64		(*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c, struct bch_cf_lookup_cache *cf_cache);
+	bool		(*cmp_key)(const struct bch_hash_info *, struct bkey_s_c, const void *, struct bch_cf_lookup_cache *cf_cache);
 	bool		(*cmp_bkey)(struct bkey_s_c, struct bkey_s_c);
 	bool		(*is_visible)(subvol_inum inum, struct bkey_s_c);
 };
@@ -157,6 +189,7 @@ bch2_hash_lookup(struct btree_trans *trans,
 		 subvol_inum inum, const void *key,
 		 unsigned flags)
 {
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf);
 	struct bkey_s_c k;
 	u32 snapshot;
 	int ret;
@@ -165,12 +198,17 @@ bch2_hash_lookup(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
+	cf_cache = bch2_hash_create_cf(info);
+	ret = PTR_ERR_OR_ZERO(cf_cache);
+	if (ret < 0)
+		return ret;
+
 	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
-			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+			   SPOS(inum.inum, desc.hash_key(info, key, cf_cache), snapshot),
 			   POS(inum.inum, U64_MAX),
 			   BTREE_ITER_SLOTS|flags, k, ret) {
 		if (is_visible_key(desc, inum, k)) {
-			if (!desc.cmp_key(k, key))
+			if (!desc.cmp_key(info, k, key, cf_cache))
 				return 0;
 		} else if (k.k->type == KEY_TYPE_hash_whiteout) {
 			;
@@ -191,6 +229,7 @@ bch2_hash_hole(struct btree_trans *trans,
 	       const struct bch_hash_info *info,
 	       subvol_inum inum, const void *key)
 {
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf);
 	struct bkey_s_c k;
 	u32 snapshot;
 	int ret;
@@ -199,8 +238,13 @@ bch2_hash_hole(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
+	cf_cache = bch2_hash_create_cf(info);
+	ret = PTR_ERR_OR_ZERO(cf_cache);
+	if (ret < 0)
+		return ret;
+
 	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
-			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+			   SPOS(inum.inum, desc.hash_key(info, key, cf_cache), snapshot),
 			   POS(inum.inum, U64_MAX),
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
 		if (!is_visible_key(desc, inum, k))
@@ -216,6 +260,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 			     const struct bch_hash_info *info,
 			     struct btree_iter *start)
 {
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	int ret;
@@ -224,13 +269,18 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
 	bch2_btree_iter_advance(&iter);
 
+	cf_cache = bch2_hash_create_cf(info);
+	ret = PTR_ERR_OR_ZERO(cf_cache);
+	if (ret < 0)
+		return ret;
+
 	for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) {
 		if (k.k->type != desc.key_type &&
 		    k.k->type != KEY_TYPE_hash_whiteout)
 			break;
 
 		if (k.k->type == desc.key_type &&
-		    desc.hash_bkey(info, k) <= start->pos.offset) {
+		    desc.hash_bkey(info, k, cf_cache) <= start->pos.offset) {
 			ret = 1;
 			break;
 		}
@@ -250,13 +300,19 @@ int bch2_hash_set_snapshot(struct btree_trans *trans,
 			   int update_flags)
 {
 	struct btree_iter iter, slot = { NULL };
+	struct bch_cf_lookup_cache *cf_cache __free(bch2_hash_free_cf);
 	struct bkey_s_c k;
 	bool found = false;
 	int ret;
 
+	cf_cache = bch2_hash_create_cf(info);
+	ret = PTR_ERR_OR_ZERO(cf_cache);
+	if (ret < 0)
+		return ret;
+
 	for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
 			   SPOS(insert->k.p.inode,
-				desc.hash_bkey(info, bkey_i_to_s_c(insert)),
+				desc.hash_bkey(info, bkey_i_to_s_c(insert), cf_cache),
 				snapshot),
 			   POS(insert->k.p.inode, U64_MAX),
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 5c62fcf3afdb..204a337728ad 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -757,6 +757,16 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	if (ret)
 		goto err;
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	/* Default encoding until we can potentially have more as an option. */
+	c->s_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+#else
+	if (c->sb.features & (1ULL << BCH_FEATURE_casefolding)) {
+		printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
+		return ERR_PTR(-EINVAL);
+	}
+#endif
+
 	pr_uuid(&name, c->sb.user_uuid.b);
 	strscpy(c->name, name.buf, sizeof(c->name));
 	printbuf_exit(&name);
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 6f6b3caf0607..1010002f6282 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -27,12 +27,12 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info,
 	return bch2_str_hash_end(&ctx, info);
 }
 
-static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key, struct bch_cf_lookup_cache *cf_cache)
 {
 	return bch2_xattr_hash(info, key);
 }
 
-static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k, struct bch_cf_lookup_cache *cf_cache)
 {
 	struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k);
 
@@ -40,7 +40,7 @@ static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
 		 &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len));
 }
 
-static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r)
+static bool xattr_cmp_key(const struct bch_hash_info *info, struct bkey_s_c _l, const void *_r, struct bch_cf_lookup_cache *cf_cache)
 {
 	struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l);
 	const struct xattr_search_key *r = _r;
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn
  2023-08-12 14:47 [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Joshua Ashton
                   ` (2 preceding siblings ...)
  2023-08-12 14:47 ` [PATCH 4/4] bcachefs: Implement casefolding Joshua Ashton
@ 2023-08-12 16:17 ` Kent Overstreet
  3 siblings, 0 replies; 8+ messages in thread
From: Kent Overstreet @ 2023-08-12 16:17 UTC (permalink / raw)
  To: Joshua Ashton; +Cc: linux-bcachefs

On Sat, Aug 12, 2023 at 03:47:45PM +0100, Joshua Ashton wrote:
> This will be used when we need to re-hash a directory tree when setting
> flags.
> 
> It is not possible to have concurrent btree_trans on a thread.
> 
> Signed-off-by: Joshua Ashton <joshua@froggi.es>

Applied

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes
  2023-08-12 14:47 ` [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes Joshua Ashton
@ 2023-08-12 16:23   ` Kent Overstreet
  2023-08-12 17:09     ` Joshua Ashton
  0 siblings, 1 reply; 8+ messages in thread
From: Kent Overstreet @ 2023-08-12 16:23 UTC (permalink / raw)
  To: Joshua Ashton; +Cc: linux-bcachefs

On Sat, Aug 12, 2023 at 03:47:46PM +0100, Joshua Ashton wrote:
> Avoids doing a full strnlen for getting the length of the name of a
> dirent entry.
> 
> Given the fact that the name of dirents is stored at the end of the
> bkey's value, and we know the length of that in u64s, we can find the
> last u64 and figure out how many NUL bytes are at the end of the string.
> 
> On little endian systems this ends up being the leading zeros of the
> last u64, whereas on big endian systems this ends up being the trailing
> zeros of the last u64.
> We can take that value in bits and divide it by 8 to get the number of
> NUL bytes at the end.
> 
> There is no endian-fixup or other compatibility here as this is string
> data interpreted as a u64.

This needs a bit more analysis:

If we ever screwed up and created dirents where there was a nul before
the last u64, this is going to change how those dirents are interpreted
- we'll now be returning longer dirents, but with an embedded nul.

Fortunately, bch2_dirent_invalid() checks for this, so we're good there.

But, your change does break the check in bch2_dirent_invalid(); we still
do need to check somehow that there aren't any embedded nuls. Can you
fix that too?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes
  2023-08-12 16:23   ` Kent Overstreet
@ 2023-08-12 17:09     ` Joshua Ashton
  0 siblings, 0 replies; 8+ messages in thread
From: Joshua Ashton @ 2023-08-12 17:09 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-bcachefs



On 8/12/23 17:23, Kent Overstreet wrote:
> On Sat, Aug 12, 2023 at 03:47:46PM +0100, Joshua Ashton wrote:
>> Avoids doing a full strnlen for getting the length of the name of a
>> dirent entry.
>>
>> Given the fact that the name of dirents is stored at the end of the
>> bkey's value, and we know the length of that in u64s, we can find the
>> last u64 and figure out how many NUL bytes are at the end of the string.
>>
>> On little endian systems this ends up being the leading zeros of the
>> last u64, whereas on big endian systems this ends up being the trailing
>> zeros of the last u64.
>> We can take that value in bits and divide it by 8 to get the number of
>> NUL bytes at the end.
>>
>> There is no endian-fixup or other compatibility here as this is string
>> data interpreted as a u64.
> 
> This needs a bit more analysis:
> 
> If we ever screwed up and created dirents where there was a nul before
> the last u64, this is going to change how those dirents are interpreted
> - we'll now be returning longer dirents, but with an embedded nul.
> 
> Fortunately, bch2_dirent_invalid() checks for this, so we're good there.
> 
> But, your change does break the check in bch2_dirent_invalid(); we still
> do need to check somehow that there aren't any embedded nuls. Can you
> fix that too?

Yes. I have fixed this locally by ensuring strnlen(name.name, name.len) 
== name.len now in bch2_dirent_invalid.

- Joshie 🐸✨

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/4] bcachefs: Implement casefolding
  2023-08-12 14:47 ` [PATCH 4/4] bcachefs: Implement casefolding Joshua Ashton
@ 2023-08-12 22:44   ` Kent Overstreet
  0 siblings, 0 replies; 8+ messages in thread
From: Kent Overstreet @ 2023-08-12 22:44 UTC (permalink / raw)
  To: Joshua Ashton; +Cc: linux-bcachefs, André Almeida, Gabriel Krisman Bertazi

On Sat, Aug 12, 2023 at 03:47:48PM +0100, Joshua Ashton wrote:
> This patch implements support for case-insensitive file name lookups
> in bcachefs.
> 
> The implementation uses the same utf8 lowering and normalization that
> ext4 and f2fs are using currently.
> 
> It uses the regular CASEFOLD attributes and stores the casefolded name
> contiguously with the regular name on disk and in memory if space
> permits it.
> 
> Names that would be too long to fit contiguously are instead compared
> using a folding strcmp.
> 
> The crux of the implementation is that cached casefolded names are
> twice the length of uncasefolded names.
> In the case that they are not (which I don't believe is possible in
> the current UTF-8 spec for any cased glyphs), it again, falls back to
> a folding strcmp.
> 
> There is currently no option provided for selecting the casefolding
> encoding; ext4 and f2fs only support a single encoding per-superblock
> (utf8 12.1), but it would be trivial to extend this on bcachefs on a
> per-inode level using the opts system, so it is not provided in this patch.

As discussed on IRC, repeating for the list: for new features, we need
to start making sure we document/save all the rationale and design
decisions we talked about - even just saving the IRC logs can be quite
helpful later, bonus points for turning it into nicely formatted and
structured markdown.

In particular, we need to document why we went with tacking this onto
bch_dirent instead of creating a bch_dirent_v2; the fact that _this_
type of casefolding doesn't change the number of glyphs is also
important.

Stick it in Documentation/filesystems/bcachefs/casefolding.

I've got other design docs on the wiki that could be moved there, as
well.

> +#define BCH_CF_NAME_MAX (BCH_NAME_MAX / 2)

We probably ought to have a single BCH_NAME_MAX for casefolded and non
casefolded names - and we discussed on IRC either making it the same as
other filesystems (255), or we could also make it somewhat bigger, since
other filesystems support longer names as well.

That could easily be a superblock option, so people can decide what sort
of compatibility they want (much like the inodes_32bit option).

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-08-12 22:44 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-12 14:47 [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Joshua Ashton
2023-08-12 14:47 ` [PATCH 2/4] bcachefs: Optimize bch2_dirent_name_bytes Joshua Ashton
2023-08-12 16:23   ` Kent Overstreet
2023-08-12 17:09     ` Joshua Ashton
2023-08-12 14:47 ` [PATCH 3/4] bcachefs: Introduce bch2_dirent_get_name Joshua Ashton
2023-08-12 14:47 ` [PATCH 4/4] bcachefs: Implement casefolding Joshua Ashton
2023-08-12 22:44   ` Kent Overstreet
2023-08-12 16:17 ` [PATCH 1/4] bcachefs: Add btree_trans* to inode_set_fn Kent Overstreet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).