All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaud Ferraris <arnaud.ferraris@collabora.com>
To: linux-ext4@vger.kernel.org
Cc: drosen@google.com, krisman@collabora.com, ebiggers@kernel.org,
	tytso@mit.edu, Arnaud Ferraris <arnaud.ferraris@collabora.com>
Subject: [PATCH RESEND v2 07/12] e2fsck: Support casefold directories when rehashing
Date: Thu, 10 Dec 2020 16:03:48 +0100	[thread overview]
Message-ID: <20201210150353.91843-8-arnaud.ferraris@collabora.com> (raw)
In-Reply-To: <20201210150353.91843-1-arnaud.ferraris@collabora.com>

From: Gabriel Krisman Bertazi <krisman@collabora.com>

When rehashing a +F directory, the casefold comparison needs to be
performed, in order to identify duplicated filenames.  Like the -F
version, This is done in two steps, first adapt the qsort comparison to
consider casefolded directories, and then iterate over the sorted list
fixing dups.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
---
 e2fsck/rehash.c | 88 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 16 deletions(-)

diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index 30e510a6..14215011 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -214,6 +214,23 @@ static EXT2_QSORT_TYPE ino_cmp(const void *a, const void *b)
 	return (he_a->ino - he_b->ino);
 }
 
+struct name_cmp_ctx
+{
+	int casefold;
+	const struct ext2fs_nls_table *tbl;
+};
+
+
+static int same_name(const struct name_cmp_ctx *cmp_ctx, char *s1,
+		     int len1, char *s2, int len2)
+{
+	if (!cmp_ctx->casefold)
+		return (len1 == len2 &&	!memcmp(s1, s2, len1));
+	else
+		return !ext2fs_casefold_cmp(cmp_ctx->tbl,
+					    s1, len1, s2, len2);
+}
+
 /* Used for sorting the hash entry */
 static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 {
@@ -240,9 +257,35 @@ static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 	return ret;
 }
 
+static EXT2_QSORT_TYPE name_cf_cmp(const struct name_cmp_ctx *ctx,
+				   const void *a, const void *b)
+{
+	const struct hash_entry *he_a = (const struct hash_entry *) a;
+	const struct hash_entry *he_b = (const struct hash_entry *) b;
+	unsigned int he_a_len, he_b_len, min_len;
+	int ret;
+
+	he_a_len = ext2fs_dirent_name_len(he_a->dir);
+	he_b_len = ext2fs_dirent_name_len(he_b->dir);
+
+	ret = ext2fs_casefold_cmp(ctx->tbl, he_a->dir->name, he_a_len,
+				  he_b->dir->name, he_b_len);
+	if (ret == 0) {
+		if (he_a_len > he_b_len)
+			ret = 1;
+		else if (he_a_len < he_b_len)
+			ret = -1;
+		else
+			ret = he_b->dir->inode - he_a->dir->inode;
+	}
+	return ret;
+}
+
+
 /* Used for sorting the hash entry */
-static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
+static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b, void *arg)
 {
+	const struct name_cmp_ctx *ctx = (struct name_cmp_ctx *) arg;
 	const struct hash_entry *he_a = (const struct hash_entry *) a;
 	const struct hash_entry *he_b = (const struct hash_entry *) b;
 	int	ret;
@@ -256,8 +299,12 @@ static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
 			ret = 1;
 		else if (he_a->minor_hash < he_b->minor_hash)
 			ret = -1;
-		else
-			ret = name_cmp(a, b);
+		else {
+			if (ctx->casefold)
+				ret = name_cf_cmp(ctx, a, b);
+			else
+				ret = name_cmp(a, b);
+		}
 	}
 	return ret;
 }
@@ -380,7 +427,8 @@ static void mutate_name(char *str, unsigned int *len)
 
 static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 				    ext2_ino_t ino,
-				    struct fill_dir_struct *fd)
+				    struct fill_dir_struct *fd,
+				    const struct name_cmp_ctx *cmp_ctx)
 {
 	struct problem_context	pctx;
 	struct hash_entry	*ent, *prev;
@@ -403,11 +451,12 @@ static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 		ent = fd->harray + i;
 		prev = ent - 1;
 		if (!ent->dir->inode ||
-		    (ext2fs_dirent_name_len(ent->dir) !=
-		     ext2fs_dirent_name_len(prev->dir)) ||
-		    memcmp(ent->dir->name, prev->dir->name,
-			     ext2fs_dirent_name_len(ent->dir)))
+		    !same_name(cmp_ctx, ent->dir->name,
+			       ext2fs_dirent_name_len(ent->dir),
+			       prev->dir->name,
+			       ext2fs_dirent_name_len(prev->dir)))
 			continue;
+
 		pctx.dirent = ent->dir;
 		if ((ent->dir->inode == prev->dir->inode) &&
 		    fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
@@ -426,10 +475,11 @@ static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 		mutate_name(new_name, &new_len);
 		for (j=0; j < fd->num_array; j++) {
 			if ((i==j) ||
-			    (new_len !=
-			     (unsigned) ext2fs_dirent_name_len(fd->harray[j].dir)) ||
-			    memcmp(new_name, fd->harray[j].dir->name, new_len))
+			    !same_name(cmp_ctx, new_name, new_len,
+				       fd->harray[j].dir->name,
+				       ext2fs_dirent_name_len(fd->harray[j].dir))) {
 				continue;
+			}
 			mutate_name(new_name, &new_len);
 
 			j = -1;
@@ -894,6 +944,7 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
 	struct fill_dir_struct	fd = { NULL, NULL, 0, 0, 0, NULL,
 				       0, 0, 0, 0, 0, 0 };
 	struct out_dir		outdir = { 0, 0, 0, 0 };
+	struct name_cmp_ctx name_cmp_ctx = {0, NULL};
 
 	e2fsck_read_inode(ctx, ino, &inode, "rehash_dir");
 
@@ -921,6 +972,11 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
 		fd.compress = 1;
 	fd.parent = 0;
 
+	if (fs->encoding && (inode.i_flags & EXT4_CASEFOLD_FL)) {
+		name_cmp_ctx.casefold = 1;
+		name_cmp_ctx.tbl = fs->encoding;
+	}
+
 retry_nohash:
 	/* Read in the entire directory into memory */
 	retval = ext2fs_block_iterate3(fs, ino, 0, 0,
@@ -949,16 +1005,16 @@ retry_nohash:
 	/* Sort the list */
 resort:
 	if (fd.compress && fd.num_array > 1)
-		qsort(fd.harray+2, fd.num_array-2, sizeof(struct hash_entry),
-		      hash_cmp);
+		qsort_r(fd.harray+2, fd.num_array-2, sizeof(struct hash_entry),
+			hash_cmp, &name_cmp_ctx);
 	else
-		qsort(fd.harray, fd.num_array, sizeof(struct hash_entry),
-		      hash_cmp);
+		qsort_r(fd.harray, fd.num_array, sizeof(struct hash_entry),
+			hash_cmp, &name_cmp_ctx);
 
 	/*
 	 * Look for duplicates
 	 */
-	if (duplicate_search_and_fix(ctx, fs, ino, &fd))
+	if (duplicate_search_and_fix(ctx, fs, ino, &fd, &name_cmp_ctx))
 		goto resort;
 
 	if (ctx->options & E2F_OPT_NO) {
-- 
2.29.2


  parent reply	other threads:[~2020-12-10 15:06 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-10 15:03 [PATCH RESEND v2 00/12] e2fsprogs: improve case-insensitive fs Arnaud Ferraris
2020-12-10 15:03 ` [PATCH RESEND v2 01/12] tune2fs: Allow enabling casefold feature after fs creation Arnaud Ferraris
2021-01-27 22:42   ` Theodore Ts'o
2020-12-10 15:03 ` [PATCH RESEND v2 02/12] tune2fs: Fix casefold+encrypt error message Arnaud Ferraris
2021-01-27 22:46   ` Theodore Ts'o
2020-12-10 15:03 ` [PATCH RESEND v2 03/12] ext2fs: Add method to validate casefolded strings Arnaud Ferraris
2021-01-28  2:48   ` Theodore Ts'o
2020-12-10 15:03 ` [PATCH RESEND v2 04/12] ext2fs: Implement faster CI comparison of strings Arnaud Ferraris
2021-01-28  2:49   ` Theodore Ts'o
2020-12-10 15:03 ` [PATCH RESEND v2 05/12] e2fsck: add new problem for casefolded name check Arnaud Ferraris
2020-12-10 20:36   ` Gabriel Krisman Bertazi
2020-12-10 20:38   ` Gabriel Krisman Bertazi
2020-12-10 15:03 ` [PATCH RESEND v2 06/12] e2fsck: Fix entries with invalid encoded characters Arnaud Ferraris
2020-12-10 20:51   ` Gabriel Krisman Bertazi
2020-12-15 17:16     ` Arnaud Ferraris
2020-12-10 15:03 ` Arnaud Ferraris [this message]
2020-12-10 20:53   ` [PATCH RESEND v2 07/12] e2fsck: Support casefold directories when rehashing Gabriel Krisman Bertazi
2020-12-15 17:17     ` Arnaud Ferraris
2020-12-15 17:34       ` Gabriel Krisman Bertazi
2020-12-10 15:03 ` [PATCH RESEND v2 08/12] dict: Support comparison with context Arnaud Ferraris
2020-12-10 15:03 ` [PATCH RESEND v2 09/12] e2fsck: Detect duplicated casefolded direntries for rehash Arnaud Ferraris
2020-12-10 15:03 ` [PATCH RESEND v2 10/12] e2fsck: Add option to force encoded filename verification Arnaud Ferraris
2020-12-10 20:48   ` Gabriel Krisman Bertazi
2020-12-10 15:03 ` [PATCH RESEND v2 11/12] e2fsck.8.in: Document check_encoding extended option Arnaud Ferraris
2020-12-10 15:03 ` [PATCH RESEND v2 12/12] tests: f_bad_fname: Test fixes of invalid filenames and duplicates Arnaud Ferraris
2021-01-28  2:52 ` [PATCH RESEND v2 00/12] e2fsprogs: improve case-insensitive fs Theodore Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201210150353.91843-8-arnaud.ferraris@collabora.com \
    --to=arnaud.ferraris@collabora.com \
    --cc=drosen@google.com \
    --cc=ebiggers@kernel.org \
    --cc=krisman@collabora.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.