linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <krisman@collabora.com>
To: tytso@mit.edu
Cc: linux-ext4@vger.kernel.org,
	Gabriel Krisman Bertazi <krisman@collabora.com>
Subject: [PATCH e2fsprogs 06/11] e2fsck: Support casefold directories when rehashing
Date: Wed, 25 Mar 2020 17:18:06 -0400	[thread overview]
Message-ID: <20200325211812.2971787-7-krisman@collabora.com> (raw)
In-Reply-To: <20200325211812.2971787-1-krisman@collabora.com>

When rehashing a +F directory, the casefold comparison needs to be
performed, in order to identify duplicated filenames.  Like the -F
version, This is done in two steps, first adapt the qsort comparison to
consider casefolded directories, and then iterate over the sorted list
fixing dups.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 e2fsck/rehash.c | 88 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 16 deletions(-)

diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index 54bc680388c3..9254e2810ad2 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -211,6 +211,23 @@ static EXT2_QSORT_TYPE ino_cmp(const void *a, const void *b)
 	return (he_a->ino - he_b->ino);
 }
 
+struct name_cmp_ctx
+{
+	int casefold;
+	const struct ext2fs_nls_table *tbl;
+};
+
+
+static int same_name(const struct name_cmp_ctx *cmp_ctx, char *s1,
+		     int len1, char *s2, int len2)
+{
+	if (!cmp_ctx->casefold)
+		return (len1 == len2 &&	!memcmp(s1, s2, len1));
+	else
+		return !ext2fs_casefold_cmp(cmp_ctx->tbl,
+					    s1, len1, s2, len2);
+}
+
 /* Used for sorting the hash entry */
 static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 {
@@ -237,9 +254,35 @@ static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 	return ret;
 }
 
+static EXT2_QSORT_TYPE name_cf_cmp(const struct name_cmp_ctx *ctx,
+				   const void *a, const void *b)
+{
+	const struct hash_entry *he_a = (const struct hash_entry *) a;
+	const struct hash_entry *he_b = (const struct hash_entry *) b;
+	unsigned int he_a_len, he_b_len, min_len;
+	int ret;
+
+	he_a_len = ext2fs_dirent_name_len(he_a->dir);
+	he_b_len = ext2fs_dirent_name_len(he_b->dir);
+
+	ret = ext2fs_casefold_cmp(ctx->tbl, he_a->dir->name, he_a_len,
+				  he_b->dir->name, he_b_len);
+	if (ret == 0) {
+		if (he_a_len > he_b_len)
+			ret = 1;
+		else if (he_a_len < he_b_len)
+			ret = -1;
+		else
+			ret = he_b->dir->inode - he_a->dir->inode;
+	}
+	return ret;
+}
+
+
 /* Used for sorting the hash entry */
-static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
+static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b, void *arg)
 {
+	const struct name_cmp_ctx *ctx = (struct name_cmp_ctx *) arg;
 	const struct hash_entry *he_a = (const struct hash_entry *) a;
 	const struct hash_entry *he_b = (const struct hash_entry *) b;
 	int	ret;
@@ -253,8 +296,12 @@ static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
 			ret = 1;
 		else if (he_a->minor_hash < he_b->minor_hash)
 			ret = -1;
-		else
-			ret = name_cmp(a, b);
+		else {
+			if (ctx->casefold)
+				ret = name_cf_cmp(ctx, a, b);
+			else
+				ret = name_cmp(a, b);
+		}
 	}
 	return ret;
 }
@@ -376,7 +423,8 @@ static void mutate_name(char *str, unsigned int *len)
 
 static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 				    ext2_ino_t ino,
-				    struct fill_dir_struct *fd)
+				    struct fill_dir_struct *fd,
+				    const struct name_cmp_ctx *cmp_ctx)
 {
 	struct problem_context	pctx;
 	struct hash_entry 	*ent, *prev;
@@ -399,11 +447,12 @@ static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 		ent = fd->harray + i;
 		prev = ent - 1;
 		if (!ent->dir->inode ||
-		    (ext2fs_dirent_name_len(ent->dir) !=
-		     ext2fs_dirent_name_len(prev->dir)) ||
-		    memcmp(ent->dir->name, prev->dir->name,
-			     ext2fs_dirent_name_len(ent->dir)))
+		    !same_name(cmp_ctx, ent->dir->name,
+			       ext2fs_dirent_name_len(ent->dir),
+			       prev->dir->name,
+			       ext2fs_dirent_name_len(prev->dir)))
 			continue;
+
 		pctx.dirent = ent->dir;
 		if ((ent->dir->inode == prev->dir->inode) &&
 		    fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
@@ -422,10 +471,11 @@ static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 		mutate_name(new_name, &new_len);
 		for (j=0; j < fd->num_array; j++) {
 			if ((i==j) ||
-			    (new_len !=
-			     (unsigned) ext2fs_dirent_name_len(fd->harray[j].dir)) ||
-			    memcmp(new_name, fd->harray[j].dir->name, new_len))
+			    !same_name(cmp_ctx, new_name, new_len,
+				       fd->harray[j].dir->name,
+				       ext2fs_dirent_name_len(fd->harray[j].dir))) {
 				continue;
+			}
 			mutate_name(new_name, &new_len);
 
 			j = -1;
@@ -890,6 +940,7 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
 	struct fill_dir_struct	fd = { NULL, NULL, 0, 0, 0, NULL,
 				       0, 0, 0, 0, 0, 0 };
 	struct out_dir		outdir = { 0, 0, 0, 0 };
+	struct name_cmp_ctx name_cmp_ctx = {0, NULL};
 
 	e2fsck_read_inode(ctx, ino, &inode, "rehash_dir");
 
@@ -917,6 +968,11 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
 		fd.compress = 1;
 	fd.parent = 0;
 
+	if (fs->encoding && (inode.i_flags & EXT4_CASEFOLD_FL)) {
+		name_cmp_ctx.casefold = 1;
+		name_cmp_ctx.tbl = fs->encoding;
+	}
+
 retry_nohash:
 	/* Read in the entire directory into memory */
 	retval = ext2fs_block_iterate3(fs, ino, 0, 0,
@@ -945,16 +1001,16 @@ retry_nohash:
 	/* Sort the list */
 resort:
 	if (fd.compress && fd.num_array > 1)
-		qsort(fd.harray+2, fd.num_array-2, sizeof(struct hash_entry),
-		      hash_cmp);
+		qsort_r(fd.harray+2, fd.num_array-2, sizeof(struct hash_entry),
+			hash_cmp, &name_cmp_ctx);
 	else
-		qsort(fd.harray, fd.num_array, sizeof(struct hash_entry),
-		      hash_cmp);
+		qsort_r(fd.harray, fd.num_array, sizeof(struct hash_entry),
+			hash_cmp, &name_cmp_ctx);
 
 	/*
 	 * Look for duplicates
 	 */
-	if (duplicate_search_and_fix(ctx, fs, ino, &fd))
+	if (duplicate_search_and_fix(ctx, fs, ino, &fd, &name_cmp_ctx))
 		goto resort;
 
 	if (ctx->options & E2F_OPT_NO) {
-- 
2.25.0


  parent reply	other threads:[~2020-03-25 21:18 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-25 21:18 [PATCH e2fsprogs 00/11] Improvements for Case-insensitive handling Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 01/11] tune2fs: Allow enabling casefold feature after fs creation Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 02/11] tune2fs: Fix casefold+encrypt error message Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 03/11] ext2fs: Add method to validate casefolded strings Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 04/11] ext2fs: Implement faster CI comparison of strings Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 05/11] e2fsck: Fix entries with invalid encoded characters Gabriel Krisman Bertazi
2020-03-25 21:18 ` Gabriel Krisman Bertazi [this message]
2020-03-25 21:18 ` [PATCH e2fsprogs 07/11] dict: Support comparison with context Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 08/11] e2fsck: Detect duplicated casefolded direntries for rehash Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 09/11] e2fsck: Add option to force encoded filename verification Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 10/11] e2fsck.8.in: Document check_encoding extended option Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 11/11] tests: f_bad_fname: Test fixes of invalid filenames and duplicates Gabriel Krisman Bertazi
2020-03-25 21:18 ` [PATCH e2fsprogs 11/11] tests: f_bad_fname: Validate fix " Gabriel Krisman Bertazi
2020-03-26 17:25   ` Gabriel Krisman Bertazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200325211812.2971787-7-krisman@collabora.com \
    --to=krisman@collabora.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).