All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] e2fsck: set dir_nlink feature if large dir exists
@ 2017-08-30  7:47 Andreas Dilger
  2017-08-30  7:47 ` [PATCH 2/2] ext2fs: improve expand_dir performance Andreas Dilger
  0 siblings, 1 reply; 2+ messages in thread
From: Andreas Dilger @ 2017-08-30  7:47 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, Andreas Dilger

If there is a directory with more than EXT2_LINK_MAX (65000)
subdirectories, but the DIR_NLINK feature is not set in the
superblock, the feature should be set before continuing on
to change the on-disk directory link count to 1.

While most filesystems should have DIR_NLINK set (it was set
by default for all ext4 filesystems, and the kernel before
4.12 automatically set it if the directory link count grew
too large), it is possible that this flag is lost due to disk
corruption or for an upgraded filesystem.  We no longer want
the kernel to automatically enable this feature.

Addresses: https://bugzilla.kernel.org/show_bug.cgi?id=196405
Signed-off-by: Andreas Dilger <adilger@dilger.ca>
---
 e2fsck/pass4.c           | 12 ++++++++-
 e2fsck/problem.c         |  5 ++++
 e2fsck/problem.h         |  3 +++
 tests/f_large_dir/expect |  7 +++--
 tests/f_large_dir/script | 67 +++++++++++++++++++++++++++++++-----------------
 5 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/e2fsck/pass4.c b/e2fsck/pass4.c
index 663f87a..d0ff8e9 100644
--- a/e2fsck/pass4.c
+++ b/e2fsck/pass4.c
@@ -170,6 +170,7 @@ void e2fsck_pass4(e2fsck_t ctx)
 #endif
 	struct problem_context	pctx;
 	__u16	link_count, link_counted;
+	int dir_nlink_fs;
 	char	*buf = 0;
 	dgrp_t	group, maxgroup;
 
@@ -193,6 +194,8 @@ void e2fsck_pass4(e2fsck_t ctx)
 	if (!(ctx->options & E2F_OPT_PREEN))
 		fix_problem(ctx, PR_4_PASS_HEADER, &pctx);
 
+	dir_nlink_fs = ext2fs_has_feature_dir_nlink(fs->super);
+
 	group = 0;
 	maxgroup = fs->group_desc_count;
 	if (ctx->progress)
@@ -249,8 +252,15 @@ void e2fsck_pass4(e2fsck_t ctx)
 					    &link_counted);
 		}
 		isdir = ext2fs_test_inode_bitmap2(ctx->inode_dir_map, i);
-		if (isdir && (link_counted > EXT2_LINK_MAX))
+		if (isdir && (link_counted > EXT2_LINK_MAX)) {
+			if (!dir_nlink_fs &&
+			    fix_problem(ctx, PR_4_DIR_NLINK_FEATURE, &pctx)) {
+				ext2fs_set_feature_dir_nlink(fs->super);
+				ext2fs_mark_super_dirty(fs);
+				dir_nlink_fs = 1;
+			}
 			link_counted = 1;
+		}
 		if (link_counted != link_count) {
 			e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
 					       inode_size, "pass4");
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 9706933..25c1de9 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1873,6 +1873,11 @@ static struct e2fsck_problem problem_table[] = {
 	  N_("@a @i %i ref count is %N, @s %n. "),
 	  PROMPT_FIX, PR_PREEN_OK },
 
+	/* directory exceeds max links, but no DIR_NLINK feature in superblock*/
+	{ PR_4_DIR_NLINK_FEATURE,
+	  N_("@d exceeds max links, but no DIR_NLINK feature in @S.\n"),
+	  PROMPT_FIX, 0 },
+
 	/* Pass 5 errors */
 
 	/* Pass 5: Checking group summary information */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index f30f8f0..07ed0a7 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -1134,6 +1134,9 @@ struct problem_context {
 /* Extended attribute inode ref count wrong */
 #define PR_4_EA_INODE_REF_COUNT		0x040005
 
+/* directory exceeds max links, but no DIR_NLINK feature in superblock */
+#define PR_4_DIR_NLINK_FEATURE		0x040006
+
 /*
  * Pass 5 errors
  */
diff --git a/tests/f_large_dir/expect b/tests/f_large_dir/expect
index b099460..4b9ca6f 100644
--- a/tests/f_large_dir/expect
+++ b/tests/f_large_dir/expect
@@ -3,10 +3,13 @@ Pass 2: Checking directory structure
 Pass 3: Checking directory connectivity
 Pass 3A: Optimizing directories
 Pass 4: Checking reference counts
-Inode 13 ref count is 1, should be 47245.  Fix? yes
+Directory exceeds max links, but no DIR_NLINK feature in superblock.
+Fix? yes
+
+Inode 12 ref count is 65012, should be 1.  Fix? yes
 
 Pass 5: Checking group summary information
 
 test.img: ***** FILE SYSTEM WAS MODIFIED *****
-test.img: 13/115368 files (0.0% non-contiguous), 32817/460800 blocks
+test.img: 65023/65104 files (0.0% non-contiguous), 96668/100937 blocks
 Exit status is 1
diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
index 0b5fdff..b25e106 100644
--- a/tests/f_large_dir/script
+++ b/tests/f_large_dir/script
@@ -5,43 +5,64 @@ E2FSCK=../e2fsck/e2fsck
 NAMELEN=255
 DIRENT_SZ=8
 BLOCKSZ=1024
+INODESZ=128
 DIRENT_PER_LEAF=$((BLOCKSZ / (NAMELEN + DIRENT_SZ)))
 HEADER=32
 INDEX_SZ=8
 INDEX_L1=$(((BLOCKSZ - HEADER) / INDEX_SZ))
 INDEX_L2=$(((BLOCKSZ - DIRENT_SZ) / INDEX_SZ))
-ENTRIES=$((INDEX_L1 * INDEX_L2 * DIRENT_PER_LEAF))
+DIRBLK=$((2 + INDEX_L1 * INDEX_L2))
+ENTRIES=$((DIRBLK * DIRENT_PER_LEAF))
+EXT4_LINK_MAX=65000
+if [ $ENTRIES -lt $((EXT4_LINK_MAX + 10)) ]; then
+	ENTRIES=$((EXT4_LINK_MAX + 10))
+	DIRBLK=$((ENTRIES / DIRENT_PER_LEAF + 3))
+fi
+# directory leaf blocks plus inode count and 25% for the rest of the fs
+FSIZE=$(((DIRBLK + EXT4_LINK_MAX * ((BLOCKSZ + INODESZ) / BLOCKSZ)) * 5 / 4))
 
-cp /dev/null $OUT
-$MKE2FS -b 1024 -O large_dir,uninit_bg,dir_nlink -F $TMPFILE 460800 \
-	> /dev/null 2>&1
+> $OUT
+$MKE2FS -b 1024 -O large_dir,uninit_bg -N $((ENTRIES + 50)) \
+	-I $INODESZ -F $TMPFILE $FSIZE > $OUT 2>&1
+RC=$?
+if [ $RC -eq 0 ]; then
 {
-	echo "feature large_dir"
+	START=$SECONDS
 	echo "mkdir /foo"
 	echo "cd /foo"
-	touch foofile
-	echo "write foofile foofile"
+	touch $TMPFILE.tmp
+	echo "write $TMPFILE.tmp foofile"
 	i=0
-	while test $i  -lt $ENTRIES ; do
-	    if test $(( i % DIRENT_PER_LEAF )) -eq 0 ; then
-		echo "expand ./"
+	while test $i -lt $ENTRIES ; do
+	    if test $((i % DIRENT_PER_LEAF)) -eq 0; then
+	    	echo "expand ./"
 	    fi
-	    if test $(( i % 5000 )) -eq 0 -a $i -gt 0 ; then
-		>&2 echo "$test_name: $i processed"
+	    if test $((i % 5000)) -eq 0 -a $SECONDS -ne $START; then
+		ELAPSED=$((SECONDS - START))
+		RATE=$((i / ELAPSED))
+		echo "$test_name: $i processed in ${ELAPSED}s @ $RATE/s" >&2
+		START=$SECONDS
 	    fi
-	    printf "ln foofile %0255X\n" $i
-	    i=$(($i + 1))
+	    if test $i -lt $((EXT4_LINK_MAX + 10)); then
+		printf "mkdir d%0254u\n" $i
+	    else
+		printf "ln foofile f%0254u\n" $i
+	    fi
+	    i=$((i + 1))
 	done
-} | $DEBUGFS -w -f /dev/stdin $TMPFILE > /dev/null 2>&1

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2] ext2fs: improve expand_dir performance
  2017-08-30  7:47 [PATCH 1/2] e2fsck: set dir_nlink feature if large dir exists Andreas Dilger
@ 2017-08-30  7:47 ` Andreas Dilger
  0 siblings, 0 replies; 2+ messages in thread
From: Andreas Dilger @ 2017-08-30  7:47 UTC (permalink / raw)
  To: tytso; +Cc: linux-ext4, Andreas Dilger

Previously, ext2fs_expand_dir() in mke2fs and debugfs could
only add one block at a time after scanning the whole directory,
which was very slow if a large number of directory leaf blocks
are being added at once (in particular for the f_large_dir test).

Add a new ext2fs_expand_dir2() function that takes the number
of blocks to add to the directory and add them all at once, and
call that from mke2fs (to create lost+found) and debugfs (with
an optional 3rd argument to the "expand_dir" command).

Fix expand_dir_proc() to expand inline directories before trying
to add more blocks, to distinguish between adding blocks and
clusters, and not count added indirect/index blocks as leaves.

Have create_lost_and_found() round up to a full bigalloc chunk,
as there is little benefit if unused blocks in the same chunk
are left "free" since they can't be used for anything else.

This speeds up f_large_dir with 65000 files from 4232s to 4141s.

Signed-off-by: Andreas Dilger <adilger@dilger.ca>
---
 debugfs/debugfs.8.in     |  9 ++++++--
 debugfs/debugfs.c        | 23 +++++++++++++++++----
 lib/ext2fs/expanddir.c   | 53 ++++++++++++++++++++++++++++++++----------------
 lib/ext2fs/ext2fs.h      |  2 ++
 misc/mke2fs.c            | 27 +++++++++++++-----------
 tests/f_large_dir/script |  4 +---
 6 files changed, 79 insertions(+), 39 deletions(-)

diff --git a/debugfs/debugfs.8.in b/debugfs/debugfs.8.in
index 87d487e..e3f54c1 100644
--- a/debugfs/debugfs.8.in
+++ b/debugfs/debugfs.8.in
@@ -316,9 +316,14 @@ Remove the extended attribute
 .I attr_name
 from the file \fIfilespec\fR.
 .TP
-.BI expand_dir " filespec"
+.B expand_dir
+.I filespec
+.RI [ blocks_to_add ]
 Expand the directory
-.IR filespec .
+.I filespec
+by
+.I blocks_to_add
+blocks, or 1 block if unspecified.
 .TP
 .BI fallocate " filespec start_block [end_block]
 Allocate and map uninitialized blocks into \fIfilespec\fR between
diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
index 0a4b536..868cbbd 100644
--- a/debugfs/debugfs.c
+++ b/debugfs/debugfs.c
@@ -1965,15 +1965,30 @@ void do_show_debugfs_params(int argc EXT2FS_ATTR((unused)),
 #ifndef READ_ONLY
 void do_expand_dir(int argc, char *argv[])
 {
-	ext2_ino_t inode;
+	ext2_ino_t ino;
+	int add_blocks;
 	int retval;
 
-	if (common_inode_args_process(argc, argv, &inode, CHECK_FS_RW))
+	if (common_args_process(argc, argv, 2, 3, argv[0],
+				"<file> [blocks_to_add]",
+				CHECK_FS_RW | CHECK_FS_BITMAPS))
+		return;
+
+	ino = string_to_inode(argv[1]);
+	if (!ino)
 		return;
 
-	retval = ext2fs_expand_dir(current_fs, inode);
+	if (argc == 3) {
+		add_blocks = parse_ulong(argv[2], argv[1], "blocks_to_add",
+					 &retval);
+		if (retval)
+			return;
+	} else {
+		add_blocks = 1;
+	}
+	retval = ext2fs_expand_dir2(current_fs, ino, add_blocks);
 	if (retval)
-		com_err("ext2fs_expand_dir", retval, 0);
+		com_err("ext2fs_expand_dir2", retval, 0);
 	return;
 }
 
diff --git a/lib/ext2fs/expanddir.c b/lib/ext2fs/expanddir.c
index 9f02312..d88861e 100644
--- a/lib/ext2fs/expanddir.c
+++ b/lib/ext2fs/expanddir.c
@@ -21,11 +21,13 @@
 #include "ext2fsP.h"
 
 struct expand_dir_struct {
-	int		done;
-	int		newblocks;
+	unsigned	done:1;
+	unsigned	clusters_added;
 	blk64_t		goal;
 	errcode_t	err;
 	ext2_ino_t	dir;
+	unsigned	blocks_to_add;
+	unsigned	blocks_added;
 };
 
 static int expand_dir_proc(ext2_filsys	fs,
@@ -35,7 +37,7 @@ static int expand_dir_proc(ext2_filsys	fs,
 			   int		ref_offset EXT2FS_ATTR((unused)),
 			   void		*priv_data)
 {
-	struct expand_dir_struct *es = (struct expand_dir_struct *) priv_data;
+	struct expand_dir_struct *es = priv_data;
 	blk64_t	new_blk;
 	char		*block;
 	errcode_t	retval;
@@ -46,30 +48,33 @@ static int expand_dir_proc(ext2_filsys	fs,
 		return 0;
 	}
 	if (blockcnt &&
-	    (EXT2FS_B2C(fs, es->goal) == EXT2FS_B2C(fs, es->goal+1)))
-		new_blk = es->goal+1;
-	else {
+	    (EXT2FS_B2C(fs, es->goal) == EXT2FS_B2C(fs, es->goal + 1))) {
+		new_blk = es->goal + 1;
+	} else {
 		es->goal &= ~EXT2FS_CLUSTER_MASK(fs);
 		retval = ext2fs_new_block2(fs, es->goal, 0, &new_blk);
 		if (retval) {
 			es->err = retval;
 			return BLOCK_ABORT;
 		}
-		es->newblocks++;
+		es->clusters_added++;
 		ext2fs_block_alloc_stats2(fs, new_blk, +1);
 	}
 	if (blockcnt > 0) {
+		es->blocks_added++;
 		retval = ext2fs_new_dir_block(fs, 0, 0, &block);
 		if (retval) {
 			es->err = retval;
 			return BLOCK_ABORT;
 		}
-		es->done = 1;
+		es->done = (es->blocks_added >= es->blocks_to_add);
 		retval = ext2fs_write_dir_block4(fs, new_blk, block, 0,
 						 es->dir);
 		ext2fs_free_mem(&block);
-	} else
+	} else {
+		/* indirect or index block */
 		retval = ext2fs_zero_blocks2(fs, new_blk, 1, NULL, NULL);
+	}
 	if (blockcnt >= 0)
 		es->goal = new_blk;
 	if (retval) {
@@ -84,10 +89,11 @@ static int expand_dir_proc(ext2_filsys	fs,
 		return BLOCK_CHANGED;
 }
 
-errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
+errcode_t ext2fs_expand_dir2(ext2_filsys fs, ext2_ino_t dir,
+			     unsigned blocks_to_add)
 {
 	errcode_t	retval;
-	struct expand_dir_struct es;
+	struct expand_dir_struct es = { 0 };
 	struct ext2_inode	inode;
 
 	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
@@ -98,6 +104,9 @@ errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
 	if (!fs->block_map)
 		return EXT2_ET_NO_BLOCK_BITMAP;
 
+	if (blocks_to_add > fs->super->s_free_blocks_count)
+		return EXT2_ET_DIR_NO_SPACE;
+
 	retval = ext2fs_check_directory(fs, dir);
 	if (retval)
 		return retval;
@@ -106,16 +115,19 @@ errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
 	if (retval)
 		return retval;
 
-	es.done = 0;
-	es.err = 0;
+	/* expand inode inline data before starting iteration */
+	if (inode.i_flags & EXT4_INLINE_DATA_FL) {
+		retval = ext2fs_inline_data_expand(fs, dir);
+		if (retval)
+			return retval;
+		blocks_to_add--;
+	}
 	es.goal = ext2fs_find_inode_goal(fs, dir, &inode, 0);
-	es.newblocks = 0;
 	es.dir = dir;
+	es.blocks_to_add = blocks_to_add;
 
 	retval = ext2fs_block_iterate3(fs, dir, BLOCK_FLAG_APPEND,
 				       0, expand_dir_proc, &es);
-	if (retval == EXT2_ET_INLINE_DATA_CANT_ITERATE)
-		return ext2fs_inline_data_expand(fs, dir);
 
 	if (es.err)
 		return es.err;
@@ -129,8 +141,8 @@ errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
 	if (retval)
 		return retval;
 
-	inode.i_size += fs->blocksize;
-	ext2fs_iblk_add_blocks(fs, &inode, es.newblocks);
+	inode.i_size += fs->blocksize * es.blocks_added;
+	ext2fs_iblk_add_blocks(fs, &inode, es.clusters_added);
 
 	retval = ext2fs_write_inode(fs, dir, &inode);
 	if (retval)
@@ -138,3 +150,8 @@ errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
 
 	return 0;
 }
+
+errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir)
+{
+	return ext2fs_expand_dir2(fs, dir, 1);
+}
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index b734f1a..0267660 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1194,6 +1194,8 @@ extern errcode_t ext2fs_dup_handle(ext2_filsys src, ext2_filsys *dest);
 
 /* expanddir.c */
 extern errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir);
+extern errcode_t ext2fs_expand_dir2(ext2_filsys fs, ext2_ino_t dir,
+				    unsigned blocks_to_add);
 
 /* ext_attr.c */
 extern __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry,
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 85d88ed..2a83040 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -507,18 +507,21 @@ static void create_lost_and_found(ext2_filsys fs)
 		exit(1);
 	}
 
-	for (i=1; i < EXT2_NDIR_BLOCKS; i++) {
-		/* Ensure that lost+found is at least 2 blocks, so we always
-		 * test large empty blocks for big-block filesystems.  */
-		if ((lpf_size += fs->blocksize) >= 16*1024 &&
-		    lpf_size >= 2 * fs->blocksize)
-			break;
-		retval = ext2fs_expand_dir(fs, ino);
-		if (retval) {
-			com_err("ext2fs_expand_dir", retval, "%s",
-				_("while expanding /lost+found"));
-			exit(1);
-		}
+	/* Ensure that lost+found is at least 2 blocks, so we always
+	 * test large empty blocks for big-block filesystems.  */
+	lpf_size = EXT2_NDIR_BLOCKS;
+	if (lpf_size * fs->blocksize > 16 * 1024)
+		lpf_size = 16 * 1024 / fs->blocksize;
+	if (lpf_size < 2)
+		lpf_size = 2;
+
+	/* round up size to full cluster, no point in making it smaller */
+	lpf_size = EXT2FS_C2B(fs, EXT2FS_B2C(fs, lpf_size - 1) + 1);
+	retval = ext2fs_expand_dir2(fs, ino, lpf_size - 1 /* initial block */);
+	if (retval) {
+		com_err("ext2fs_expand_dir", retval, "%s",
+			_("while expanding /lost+found"));
+		exit(1);
 	}
 }
 
diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
index b25e106..1824778 100644
--- a/tests/f_large_dir/script
+++ b/tests/f_large_dir/script
@@ -32,11 +32,9 @@ if [ $RC -eq 0 ]; then
 	echo "cd /foo"
 	touch $TMPFILE.tmp
 	echo "write $TMPFILE.tmp foofile"
+	echo "expand ./ $((DIRBLK))"
 	i=0
 	while test $i -lt $ENTRIES ; do
-	    if test $((i % DIRENT_PER_LEAF)) -eq 0; then
-	    	echo "expand ./"
-	    fi
 	    if test $((i % 5000)) -eq 0 -a $SECONDS -ne $START; then
 		ELAPSED=$((SECONDS - START))
 		RATE=$((i / ELAPSED))
-- 
1.8.0

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-08-30  7:47 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-30  7:47 [PATCH 1/2] e2fsck: set dir_nlink feature if large dir exists Andreas Dilger
2017-08-30  7:47 ` [PATCH 2/2] ext2fs: improve expand_dir performance Andreas Dilger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.