Linux-ext4 Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap()
@ 2019-11-20  4:35 Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap Li Dongyang
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Li Dongyang @ 2019-11-20  4:35 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger

For a bigalloc filesystem, converting the block bitmap from blocks
to chunks in ext2fs_convert_subcluster_bitmap() can take a long time
when the device is huge, because we test the bitmap
bit-by-bit using ext2fs_test_block_bitmap2().
Use ext2fs_find_first_set_block_bitmap2() which is more efficient
for mke2fs when the fs is mostly empty.

e2fsck can also benefit from this during pass1 block scanning.

Time taken for "mke2fs -O bigalloc,extent -C 131072 -b 4096" on a 1PB
device:

without patch:
real    27m49.457s
user    21m36.474s
sys     6m9.514s

with patch:
real    6m31.908s
user    0m1.806s
sys    6m29.697s

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 lib/ext2fs/gen_bitmap64.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c
index 6e4d8b71..f1dd1891 100644
--- a/lib/ext2fs/gen_bitmap64.c
+++ b/lib/ext2fs/gen_bitmap64.c
@@ -799,8 +799,7 @@ errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
 	ext2fs_generic_bitmap_64 bmap, cmap;
 	ext2fs_block_bitmap	gen_bmap = *bitmap, gen_cmap;
 	errcode_t		retval;
-	blk64_t			i, b_end, c_end;
-	int			n, ratio;
+	blk64_t			i, next, b_end, c_end;
 
 	bmap = (ext2fs_generic_bitmap_64) gen_bmap;
 	if (fs->cluster_ratio_bits == ext2fs_get_bitmap_granularity(gen_bmap))
@@ -817,18 +816,13 @@ errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
 	bmap->end = bmap->real_end;
 	c_end = cmap->end;
 	cmap->end = cmap->real_end;
-	n = 0;
-	ratio = 1 << fs->cluster_ratio_bits;
 	while (i < bmap->real_end) {
-		if (ext2fs_test_block_bitmap2(gen_bmap, i)) {
-			ext2fs_mark_block_bitmap2(gen_cmap, i);
-			i += ratio - n;
-			n = 0;
-			continue;
-		}
-		i++; n++;
-		if (n >= ratio)
-			n = 0;
+		retval = ext2fs_find_first_set_block_bitmap2(gen_bmap,
+						i, bmap->real_end, &next);
+		if (retval)
+			break;
+		ext2fs_mark_block_bitmap2(gen_cmap, next);
+		i = EXT2FS_C2B(fs, EXT2FS_B2C(fs, next) + 1);
 	}
 	bmap->end = b_end;
 	cmap->end = c_end;
-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap
  2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
@ 2019-11-20  4:35 ` Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 3/5] ext2fs: rename "s_overhead_blocks" to "s_overhead_clusters" Li Dongyang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Li Dongyang @ 2019-11-20  4:35 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger

We mark the bad blocks as used on fs->block_map
before allocating group tables.
Don't translate the block number to cluster number
when doing this, the fs->block_map is still a
block-granularity allocation map, it will be converted
later by ext2fs_convert_subcluster_bitmap().

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
 misc/mke2fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index ffea8233..be38d2c4 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -344,7 +344,7 @@ _("Warning: the backup superblock/group descriptors at block %u contain\n"
 		exit(1);
 	}
 	while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
-		ext2fs_mark_block_bitmap2(fs->block_map, EXT2FS_B2C(fs, blk));
+		ext2fs_mark_block_bitmap2(fs->block_map, blk);
 	ext2fs_badblocks_list_iterate_end(bb_iter);
 }
 
-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3 3/5] ext2fs: rename "s_overhead_blocks" to "s_overhead_clusters"
  2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap Li Dongyang
@ 2019-11-20  4:35 ` Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 4/5] mke2fs: set overhead in super block Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 5/5] ext4: record " Li Dongyang
  3 siblings, 0 replies; 5+ messages in thread
From: Li Dongyang @ 2019-11-20  4:35 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger

Rename s_overhead_blocks field from struct ext2_super_block to
make it consistent with the kernel counterpart.

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 debugfs/set_fields.c        | 2 +-
 lib/e2p/ls.c                | 6 +++---
 lib/ext2fs/ext2_fs.h        | 2 +-
 lib/ext2fs/swapfs.c         | 2 +-
 lib/ext2fs/tst_super_size.c | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
index 5142554d..f497bd92 100644
--- a/debugfs/set_fields.c
+++ b/debugfs/set_fields.c
@@ -160,7 +160,7 @@ static struct field_set_info super_fields[] = {
 	{ "usr_quota_inum", &set_sb.s_usr_quota_inum, NULL, 4, parse_uint },
 	{ "grp_quota_inum", &set_sb.s_grp_quota_inum, NULL, 4, parse_uint },
 	{ "prj_quota_inum", &set_sb.s_prj_quota_inum, NULL, 4, parse_uint },
-	{ "overhead_blocks", &set_sb.s_overhead_blocks, NULL, 4, parse_uint },
+	{ "overhead_clusters", &set_sb.s_overhead_clusters, NULL, 4, parse_uint },
 	{ "backup_bgs", &set_sb.s_backup_bgs[0], NULL, 4, parse_uint,
 	  FLAG_ARRAY, 2 },
 	{ "checksum", &set_sb.s_checksum, NULL, 4, parse_uint },
diff --git a/lib/e2p/ls.c b/lib/e2p/ls.c
index 5a446178..5ca750f6 100644
--- a/lib/e2p/ls.c
+++ b/lib/e2p/ls.c
@@ -272,9 +272,9 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
 	fprintf(f, "Inode count:              %u\n", sb->s_inodes_count);
 	fprintf(f, "Block count:              %llu\n", e2p_blocks_count(sb));
 	fprintf(f, "Reserved block count:     %llu\n", e2p_r_blocks_count(sb));
-	if (sb->s_overhead_blocks)
-		fprintf(f, "Overhead blocks:          %u\n",
-			sb->s_overhead_blocks);
+	if (sb->s_overhead_clusters)
+		fprintf(f, "Overhead clusters:          %u\n",
+			sb->s_overhead_clusters);
 	fprintf(f, "Free blocks:              %llu\n", e2p_free_blocks_count(sb));
 	fprintf(f, "Free inodes:              %u\n", sb->s_free_inodes_count);
 	fprintf(f, "First block:              %u\n", sb->s_first_data_block);
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 3165b389..7fa8406f 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -742,7 +742,7 @@ struct ext2_super_block {
 /*200*/	__u8	s_mount_opts[64];
 /*240*/	__u32	s_usr_quota_inum;	/* inode number of user quota file */
 	__u32	s_grp_quota_inum;	/* inode number of group quota file */
-	__u32	s_overhead_blocks;	/* overhead blocks/clusters in fs */
+	__u32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
 /*24c*/	__u32	s_backup_bgs[2];	/* If sparse_super2 enabled */
 /*254*/	__u8	s_encrypt_algos[4];	/* Encryption algorithms in use  */
 /*258*/	__u8	s_encrypt_pw_salt[16];	/* Salt used for string2key algorithm */
diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
index a1560045..63b24330 100644
--- a/lib/ext2fs/swapfs.c
+++ b/lib/ext2fs/swapfs.c
@@ -121,7 +121,7 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
 	/* sb->s_mount_opts is __u8 and does not need swabbing */
 	sb->s_usr_quota_inum = ext2fs_swab32(sb->s_usr_quota_inum);
 	sb->s_grp_quota_inum = ext2fs_swab32(sb->s_grp_quota_inum);
-	sb->s_overhead_blocks = ext2fs_swab32(sb->s_overhead_blocks);
+	sb->s_overhead_clusters = ext2fs_swab32(sb->s_overhead_clusters);
 	sb->s_backup_bgs[0] = ext2fs_swab32(sb->s_backup_bgs[0]);
 	sb->s_backup_bgs[1] = ext2fs_swab32(sb->s_backup_bgs[1]);
 	/* sb->s_encrypt_algos is __u8 and does not need swabbing */
diff --git a/lib/ext2fs/tst_super_size.c b/lib/ext2fs/tst_super_size.c
index a932685d..ab38dd59 100644
--- a/lib/ext2fs/tst_super_size.c
+++ b/lib/ext2fs/tst_super_size.c
@@ -135,7 +135,7 @@ int main(int argc, char **argv)
 	check_field(s_mount_opts, 64);
 	check_field(s_usr_quota_inum, 4);
 	check_field(s_grp_quota_inum, 4);
-	check_field(s_overhead_blocks, 4);
+	check_field(s_overhead_clusters, 4);
 	check_field(s_backup_bgs, 8);
 	check_field(s_encrypt_algos, 4);
 	check_field(s_encrypt_pw_salt, 16);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3 4/5] mke2fs: set overhead in super block
  2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 3/5] ext2fs: rename "s_overhead_blocks" to "s_overhead_clusters" Li Dongyang
@ 2019-11-20  4:35 ` Li Dongyang
  2019-11-20  4:35 ` [PATCH v3 5/5] ext4: record " Li Dongyang
  3 siblings, 0 replies; 5+ messages in thread
From: Li Dongyang @ 2019-11-20  4:35 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger

If overhead is not recorded in the super block, it is calculated
during mount in kernel, for bigalloc file systems it takes
O(groups**2) in time.
For a 1PB device with 32K cluster size it takes ~12 mins to
mount, with most of the time spent on figuring out overhead.

While we can not improve the overhead algorithm in kernel
due to the nature of bigalloc, we can work out the overhead
during mke2fs and set it in the super block, avoiding calculating
it every time when it mounts.

Overhead is s_first_data_block plus internal journal blocks plus
the block and inode bitmaps, inode table, super block backups and
group descriptor blocks for every group. This patch introduces
ext2fs_count_used_clusters(), which calculates the clusters used
in the block bitmap for the given range.

When bad blocks are involved, it gets tricky because the blocks
counted as overhead and the bad blocks can end up in the same
allocation cluster. In this case we will unmark the bad blocks from
the block bitmap, convert to cluster bitmap and get the overhead,
then mark the bad blocks back in the cluster bitmap.

Reset the overhead to zero when resizing, we can not simply count
the used blocks as overhead like we do when mke2fs. The overhead
can be calculated by kernel side during mount.

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
 lib/ext2fs/ext2fs.h       |  2 ++
 lib/ext2fs/gen_bitmap64.c | 35 +++++++++++++++++++++++++++++
 misc/mke2fs.c             | 47 +++++++++++++++++++++++++++++++++++++++
 resize/resize2fs.c        |  1 +
 4 files changed, 85 insertions(+)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index da309947..78f84632 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1442,6 +1442,8 @@ errcode_t ext2fs_set_generic_bmap_range(ext2fs_generic_bitmap bmap,
 					void *in);
 errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
 					   ext2fs_block_bitmap *bitmap);
+errcode_t ext2fs_count_used_clusters(ext2_filsys fs, blk64_t start,
+				     blk64_t end, blk64_t *out);
 
 /* get_num_dirs.c */
 extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs);
diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c
index f1dd1891..b2370667 100644
--- a/lib/ext2fs/gen_bitmap64.c
+++ b/lib/ext2fs/gen_bitmap64.c
@@ -940,3 +940,38 @@ errcode_t ext2fs_find_first_set_generic_bmap(ext2fs_generic_bitmap bitmap,
 
 	return ENOENT;
 }
+
+errcode_t ext2fs_count_used_clusters(ext2_filsys fs, blk64_t start,
+				     blk64_t end, blk64_t *out)
+{
+	blk64_t		next;
+	blk64_t		tot_set = 0;
+	errcode_t	retval;
+
+	while (start < end) {
+		retval = ext2fs_find_first_set_block_bitmap2(fs->block_map,
+							start, end, &next);
+		if (retval) {
+			if (retval == ENOENT)
+				retval = 0;
+			break;
+		}
+		start = next;
+
+		retval = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
+							start, end, &next);
+		if (retval == 0) {
+			tot_set += next - start;
+			start  = next + 1;
+		} else if (retval == ENOENT) {
+			retval = 0;
+			tot_set += end - start + 1;
+			break;
+		} else
+			break;
+	}
+
+	if (!retval)
+		*out = EXT2FS_NUM_B2C(fs, tot_set);
+	return retval;
+}
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index be38d2c4..542a3030 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -2914,6 +2914,8 @@ int main (int argc, char *argv[])
 	errcode_t	retval = 0;
 	ext2_filsys	fs;
 	badblocks_list	bb_list = 0;
+	badblocks_iterate	bb_iter;
+	blk_t		blk;
 	unsigned int	journal_blocks = 0;
 	unsigned int	i, checkinterval;
 	int		max_mnt_count;
@@ -2924,6 +2926,7 @@ int main (int argc, char *argv[])
 	char		opt_string[40];
 	char		*hash_alg_str;
 	int		itable_zeroed = 0;
+	blk64_t		overhead;
 
 #ifdef ENABLE_NLS
 	setlocale(LC_MESSAGES, "");
@@ -3215,6 +3218,23 @@ int main (int argc, char *argv[])
 	if (!quiet)
 		printf("%s", _("done                            \n"));
 
+	/*
+	 * Unmark bad blocks to calculate overhead, because metadata
+	 * blocks and bad blocks can land on the same allocation cluster.
+	 */
+	if (bb_list) {
+		retval = ext2fs_badblocks_list_iterate_begin(bb_list,
+							     &bb_iter);
+		if (retval) {
+			com_err("ext2fs_badblocks_list_iterate_begin", retval,
+				"%s", _("while unmarking bad blocks"));
+			exit(1);
+		}
+		while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
+			ext2fs_unmark_block_bitmap2(fs->block_map, blk);
+		ext2fs_badblocks_list_iterate_end(bb_iter);
+	}
+
 	retval = ext2fs_convert_subcluster_bitmap(fs, &fs->block_map);
 	if (retval) {
 		com_err(program_name, retval, "%s",
@@ -3222,6 +3242,28 @@ int main (int argc, char *argv[])
 		exit(1);
 	}
 
+	retval = ext2fs_count_used_clusters(fs, fs->super->s_first_data_block,
+					ext2fs_blocks_count(fs->super) - 1,
+					&overhead);
+	if (retval) {
+		com_err(program_name, retval, "%s",
+			_("while calculating overhead"));
+		exit(1);
+	}
+
+	if (bb_list) {
+		retval = ext2fs_badblocks_list_iterate_begin(bb_list,
+							     &bb_iter);
+		if (retval) {
+			com_err("ext2fs_badblocks_list_iterate_begin", retval,
+				"%s", _("while marking bad blocks as used"));
+			exit(1);
+		}
+		while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
+			ext2fs_mark_block_bitmap2(fs->block_map, blk);
+		ext2fs_badblocks_list_iterate_end(bb_iter);
+	}
+
 	if (super_only) {
 		check_plausibility(device_name, CHECK_FS_EXIST, NULL);
 		printf(_("%s may be further corrupted by superblock rewrite\n"),
@@ -3319,6 +3361,7 @@ int main (int argc, char *argv[])
 		free(journal_device);
 	} else if ((journal_size) ||
 		   ext2fs_has_feature_journal(&fs_param)) {
+		overhead += EXT2FS_NUM_B2C(fs, journal_blocks);
 		if (super_only) {
 			printf("%s", _("Skipping journal creation in super-only mode\n"));
 			fs->super->s_journal_inum = EXT2_JOURNAL_INO;
@@ -3361,6 +3404,10 @@ no_journal:
 			       fs->super->s_mmp_update_interval);
 	}
 
+	overhead += fs->super->s_first_data_block;
+	if (!super_only)
+		fs->super->s_overhead_clusters = overhead;
+
 	if (ext2fs_has_feature_bigalloc(&fs_param))
 		fix_cluster_bg_counts(fs);
 	if (ext2fs_has_feature_quota(&fs_param))
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 8a3d08db..2443ff67 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -703,6 +703,7 @@ errcode_t adjust_fs_info(ext2_filsys fs, ext2_filsys old_fs,
 	double		percent;
 
 	ext2fs_blocks_count_set(fs->super, new_size);
+	fs->super->s_overhead_clusters = 0;
 
 retry:
 	fs->group_desc_count = ext2fs_div64_ceil(ext2fs_blocks_count(fs->super) -
-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3 5/5] ext4: record overhead in super block
  2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
                   ` (2 preceding siblings ...)
  2019-11-20  4:35 ` [PATCH v3 4/5] mke2fs: set overhead in super block Li Dongyang
@ 2019-11-20  4:35 ` " Li Dongyang
  3 siblings, 0 replies; 5+ messages in thread
From: Li Dongyang @ 2019-11-20  4:35 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger

Store the overhead in super block so we don't have
to calculate again during next mount.
It also gets updated after online resizing.

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
 fs/ext4/resize.c | 1 +
 fs/ext4/super.c  | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c0e9aef376a7..edab58c8ff20 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1431,6 +1431,7 @@ static void ext4_update_super(struct super_block *sb,
 	 * Update the fs overhead information
 	 */
 	ext4_calculate_overhead(sb);
+	es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
 
 	if (test_opt(sb, DEBUG))
 		printk(KERN_DEBUG "EXT4-fs: added group %u:"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dd654e53ba3d..c859c67cd5db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4467,6 +4467,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		err = ext4_calculate_overhead(sb);
 		if (err)
 			goto failed_mount_wq;
+		if (!sb_rdonly(sb)) {
+			es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+			ext4_commit_super(sb, 1);
+		}
 	}
 
 	/*
-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
2019-11-20  4:35 ` [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap Li Dongyang
2019-11-20  4:35 ` [PATCH v3 3/5] ext2fs: rename "s_overhead_blocks" to "s_overhead_clusters" Li Dongyang
2019-11-20  4:35 ` [PATCH v3 4/5] mke2fs: set overhead in super block Li Dongyang
2019-11-20  4:35 ` [PATCH v3 5/5] ext4: record " Li Dongyang

Linux-ext4 Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-ext4/0 linux-ext4/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-ext4 linux-ext4/ https://lore.kernel.org/linux-ext4 \
		linux-ext4@vger.kernel.org
	public-inbox-index linux-ext4

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-ext4


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git