linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Li Dongyang <dongyangli@ddn.com>
To: "linux-ext4@vger.kernel.org" <linux-ext4@vger.kernel.org>
Cc: "adilger@dilger.ca" <adilger@dilger.ca>
Subject: [PATCH v3 4/5] mke2fs: set overhead in super block
Date: Wed, 20 Nov 2019 04:35:27 +0000	[thread overview]
Message-ID: <20191120043448.249988-4-dongyangli@ddn.com> (raw)
In-Reply-To: <20191120043448.249988-1-dongyangli@ddn.com>

If overhead is not recorded in the super block, it is calculated
during mount in the kernel; for bigalloc file systems this takes
O(groups**2) in time.
For a 1PB device with 32K cluster size it takes ~12 mins to
mount, with most of the time spent on figuring out overhead.

While we can not improve the overhead algorithm in kernel
due to the nature of bigalloc, we can work out the overhead
during mke2fs and set it in the super block, avoiding calculating
it every time when it mounts.

Overhead is s_first_data_block plus internal journal blocks plus
the block and inode bitmaps, inode table, super block backups and
group descriptor blocks for every group. This patch introduces
ext2fs_count_used_clusters(), which calculates the clusters used
in the block bitmap for the given range.

When bad blocks are involved, it gets tricky because the blocks
counted as overhead and the bad blocks can end up in the same
allocation cluster. In this case we will unmark the bad blocks from
the block bitmap, convert to cluster bitmap and get the overhead,
then mark the bad blocks back in the cluster bitmap.

Reset the overhead to zero when resizing, since we can not simply count
the used blocks as overhead like we do in mke2fs. The overhead
can then be calculated by the kernel during mount.

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
 lib/ext2fs/ext2fs.h       |  2 ++
 lib/ext2fs/gen_bitmap64.c | 35 +++++++++++++++++++++++++++++
 misc/mke2fs.c             | 47 +++++++++++++++++++++++++++++++++++++++
 resize/resize2fs.c        |  1 +
 4 files changed, 85 insertions(+)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index da309947..78f84632 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1442,6 +1442,8 @@ errcode_t ext2fs_set_generic_bmap_range(ext2fs_generic_bitmap bmap,
 					void *in);
 errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
 					   ext2fs_block_bitmap *bitmap);
+errcode_t ext2fs_count_used_clusters(ext2_filsys fs, blk64_t start,
+				     blk64_t end, blk64_t *out);
 
 /* get_num_dirs.c */
 extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs);
diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c
index f1dd1891..b2370667 100644
--- a/lib/ext2fs/gen_bitmap64.c
+++ b/lib/ext2fs/gen_bitmap64.c
@@ -940,3 +940,38 @@ errcode_t ext2fs_find_first_set_generic_bmap(ext2fs_generic_bitmap bitmap,
 
 	return ENOENT;
 }
+
+errcode_t ext2fs_count_used_clusters(ext2_filsys fs, blk64_t start,
+				     blk64_t end, blk64_t *out)
+{
+	blk64_t		next;
+	blk64_t		tot_set = 0;
+	errcode_t	retval;
+
+	while (start < end) {
+		retval = ext2fs_find_first_set_block_bitmap2(fs->block_map,
+							start, end, &next);
+		if (retval) {
+			if (retval == ENOENT)
+				retval = 0;
+			break;
+		}
+		start = next;
+
+		retval = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
+							start, end, &next);
+		if (retval == 0) {
+			tot_set += next - start;
+			start  = next + 1;
+		} else if (retval == ENOENT) {
+			retval = 0;
+			tot_set += end - start + 1;
+			break;
+		} else
+			break;
+	}
+
+	if (!retval)
+		*out = EXT2FS_NUM_B2C(fs, tot_set);
+	return retval;
+}
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index be38d2c4..542a3030 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -2914,6 +2914,8 @@ int main (int argc, char *argv[])
 	errcode_t	retval = 0;
 	ext2_filsys	fs;
 	badblocks_list	bb_list = 0;
+	badblocks_iterate	bb_iter;
+	blk_t		blk;
 	unsigned int	journal_blocks = 0;
 	unsigned int	i, checkinterval;
 	int		max_mnt_count;
@@ -2924,6 +2926,7 @@ int main (int argc, char *argv[])
 	char		opt_string[40];
 	char		*hash_alg_str;
 	int		itable_zeroed = 0;
+	blk64_t		overhead;
 
 #ifdef ENABLE_NLS
 	setlocale(LC_MESSAGES, "");
@@ -3215,6 +3218,23 @@ int main (int argc, char *argv[])
 	if (!quiet)
 		printf("%s", _("done                            \n"));
 
+	/*
+	 * Unmark bad blocks to calculate overhead, because metadata
+	 * blocks and bad blocks can land on the same allocation cluster.
+	 */
+	if (bb_list) {
+		retval = ext2fs_badblocks_list_iterate_begin(bb_list,
+							     &bb_iter);
+		if (retval) {
+			com_err("ext2fs_badblocks_list_iterate_begin", retval,
+				"%s", _("while unmarking bad blocks"));
+			exit(1);
+		}
+		while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
+			ext2fs_unmark_block_bitmap2(fs->block_map, blk);
+		ext2fs_badblocks_list_iterate_end(bb_iter);
+	}
+
 	retval = ext2fs_convert_subcluster_bitmap(fs, &fs->block_map);
 	if (retval) {
 		com_err(program_name, retval, "%s",
@@ -3222,6 +3242,28 @@ int main (int argc, char *argv[])
 		exit(1);
 	}
 
+	retval = ext2fs_count_used_clusters(fs, fs->super->s_first_data_block,
+					ext2fs_blocks_count(fs->super) - 1,
+					&overhead);
+	if (retval) {
+		com_err(program_name, retval, "%s",
+			_("while calculating overhead"));
+		exit(1);
+	}
+
+	if (bb_list) {
+		retval = ext2fs_badblocks_list_iterate_begin(bb_list,
+							     &bb_iter);
+		if (retval) {
+			com_err("ext2fs_badblocks_list_iterate_begin", retval,
+				"%s", _("while marking bad blocks as used"));
+			exit(1);
+		}
+		while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
+			ext2fs_mark_block_bitmap2(fs->block_map, blk);
+		ext2fs_badblocks_list_iterate_end(bb_iter);
+	}
+
 	if (super_only) {
 		check_plausibility(device_name, CHECK_FS_EXIST, NULL);
 		printf(_("%s may be further corrupted by superblock rewrite\n"),
@@ -3319,6 +3361,7 @@ int main (int argc, char *argv[])
 		free(journal_device);
 	} else if ((journal_size) ||
 		   ext2fs_has_feature_journal(&fs_param)) {
+		overhead += EXT2FS_NUM_B2C(fs, journal_blocks);
 		if (super_only) {
 			printf("%s", _("Skipping journal creation in super-only mode\n"));
 			fs->super->s_journal_inum = EXT2_JOURNAL_INO;
@@ -3361,6 +3404,10 @@ no_journal:
 			       fs->super->s_mmp_update_interval);
 	}
 
+	overhead += fs->super->s_first_data_block;
+	if (!super_only)
+		fs->super->s_overhead_clusters = overhead;
+
 	if (ext2fs_has_feature_bigalloc(&fs_param))
 		fix_cluster_bg_counts(fs);
 	if (ext2fs_has_feature_quota(&fs_param))
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 8a3d08db..2443ff67 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -703,6 +703,7 @@ errcode_t adjust_fs_info(ext2_filsys fs, ext2_filsys old_fs,
 	double		percent;
 
 	ext2fs_blocks_count_set(fs->super, new_size);
+	fs->super->s_overhead_clusters = 0;
 
 retry:
 	fs->group_desc_count = ext2fs_div64_ceil(ext2fs_blocks_count(fs->super) -
-- 
2.24.0


  parent reply	other threads:[~2019-11-20  4:36 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-20  4:35 [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Li Dongyang
2019-11-20  4:35 ` [PATCH v3 2/5] mke2fs: fix setting bad blocks in the block bitmap Li Dongyang
2019-12-31  3:47   ` Theodore Y. Ts'o
2019-11-20  4:35 ` [PATCH v3 3/5] ext2fs: rename "s_overhead_blocks" to "s_overhead_clusters" Li Dongyang
2020-01-27  3:05   ` Theodore Y. Ts'o
2019-11-20  4:35 ` Li Dongyang [this message]
2020-01-27  4:15   ` [PATCH v3 4/5] mke2fs: set overhead in super block Theodore Y. Ts'o
2019-11-20  4:35 ` [PATCH v3 5/5] ext4: record " Li Dongyang
2020-01-27  2:56 ` [PATCH v3 1/5] libext2fs: optimize ext2fs_convert_subcluster_bitmap() Theodore Y. Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191120043448.249988-4-dongyangli@ddn.com \
    --to=dongyangli@ddn.com \
    --cc=adilger@dilger.ca \
    --cc=linux-ext4@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).