Linux-ext4 Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes
@ 2020-03-19 23:34 Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 1/7] e2fsck: make recovery.c identical with kernel Harshad Shirwadkar
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

This patch series adds e2fsck recovery path changes for fast commits.
Fast commit blocks are laid out in the reserved journal area. Each
block in the fast commit region corresponds to one inode. It contains
a header, a copy of the inode, and zero or more of the following
tags:

* EXT4_FC_TAG_ADD_DENTRY: add dentry to a directory
* EXT4_FC_TAG_DEL_DENTRY: remove dentry from a directory
* EXT4_FC_TAG_CREAT_DENTRY: new inode with corresponding dentry
* EXT4_FC_TAG_ADD_RANGE: add extent to inode
* EXT4_FC_TAG_DEL_RANGE: remove logical range from inode

At a high level, the new journal replay looks like this:

- For all fast commit blocks, invoke the j_fc_replay() handler to
  handle the blocks in the SCAN phase.
  - In this phase, the ext4 replay handler verifies that the fast
    commit header is not malformed. If there's an error, it stops
    replay by marking an error in the replay state.
- Replay phase
  - In the replay phase, for every fast commit block, fast commit
    tags are handled in the following order:
    - Directory entry updates (Add / Remove / Create)
    - Data updates for the inode in question.

Verified that all the tests pass:
367 tests succeeded     0 tests failed

New fast commit recovery test:
j_recover_fast_commit: : ok

Github: https://github.com/harshadjs/e2fsprogs/tree/fast-commit-submit

Harshad Shirwadkar(8):
 ext2fs: add fast_commit test
 e2fsck/jbd2: fast commit recovery changes
 e2fsck: main replay handler
 e2fsck/jbd2: add fast commit feature in jbd2
 ext2fs: make ext2fs_calculate_summary_stats() visible
 e2fsck: allow rewriting extents of a file
 e2fsck: make recovery.c identical with kernel
 e2fsck: fast commit recovery path e2fsck changes

 e2fsck/e2fsck.h                      |  26 ++
 e2fsck/extents.c                     | 160 +++++++-----
 e2fsck/jfs_user.h                    |   9 +
 e2fsck/journal.c                     | 489 ++++++++++++++++++++++++++++++++++-
 e2fsck/recovery.c                    | 113 +++++---
 lib/ext2fs/ext2_fs.h                 |  46 ++++
 lib/ext2fs/ext2fs.h                  |   1 +
 lib/ext2fs/initialize.c              |  58 +++++
 lib/ext2fs/jfs_compat.h              |   9 +
 lib/ext2fs/kernel-jbd.h              |   7 +-
 misc/tune2fs.c                       |  57 ----
 resize/resize2fs.c                   |   6 +-
 tests/j_recover_fast_commit/commands |   5 +
 tests/j_recover_fast_commit/expect   |  23 ++
 tests/j_recover_fast_commit/image.gz | Bin 0 -> 87787 bytes
 tests/j_recover_fast_commit/name     |   1 +
 tests/j_recover_fast_commit/script   |  25 ++
 17 files changed, 875 insertions(+), 160 deletions(-)

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/7] e2fsck: make recovery.c identical with kernel
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 2/7] e2fsck: allow rewriting extents of a file Harshad Shirwadkar
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

Add endianness conversion macros identical to the kernel's and fix the
differences between the kernel's and e2fsprogs' recovery.c to make them
identical.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 e2fsck/jfs_user.h |  9 +++++++++
 e2fsck/recovery.c | 46 ++++++++++++----------------------------------
 2 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/e2fsck/jfs_user.h b/e2fsck/jfs_user.h
index a97fcc18..62a3e8a8 100644
--- a/e2fsck/jfs_user.h
+++ b/e2fsck/jfs_user.h
@@ -267,4 +267,13 @@ extern int	jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
 extern int	jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
 extern void	jbd2_journal_clear_revoke(journal_t *);
 
+/* Macros for kernel compatibility */
+#define be32_to_cpu(x)		ext2fs_be32_to_cpu(x)
+#define cpu_to_be32(x)		ext2fs_cpu_to_be32(x)
+#define cpu_to_be16(x)		ext2fs_cpu_to_be16(x)
+#define le16_to_cpu(x)		ext2fs_le16_to_cpu(x)
+#define le32_to_cpu(x)		ext2fs_le32_to_cpu(x)
+
+#define pr_warn(...)
+
 #endif /* _JFS_USER_H */
diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c
index 5df690ad..4750f9c1 100644
--- a/e2fsck/recovery.c
+++ b/e2fsck/recovery.c
@@ -121,27 +121,6 @@ failed:
 
 #endif /* __KERNEL__ */
 
-static inline __u32 get_be32(__be32 *p)
-{
-	unsigned char *cp = (unsigned char *) p;
-	__u32 ret;
-
-	ret = *cp++;
-	ret = (ret << 8) + *cp++;
-	ret = (ret << 8) + *cp++;
-	ret = (ret << 8) + *cp++;
-	return ret;
-}
-
-static inline __u16 get_be16(__be16 *p)
-{
-	unsigned char *cp = (unsigned char *) p;
-	__u16 ret;
-
-	ret = *cp++;
-	ret = (ret << 8) + *cp++;
-	return ret;
-}
 
 /*
  * Read a block from the journal
@@ -201,7 +180,7 @@ static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
 	if (!jbd2_journal_has_csum_v2or3(j))
 		return 1;
 
-	tail = (struct jbd2_journal_block_tail *)((char *)buf + j->j_blocksize -
+	tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
 			sizeof(struct jbd2_journal_block_tail));
 	provided = tail->t_checksum;
 	tail->t_checksum = 0;
@@ -232,10 +211,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 
 		nr++;
 		tagp += tag_bytes;
-		if (!(get_be16(&tag->t_flags) & JBD2_FLAG_SAME_UUID))
+		if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
 			tagp += 16;
 
-		if (get_be16(&tag->t_flags) & JBD2_FLAG_LAST_TAG)
+		if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
 			break;
 	}
 
@@ -358,9 +337,9 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 static inline unsigned long long read_tag_block(journal_t *journal,
 						journal_block_tag_t *tag)
 {
-	unsigned long long block = get_be32(&tag->t_blocknr);
+	unsigned long long block = be32_to_cpu(tag->t_blocknr);
 	if (jbd2_has_feature_64bit(journal))
-		block |= (u64)get_be32(&tag->t_blocknr_high) << 32;
+		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
 	return block;
 }
 
@@ -429,9 +408,9 @@ static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
 	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
 
 	if (jbd2_has_feature_csum3(j))
-		return get_be32(&tag3->t_checksum) == csum32;
-
-	return get_be16(&tag->t_checksum) == (csum32 & 0xFFFF);
+		return tag3->t_checksum == cpu_to_be32(csum32);
+	else
+		return tag->t_checksum == cpu_to_be16(csum32);
 }
 
 static int do_one_pass(journal_t *journal,
@@ -579,7 +558,7 @@ static int do_one_pass(journal_t *journal,
 				unsigned long io_block;
 
 				tag = (journal_block_tag_t *) tagp;
-				flags = get_be16(&tag->t_flags);
+				flags = be16_to_cpu(tag->t_flags);
 
 				io_block = next_log_block++;
 				wrap(journal, next_log_block);
@@ -643,9 +622,8 @@ static int do_one_pass(journal_t *journal,
 					memcpy(nbh->b_data, obh->b_data,
 							journal->j_blocksize);
 					if (flags & JBD2_FLAG_ESCAPE) {
-						__be32 magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
-						memcpy(nbh->b_data, &magic,
-						       sizeof(magic));
+						*((__be32 *)nbh->b_data) =
+						cpu_to_be32(JBD2_MAGIC_NUMBER);
 					}
 
 					BUFFER_TRACE(nbh, "marking dirty");
@@ -833,7 +811,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 {
 	jbd2_journal_revoke_header_t *header;
 	int offset, max;
-	unsigned csum_size = 0;
+	int csum_size = 0;
 	__u32 rcount;
 	int record_len = 4;
 
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/7] e2fsck: allow rewriting extents of a file
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 1/7] e2fsck: make recovery.c identical with kernel Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 3/7] ext2fs: make ext2fs_calculate_summary_stats() visible Harshad Shirwadkar
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

Add a new function, e2fsck_rewrite_extent_tree(), that replaces the
extent tree of an inode. This allows the fast_commit code in subsequent
patches to recreate a file as expected.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 e2fsck/e2fsck.h  |  17 +++++
 e2fsck/extents.c | 160 +++++++++++++++++++++++++++++------------------
 2 files changed, 117 insertions(+), 60 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 9b2b9ce8..68f7a249 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -526,6 +526,19 @@ void destroy_encryption_policy_map(e2fsck_t ctx);
 void destroy_encrypted_file_info(e2fsck_t ctx);
 
 /* extents.c */
+struct extent_list {
+	blk64_t blocks_freed;
+	struct ext2fs_extent *extents;
+	unsigned int count;
+	unsigned int size;
+	unsigned int ext_read;
+	errcode_t retval;
+	ext2_ino_t ino;
+};
+
+#define NUM_EXTENTS	341	/* about one ETB' worth of extents */
+
+
 errcode_t e2fsck_rebuild_extents_later(e2fsck_t ctx, ext2_ino_t ino);
 int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino);
 void e2fsck_pass1e(e2fsck_t ctx);
@@ -536,6 +549,10 @@ errcode_t e2fsck_should_rebuild_extents(e2fsck_t ctx,
 					struct problem_context *pctx,
 					struct extent_tree_info *eti,
 					struct ext2_extent_info *info);
+errcode_t e2fsck_read_extents(e2fsck_t ctx, struct extent_list *extents);
+errcode_t e2fsck_rewrite_extent_tree(e2fsck_t ctx,
+				     struct extent_list *extents);
+
 
 /* journal.c */
 extern errcode_t e2fsck_check_ext3_journal(e2fsck_t ctx);
diff --git a/e2fsck/extents.c b/e2fsck/extents.c
index e9139326..dc10cc8c 100644
--- a/e2fsck/extents.c
+++ b/e2fsck/extents.c
@@ -20,7 +20,6 @@
 #undef DEBUG_SUMMARY
 #undef DEBUG_FREE
 
-#define NUM_EXTENTS	341	/* about one ETB' worth of extents */
 
 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino);
 
@@ -58,16 +57,6 @@ int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino)
 	return ext2fs_test_inode_bitmap2(ctx->inodes_to_rebuild, ino);
 }
 
-struct extent_list {
-	blk64_t blocks_freed;
-	struct ext2fs_extent *extents;
-	unsigned int count;
-	unsigned int size;
-	unsigned int ext_read;
-	errcode_t retval;
-	ext2_ino_t ino;
-};
-
 static errcode_t load_extents(e2fsck_t ctx, struct extent_list *list)
 {
 	ext2_filsys		fs = ctx->fs;
@@ -206,65 +195,35 @@ static int find_blocks(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
 	return 0;
 }
 
-static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
-				     ext2_ino_t ino)
+errcode_t __e2fsck_rewrite_extent_tree(e2fsck_t ctx, struct extent_list *list,
+				       struct ext2_inode_large *inode)
 {
-	struct ext2_inode_large	inode;
 	errcode_t		retval;
 	ext2_extent_handle_t	handle;
 	unsigned int		i, ext_written;
 	struct ext2fs_extent	*ex, extent;
 	blk64_t			start_val, delta;
 
-	list->count = 0;
-	list->blocks_freed = 0;
-	list->ino = ino;
-	list->ext_read = 0;
-	e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
-			       "rebuild_extents");
-
-	/* Skip deleted inodes and inline data files */
-	if (inode.i_links_count == 0 ||
-	    inode.i_flags & EXT4_INLINE_DATA_FL)
-		return 0;
-
-	/* Collect lblk->pblk mappings */
-	if (inode.i_flags & EXT4_EXTENTS_FL) {
-		retval = load_extents(ctx, list);
-		if (retval)
-			goto err;
-		goto extents_loaded;
-	}
-
-	retval = ext2fs_block_iterate3(ctx->fs, ino, BLOCK_FLAG_READ_ONLY, 0,
-				       find_blocks, list);
-	if (retval)
-		goto err;
-	if (list->retval) {
-		retval = list->retval;
-		goto err;
-	}
-
-extents_loaded:
 	/* Reset extent tree */
-	inode.i_flags &= ~EXT4_EXTENTS_FL;
-	memset(inode.i_block, 0, sizeof(inode.i_block));
+	inode->i_flags &= ~EXT4_EXTENTS_FL;
+	memset(inode->i_block, 0, sizeof(inode->i_block));
 
 	/* Make a note of freed blocks */
-	quota_data_sub(ctx->qctx, &inode, ino,
+	quota_data_sub(ctx->qctx, inode, list->ino,
 		       list->blocks_freed * ctx->fs->blocksize);
-	retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(&inode),
+	retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(inode),
 					list->blocks_freed);
 	if (retval)
-		goto err;
+		return retval;
 
 	/* Now stuff extents into the file */
-	retval = ext2fs_extent_open2(ctx->fs, ino, EXT2_INODE(&inode), &handle);
+	retval = ext2fs_extent_open2(ctx->fs, list->ino, EXT2_INODE(inode),
+					&handle);
 	if (retval)
-		goto err;
+		return retval;
 
 	ext_written = 0;
-	start_val = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(&inode));
+	start_val = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(inode));
 	for (i = 0, ex = list->extents; i < list->count; i++, ex++) {
 		memcpy(&extent, ex, sizeof(struct ext2fs_extent));
 		extent.e_flags &= EXT2_EXTENT_FLAGS_UNINIT;
@@ -289,36 +248,117 @@ extents_loaded:
 		}
 
 #ifdef DEBUG
-		printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", ino,
+		printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
 				extent.e_pblk, extent.e_lblk, extent.e_len);
 #endif
 		retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER,
 					      &extent);
 		if (retval)
-			goto err2;
+			goto err;
 		retval = ext2fs_extent_fix_parents(handle);
 		if (retval)
-			goto err2;
+			goto err;
 		ext_written++;
 	}
 
-	delta = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(&inode)) -
+	delta = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(inode)) -
 		start_val;
 	if (delta)
-		quota_data_add(ctx->qctx, &inode, ino, delta << 9);
+		quota_data_add(ctx->qctx, inode, list->ino, delta << 9);
 
 #if defined(DEBUG) || defined(DEBUG_SUMMARY)
 	printf("rebuild: ino=%d extents=%d->%d\n", ino, list->ext_read,
 	       ext_written);
 #endif
-	e2fsck_write_inode(ctx, ino, EXT2_INODE(&inode), "rebuild_extents");
+	e2fsck_write_inode(ctx, list->ino, EXT2_INODE(inode),
+				"rebuild_extents");
 
-err2:
-	ext2fs_extent_free(handle);
 err:
+	ext2fs_extent_free(handle);
 	return retval;
 }
 
+/* Replace the extent tree of inode list->ino with the extents in list. */
+errcode_t e2fsck_rewrite_extent_tree(e2fsck_t ctx, struct extent_list *list)
+{
+	struct ext2_inode_large inode;
+
+	e2fsck_read_inode_full(ctx, list->ino, EXT2_INODE(&inode),
+				sizeof(inode), "e2fsck_rewrite_extent_tree");
+
+	/* Skip deleted inodes and inline data files */
+	if (inode.i_links_count == 0 ||
+	    inode.i_flags & EXT4_INLINE_DATA_FL)
+		return 0;
+
+	return __e2fsck_rewrite_extent_tree(ctx, list, &inode);
+}
+
+/*
+ * Load the extents of inode extents->ino into a new extents->extents array.
+ */
+errcode_t e2fsck_read_extents(e2fsck_t ctx, struct extent_list *extents)
+{
+	struct ext2_inode_large	inode;
+	errcode_t		retval;
+
+	extents->extents = NULL;
+	extents->count = 0;
+	extents->blocks_freed = 0;
+	extents->ext_read = 0;
+	extents->size = NUM_EXTENTS;
+	retval = ext2fs_get_array(NUM_EXTENTS, sizeof(struct ext2fs_extent),
+				  &extents->extents);
+	if (retval)
+		return -ENOMEM;
+
+	e2fsck_read_inode_full(ctx, extents->ino, EXT2_INODE(&inode),
+				sizeof(inode), "read_extents");
+
+	/* Skip deleted inodes, inline data files and non-extent files */
+	if (inode.i_links_count == 0 || inode.i_flags & EXT4_INLINE_DATA_FL ||
+	    !(inode.i_flags & EXT4_EXTENTS_FL))
+		return 0;
+
+	retval = load_extents(ctx, extents);
+	if (retval)
+		ext2fs_free_mem(&extents->extents);
+	return retval;
+}
+
+/* Collect ino's block mappings into list, then rewrite them as extents. */
+static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
+				     ext2_ino_t ino)
+{
+	struct ext2_inode_large	inode;
+	errcode_t		retval;
+
+	list->count = 0;
+	list->blocks_freed = 0;
+	list->ino = ino;
+	list->ext_read = 0;
+	e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
+			       "rebuild_extents");
+
+	/* Skip deleted inodes and inline data files */
+	if (inode.i_links_count == 0 || inode.i_flags & EXT4_INLINE_DATA_FL)
+		return 0;
+
+	/* Collect lblk->pblk mappings */
+	if (inode.i_flags & EXT4_EXTENTS_FL) {
+		retval = load_extents(ctx, list);
+	} else {
+		retval = ext2fs_block_iterate3(ctx->fs, ino,
+				BLOCK_FLAG_READ_ONLY, 0, find_blocks, list);
+		/* An error may also have been left in list->retval */
+		if (!retval)
+			retval = list->retval;
+	}
+	if (retval)
+		return retval;
+	return __e2fsck_rewrite_extent_tree(ctx, list, &inode);
+}
+
 /* Rebuild the extents immediately */
 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino)
 {
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/7] ext2fs: make ext2fs_calculate_summary_stats() visible
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 1/7] e2fsck: make recovery.c identical with kernel Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 2/7] e2fsck: allow rewriting extents of a file Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 4/7] e2fsck/jbd2: add fast commit feature in jbd2 Harshad Shirwadkar
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

This function allows fast_commit code to recalculate summaries. Make
it visible and move it to lib.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 lib/ext2fs/ext2fs.h     |  1 +
 lib/ext2fs/initialize.c | 58 +++++++++++++++++++++++++++++++++++++++++
 misc/tune2fs.c          | 57 ----------------------------------------
 resize/resize2fs.c      |  6 ++---
 4 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index c9499839..833b6ee7 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1484,6 +1484,7 @@ errcode_t ext2fs_write_ind_block(ext2_filsys fs, blk_t blk, void *buf);
 extern errcode_t ext2fs_initialize(const char *name, int flags,
 				   struct ext2_super_block *param,
 				   io_manager manager, ext2_filsys *ret_fs);
+extern errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs);
 
 /* icount.c */
 extern void ext2fs_free_icount(ext2_icount_t icount);
diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c
index 96ec1cff..193aef80 100644
--- a/lib/ext2fs/initialize.c
+++ b/lib/ext2fs/initialize.c
@@ -575,3 +575,61 @@ cleanup:
 	ext2fs_free(fs);
 	return retval;
 }
+
+errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
+{
+	blk64_t		blk;
+	ext2_ino_t	ino;
+	unsigned int	group = 0;
+	unsigned int	count = 0;
+	int		total_free = 0;
+	int		group_free = 0;
+
+	/*
+	 * First calculate the block statistics
+	 */
+	for (blk = fs->super->s_first_data_block;
+	     blk < ext2fs_blocks_count(fs->super); blk++) {
+		if (!ext2fs_fast_test_block_bitmap2(fs->block_map, blk)) {
+			group_free++;
+			total_free++;
+		}
+		count++;
+		if ((count == fs->super->s_blocks_per_group) ||
+		    (blk == ext2fs_blocks_count(fs->super)-1)) {
+			ext2fs_bg_free_blocks_count_set(fs, group++,
+							group_free);
+			count = 0;
+			group_free = 0;
+		}
+	}
+	total_free = EXT2FS_C2B(fs, total_free);
+	ext2fs_free_blocks_count_set(fs->super, total_free);
+
+	/*
+	 * Next, calculate the inode statistics
+	 */
+	group_free = 0;
+	total_free = 0;
+	count = 0;
+	group = 0;
+
+	/* Protect loop from wrap-around if s_inodes_count maxed */
+	for (ino = 1; ino <= fs->super->s_inodes_count && ino > 0; ino++) {
+		if (!ext2fs_fast_test_inode_bitmap2(fs->inode_map, ino)) {
+			group_free++;
+			total_free++;
+		}
+		count++;
+		if ((count == fs->super->s_inodes_per_group) ||
+		    (ino == fs->super->s_inodes_count)) {
+			ext2fs_bg_free_inodes_count_set(fs, group++,
+							group_free);
+			count = 0;
+			group_free = 0;
+		}
+	}
+	fs->super->s_free_inodes_count = total_free;
+	ext2fs_mark_super_dirty(fs);
+	return 0;
+}
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index f7629952..541be2d7 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -2552,63 +2552,6 @@ err_out:
 	return retval;
 }
 
-static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
-{
-	blk64_t		blk;
-	ext2_ino_t	ino;
-	unsigned int	group = 0;
-	unsigned int	count = 0;
-	int		total_free = 0;
-	int		group_free = 0;
-
-	/*
-	 * First calculate the block statistics
-	 */
-	for (blk = fs->super->s_first_data_block;
-	     blk < ext2fs_blocks_count(fs->super); blk++) {
-		if (!ext2fs_fast_test_block_bitmap2(fs->block_map, blk)) {
-			group_free++;
-			total_free++;
-		}
-		count++;
-		if ((count == fs->super->s_blocks_per_group) ||
-		    (blk == ext2fs_blocks_count(fs->super)-1)) {
-			ext2fs_bg_free_blocks_count_set(fs, group++,
-							group_free);
-			count = 0;
-			group_free = 0;
-		}
-	}
-	total_free = EXT2FS_C2B(fs, total_free);
-	ext2fs_free_blocks_count_set(fs->super, total_free);
-
-	/*
-	 * Next, calculate the inode statistics
-	 */
-	group_free = 0;
-	total_free = 0;
-	count = 0;
-	group = 0;
-
-	/* Protect loop from wrap-around if s_inodes_count maxed */
-	for (ino = 1; ino <= fs->super->s_inodes_count && ino > 0; ino++) {
-		if (!ext2fs_fast_test_inode_bitmap2(fs->inode_map, ino)) {
-			group_free++;
-			total_free++;
-		}
-		count++;
-		if ((count == fs->super->s_inodes_per_group) ||
-		    (ino == fs->super->s_inodes_count)) {
-			ext2fs_bg_free_inodes_count_set(fs, group++,
-							group_free);
-			count = 0;
-			group_free = 0;
-		}
-	}
-	fs->super->s_free_inodes_count = total_free;
-	ext2fs_mark_super_dirty(fs);
-	return 0;
-}
 
 #define list_for_each_safe(pos, pnext, head) \
 	for (pos = (head)->next, pnext = pos->next; pos != (head); \
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 2443ff67..270e4deb 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -49,7 +49,7 @@ static errcode_t inode_scan_and_fix(ext2_resize_t rfs);
 static errcode_t inode_ref_fix(ext2_resize_t rfs);
 static errcode_t move_itables(ext2_resize_t rfs);
 static errcode_t fix_resize_inode(ext2_filsys fs);
-static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs);
+static errcode_t resize2fs_calculate_summary_stats(ext2_filsys fs);
 static errcode_t fix_sb_journal_backup(ext2_filsys fs);
 static errcode_t mark_table_blocks(ext2_filsys fs,
 				   ext2fs_block_bitmap bmap);
@@ -211,7 +211,7 @@ errcode_t resize_fs(ext2_filsys fs, blk64_t *new_size, int flags,
 		goto errout;
 
 	init_resource_track(&rtrack, "calculate_summary_stats", fs->io);
-	retval = ext2fs_calculate_summary_stats(rfs->new_fs);
+	retval = resize2fs_calculate_summary_stats(rfs->new_fs);
 	if (retval)
 		goto errout;
 	print_resource_track(rfs, &rtrack, fs->io);
@@ -2740,7 +2740,7 @@ errout:
 /*
  * Finally, recalculate the summary information
  */
-static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
+static errcode_t resize2fs_calculate_summary_stats(ext2_filsys fs)
 {
 	blk64_t		blk;
 	ext2_ino_t	ino;
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 4/7] e2fsck/jbd2: add fast commit feature in jbd2
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
                   ` (2 preceding siblings ...)
  2020-03-19 23:34 ` [PATCH 3/7] ext2fs: make ext2fs_calculate_summary_stats() visible Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 5/7] e2fsck/jbd2: fast commit recovery changes Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 6/7] e2fsck: main fast commit replay handler Harshad Shirwadkar
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

Add fast_commit feature flag in jbd2. These changes are present in
kernel code too.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 lib/ext2fs/kernel-jbd.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h
index cb1bc308..1250f5f0 100644
--- a/lib/ext2fs/kernel-jbd.h
+++ b/lib/ext2fs/kernel-jbd.h
@@ -259,6 +259,7 @@ typedef struct journal_superblock_s
 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
 #define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
 #define JBD2_FEATURE_INCOMPAT_CSUM_V3		0x00000010
+#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT	0x00000020
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES	0
@@ -267,7 +268,8 @@ typedef struct journal_superblock_s
 					 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT| \
 					 JBD2_FEATURE_INCOMPAT_64BIT|\
 					 JBD2_FEATURE_INCOMPAT_CSUM_V2|	\
-					 JBD2_FEATURE_INCOMPAT_CSUM_V3)
+					 JBD2_FEATURE_INCOMPAT_CSUM_V3 | \
+					 JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
 
 #ifdef NO_INLINE_FUNCS
 extern size_t journal_tag_bytes(journal_t *journal);
@@ -384,6 +386,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit,		64BIT)
 JBD2_FEATURE_INCOMPAT_FUNCS(async_commit,	ASYNC_COMMIT)
 JBD2_FEATURE_INCOMPAT_FUNCS(csum2,		CSUM_V2)
 JBD2_FEATURE_INCOMPAT_FUNCS(csum3,		CSUM_V3)
+JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
 
 #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS))
 /*
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 5/7] e2fsck/jbd2: fast commit recovery changes
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
                   ` (3 preceding siblings ...)
  2020-03-19 23:34 ` [PATCH 4/7] e2fsck/jbd2: add fast commit feature in jbd2 Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  2020-03-19 23:34 ` [PATCH 6/7] e2fsck: main fast commit replay handler Harshad Shirwadkar
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

Backport changes from kernel/fs/jbd2 related to fast commit recovery
path. This just allows jbd2 machinery to call a file-system specific
replay handler.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 e2fsck/recovery.c       | 67 ++++++++++++++++++++++++++++++++++++++---
 lib/ext2fs/jfs_compat.h |  9 ++++++
 lib/ext2fs/kernel-jbd.h |  2 ++
 3 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c
index 4750f9c1..18ad79d6 100644
--- a/e2fsck/recovery.c
+++ b/e2fsck/recovery.c
@@ -35,7 +35,6 @@ struct recovery_info
 	int		nr_revoke_hits;
 };
 
-enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
 static int do_one_pass(journal_t *journal,
 				struct recovery_info *info, enum passtype pass);
 static int scan_revoke_records(journal_t *, struct buffer_head *,
@@ -225,10 +224,63 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 /* Make sure we wrap around the log correctly! */
 #define wrap(journal, var)						\
 do {									\
-	if (var >= (journal)->j_last)					\
-		var -= ((journal)->j_last - (journal)->j_first);	\
+	unsigned long _wrap_last =					\
+		jbd2_has_feature_fast_commit(journal) ?			\
+			(journal)->j_last_fc : (journal)->j_last;	\
+									\
+	if (var >= _wrap_last)						\
+		var -= (_wrap_last - (journal)->j_first);		\
 } while (0)
 
+static int fc_do_one_pass(journal_t *journal,
+			  struct recovery_info *info, enum passtype pass)
+{
+	unsigned int expected_commit_id = info->end_transaction;
+	unsigned long next_fc_block;
+	struct buffer_head *bh;
+	unsigned int seq;
+	journal_header_t *jhdr;
+	int err = 0;
+
+	next_fc_block = journal->j_first_fc;
+
+	while (next_fc_block <= journal->j_last_fc) {
+		jbd_debug(3, "Fast commit replay: next block %ld",
+			  next_fc_block);
+		err = jread(&bh, journal, next_fc_block);
+		if (err) {
+			jbd_debug(3, "Fast commit replay: read error");
+			break;
+		}
+
+		jhdr = (journal_header_t *)bh->b_data;
+		seq = be32_to_cpu(jhdr->h_sequence);
+		if (be32_to_cpu(jhdr->h_magic) != JBD2_MAGIC_NUMBER ||
+		    seq != expected_commit_id) {
+			jbd_debug(3, "Fast commit replay: magic / commitid error [%d / %d / %d]\n",
+				  be32_to_cpu(jhdr->h_magic), seq,
+				  expected_commit_id);
+			break;
+		}
+		jbd_debug(3, "Processing fast commit blk with seq %d",
+			  seq);
+		if (journal->j_fc_replay_callback) {
+			err = journal->j_fc_replay_callback(
+						journal, bh, pass,
+						next_fc_block -
+						journal->j_first_fc);
+			if (err)
+				break;
+		}
+		next_fc_block++;
+	}
+
+	if (err)
+		jbd_debug(3, "Fast commit replay failed, err = %d\n", err);
+
+	return err;
+}
+
 /**
  * jbd2_journal_recover - recovers a on-disk journal
  * @journal: the journal to recover
@@ -470,7 +522,7 @@ static int do_one_pass(journal_t *journal,
 				break;
 
 		jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
-			  next_commit_ID, next_log_block, journal->j_last);
+			  next_commit_ID, next_log_block, journal->j_last_fc);
 
 		/* Skip over each chunk of the transaction looking
 		 * either the next descriptor block or the final commit
@@ -765,6 +817,9 @@ static int do_one_pass(journal_t *journal,
 			if (err)
 				goto failed;
 			continue;
+		case JBD2_FC_BLOCK:
+			pr_warn("Unexpectedly found fast commit block.\n");
+			continue;
 
 		default:
 			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
@@ -796,6 +851,10 @@ static int do_one_pass(journal_t *journal,
 				success = -EIO;
 		}
 	}
+
+	if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE)
+		success = fc_do_one_pass(journal, info, pass);
+
 	if (block_error && success == 0)
 		success = -EIO;
 	return success;
diff --git a/lib/ext2fs/jfs_compat.h b/lib/ext2fs/jfs_compat.h
index 2bda521d..20c0785f 100644
--- a/lib/ext2fs/jfs_compat.h
+++ b/lib/ext2fs/jfs_compat.h
@@ -64,6 +64,8 @@ static inline __u32 jbd2_chksum(journal_t *j EXT2FS_ATTR((unused)),
 #define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))
 #define pr_emerg(fmt)
 
+enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
+
 struct journal_s
 {
 	unsigned long		j_flags;
@@ -73,6 +75,9 @@ struct journal_s
 	int			j_format_version;
 	unsigned long		j_head;
 	unsigned long		j_tail;
+	unsigned long		j_first_fc;
+	unsigned long		j_fc_off;
+	unsigned long		j_last_fc;
 	unsigned long		j_free;
 	unsigned long		j_first, j_last;
 	kdev_t			j_dev;
@@ -88,6 +93,10 @@ struct journal_s
 	struct jbd2_revoke_table_s *j_revoke_table[2];
 	tid_t			j_failed_commit;
 	__u32			j_csum_seed;
+	int (*j_fc_replay_callback)(struct journal_s *journal,
+				    struct buffer_head *bh,
+				    enum passtype pass, int off);
+
 };
 
 #define is_journal_abort(x) 0
diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h
index 1250f5f0..2dcc5bcc 100644
--- a/lib/ext2fs/kernel-jbd.h
+++ b/lib/ext2fs/kernel-jbd.h
@@ -74,6 +74,7 @@ extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry);
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
+#define JBD2_FAST_COMMIT_BLOCKS 128
 
 /*
  * Internal structures used by the logging mechanism:
@@ -94,6 +95,7 @@ extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry);
 #define JBD2_SUPERBLOCK_V1	3
 #define JBD2_SUPERBLOCK_V2	4
 #define JBD2_REVOKE_BLOCK	5
+#define JBD2_FC_BLOCK		6
 
 /*
  * Standard header for all descriptor blocks:
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 6/7] e2fsck: main fast commit replay handler
  2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
                   ` (4 preceding siblings ...)
  2020-03-19 23:34 ` [PATCH 5/7] e2fsck/jbd2: fast commit recovery changes Harshad Shirwadkar
@ 2020-03-19 23:34 ` Harshad Shirwadkar
  5 siblings, 0 replies; 7+ messages in thread
From: Harshad Shirwadkar @ 2020-03-19 23:34 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

Add main ext4 fast commit replay handler that handles replayed fast
commit blocks.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 e2fsck/e2fsck.h      |   9 +
 e2fsck/journal.c     | 491 ++++++++++++++++++++++++++++++++++++++++++-
 lib/ext2fs/ext2_fs.h |  46 ++++
 3 files changed, 545 insertions(+), 1 deletion(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 68f7a249..8ea87ac5 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -226,6 +226,12 @@ typedef struct e2fsck_struct *e2fsck_t;
 
 #define MAX_EXTENT_DEPTH_COUNT 5
 
+struct e2fsck_fc_replay_state {	/* fast commit replay bookkeeping kept in the e2fsck context */
+	int fc_replay_error;	/* error latched by the scan pass; non-zero makes replay bail out */
+	int fc_replay_expected_off;	/* offset the next scanned fast commit block has to carry */
+	int fc_num_blks;	/* blocks accepted by scan; counted back down as they are replayed */
+};
+
 struct e2fsck_struct {
 	ext2_filsys fs;
 	const char *program_name;
@@ -418,6 +424,9 @@ struct e2fsck_struct {
 
 	/* Undo file */
 	char *undo_file;
+
+	/* Fast commit replay stuff */
+	struct e2fsck_fc_replay_state fc_replay_state;
 };
 
 /* Data structures to evaluate whether an extent tree needs rebuilding. */
diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index 7d9f1b40..97fb3c24 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -278,6 +278,485 @@ static int process_journal_block(ext2_filsys fs,
 	return 0;
 }
 
+/*
+ * Scan-pass handler for a single fast commit block.
+ *
+ * The fast commit header sits right after the journal header in @bh.
+ * It is checked for, in this order: the magic number, its position in
+ * the sequence (@off has to equal the next expected offset), feature
+ * bits (none are supported) and the checksum, which covers everything
+ * from the fast commit header to the end of the block and is computed
+ * with the checksum field zeroed.
+ *
+ * Returns 0 and counts the block as replayable when every check passes.
+ * Otherwise the error is latched in the replay state and returned.  Once
+ * at least one block got past the magic and offset checks, a latched
+ * error is returned for any later block without inspecting it.
+ */
+static int ext4_journal_fc_replay_scan(journal_t *j, struct buffer_head *bh,
+				       int off)
+{
+	e2fsck_t ctx = j->j_fs_dev->k_ctx;
+	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
+	struct ext4_fc_commit_hdr *fc_hdr;
+	__u32 csum, old_csum;
+	__u8 *start, *end;
+
+	fc_hdr = (struct ext4_fc_commit_hdr *)
+		  ((__u8 *)bh->b_data + sizeof(journal_header_t));
+
+	start = (__u8 *)fc_hdr;
+	end = (__u8 *)bh->b_data + j->j_blocksize;
+
+	/* Check if we already concluded that this fast commit is not useful */
+	if (state->fc_replay_expected_off && state->fc_replay_error)
+		goto out_err;
+
+	if (le32_to_cpu(fc_hdr->fc_magic) != EXT4_FC_MAGIC) {
+		state->fc_replay_error = -EXT2_ET_BAD_MAGIC;
+		goto out_err;
+	}
+
+	if (off != state->fc_replay_expected_off) {
+		state->fc_replay_error = -EXT2_ET_CORRUPT_JOURNAL_SB;
+		goto out_err;
+	}
+
+	state->fc_replay_expected_off++;
+
+	/* fc_features is a single byte, so there is no byte order to fix up */
+	if (fc_hdr->fc_features) {
+		state->fc_replay_error = -EXT2_ET_OP_NOT_SUPPORTED;
+		goto out_err;
+	}
+
+	/* The checksum is computed with its own field zeroed; restore it after */
+	old_csum = fc_hdr->fc_csum;
+	fc_hdr->fc_csum = 0;
+	csum = jbd2_chksum(j, 0, start, end - start);
+	fc_hdr->fc_csum = old_csum;
+
+	if (csum != le32_to_cpu(fc_hdr->fc_csum)) {
+		state->fc_replay_error = -EXT2_ET_BAD_CRC;
+		goto out_err;
+	}
+	state->fc_num_blks++;
+	return 0;
+
+out_err:
+	return state->fc_replay_error;
+}
+
+/* Length in bytes of the value carried by a tlv, in host byte order */
+static int fc_tag_len(struct ext4_fc_tl *tl)
+{
+	int val_len = le16_to_cpu(tl->fc_len);
+
+	return val_len;
+}
+
+/* Address of a tlv's value bytes, which begin right behind its header */
+static __u8 *fc_tag_val(struct ext4_fc_tl *tl)
+{
+	return (__u8 *)(tl + 1);
+}
+
+/*
+ * Replay the removal of directory entry @dname (pointing at inode @ino)
+ * from directory @parent_ino.
+ *
+ * After the entry is unlinked the inode's link count is dropped by one.
+ * If this was the last link, the on-disk inode is zeroed and its number
+ * is released in the inode bitmap.
+ *
+ * Returns 0 on success or the error of the first library call that
+ * failed.  A failed inode write is reported instead of being ignored,
+ * and in that case the inode bitmap is left untouched.
+ */
+static int ext4_fc_handle_unlink(ext2_filsys fs, int parent_ino,
+				 const char *dname, int ino)
+{
+	struct ext2_inode inode;
+	int ret;
+
+	ret = ext2fs_unlink(fs, parent_ino, dname, ino, 0);
+	if (ret)
+		return ret;
+
+	ret = ext2fs_read_inode(fs, ino, &inode);
+	if (ret)
+		return ret;
+
+	if (inode.i_links_count > 1) {
+		inode.i_links_count--;
+		return ext2fs_write_inode(fs, ino, &inode);
+	}
+
+	/* Last link is gone: wipe the inode, then give its number back */
+	memset(&inode, 0, sizeof(inode));
+	ret = ext2fs_write_inode(fs, ino, &inode);
+	if (ret)
+		return ret;
+	ext2fs_unmark_inode_bitmap2(fs->inode_map, ino);
+	ext2fs_mark_ib_dirty(fs);
+
+	return 0;
+}
+
+/*
+ * Size in bytes of the inode copy stored right behind the fast commit
+ * header: the classic inode size, plus the i_extra_isize recorded in
+ * that copy when the filesystem uses inodes larger than the classic
+ * size.
+ */
+static inline int get_fc_hdr_inode_len(ext2_filsys fs,
+				       struct ext4_fc_commit_hdr *fc_hdr)
+{
+	struct ext2_inode_large *raw;
+
+	if (EXT2_INODE_SIZE(fs->super) <= EXT2_GOOD_OLD_INODE_SIZE)
+		return EXT2_GOOD_OLD_INODE_SIZE;
+
+	raw = (struct ext2_inode_large *)(fc_hdr + 1);
+	return EXT2_GOOD_OLD_INODE_SIZE +
+		ext2fs_le16_to_cpu(raw->i_extra_isize);
+}
+
+/* First tlv of a block: it follows the header and the embedded inode copy */
+static inline struct ext4_fc_tl *get_first_tl(ext2_filsys fs,
+					      struct ext4_fc_commit_hdr *fc_hdr)
+{
+	__u8 *inode_copy = (__u8 *)(fc_hdr + 1);
+
+	return (struct ext4_fc_tl *)(inode_copy +
+				     get_fc_hdr_inode_len(fs, fc_hdr));
+}
+
+/* Step over a tlv's header and value to reach the tlv that follows it */
+static inline struct ext4_fc_tl *get_next_tl(struct ext4_fc_tl *tl)
+{
+	__u8 *val = (__u8 *)(tl + 1);
+
+	return (struct ext4_fc_tl *)(val + le16_to_cpu(tl->fc_len));
+}
+
+/* Number of tlvs recorded in the fast commit header, in host byte order */
+static inline int num_tls(struct ext4_fc_commit_hdr *fc_hdr)
+{
+	int count = le16_to_cpu(fc_hdr->fc_num_tlvs);
+
+	return count;
+}
+
+/*
+ * Replay the directory entry tlvs of one fast commit block.
+ *
+ * Walks all tlvs of @fc_hdr and handles the three dentry tags:
+ *  - EXT4_FC_TAG_ADD_DENTRY:   link the name and mark the inode in use
+ *  - EXT4_FC_TAG_DEL_DENTRY:   unlink the name (see ext4_fc_handle_unlink)
+ *  - EXT4_FC_TAG_CREAT_DENTRY: mark the inode in use, link the name and
+ *    write the inode copy stored behind the header to that inode
+ * Every other tag is skipped here, so its value is never interpreted as
+ * dentry info and no name is allocated for it.
+ *
+ * The entry name is not NUL terminated in the tlv, so a terminated copy
+ * is made for the library calls and released on every path.
+ *
+ * Returns 0 on success, the error of the first failing library call, or
+ * EXT2_ET_CORRUPT_JOURNAL_SB when a dentry tlv is too short to hold the
+ * dentry info header.
+ */
+static int fc_replay_dentries(journal_t *j,
+			struct ext4_fc_commit_hdr *fc_hdr)
+{
+	int inode_len, ret, i, tag, name_len;
+	struct ext4_fc_dentry_info *fcd;
+	ext2_filsys fs = j->j_fs_dev->k_ctx->fs;
+	struct ext2_inode *inode;
+	struct ext4_fc_tl *tl;
+	int parent_ino, ino;
+	char *dname;
+
+	inode_len = get_fc_hdr_inode_len(fs, fc_hdr);
+	tl = get_first_tl(fs, fc_hdr);
+	for (i = 0; i < le16_to_cpu(fc_hdr->fc_num_tlvs);
+	     i++, tl = get_next_tl(tl)) {
+		tag = le16_to_cpu(tl->fc_tag);
+		if (tag != EXT4_FC_TAG_ADD_DENTRY &&
+		    tag != EXT4_FC_TAG_DEL_DENTRY &&
+		    tag != EXT4_FC_TAG_CREAT_DENTRY)
+			continue;
+
+		name_len = fc_tag_len(tl) -
+			(int)sizeof(struct ext4_fc_dentry_info);
+		if (name_len < 0)
+			return EXT2_ET_CORRUPT_JOURNAL_SB;
+
+		fcd = (struct ext4_fc_dentry_info *)fc_tag_val(tl);
+		parent_ino = le32_to_cpu(fcd->fc_parent_ino);
+		ino = le32_to_cpu(fcd->fc_ino);
+
+		ret = ext2fs_get_mem(name_len + 1, &dname);
+		if (ret)
+			return ret;
+		memcpy(dname, fcd->fc_dname, name_len);
+		dname[name_len] = '\0';
+
+		switch (tag) {
+		case EXT4_FC_TAG_ADD_DENTRY:
+			ret = ext2fs_link(fs, parent_ino, dname, ino,
+					  EXT2_FT_REG_FILE);
+			if (ret)
+				break;
+			ext2fs_mark_inode_bitmap2(fs->inode_map, ino);
+			ext2fs_mark_ib_dirty(fs);
+			break;
+		case EXT4_FC_TAG_DEL_DENTRY:
+			ret = ext4_fc_handle_unlink(fs, parent_ino, dname, ino);
+			break;
+		default:
+			/* EXT4_FC_TAG_CREAT_DENTRY */
+			ext2fs_mark_inode_bitmap2(fs->inode_map, ino);
+			ret = ext2fs_link(fs, parent_ino, dname, ino,
+					  EXT2_FT_REG_FILE);
+			if (ret)
+				break;
+
+			ret = ext2fs_get_mem(inode_len, &inode);
+			if (ret)
+				break;
+			ret = ext2fs_read_inode_full(fs, ino, inode, inode_len);
+			if (!ret) {
+				memcpy(inode, (struct ext2_inode *)(fc_hdr + 1),
+					inode_len);
+				ret = ext2fs_write_inode_full(fs, ino, inode,
+							      inode_len);
+			}
+			ext2fs_free_mem(&inode);
+			if (!ret)
+				ext2fs_mark_ib_dirty(fs);
+			break;
+		}
+
+		ext2fs_free_mem(&dname);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Append a copy of @ex to @list.  When the list is full its array is
+ * first grown by NUM_EXTENTS slots.  Returns 0 on success or the error
+ * reported by the resize.
+ */
+static int ext2fs_add_extent_to_list(struct extent_list *list,
+					struct ext2fs_extent *ex)
+{
+	unsigned int grown_bytes;
+	int err;
+
+	if (list->count == list->size) {
+		grown_bytes = (list->size + NUM_EXTENTS) *
+			      sizeof(struct ext2fs_extent);
+		err = ext2fs_resize_mem(0, grown_bytes, &list->extents);
+		if (err)
+			return err;
+		list->size += NUM_EXTENTS;
+	}
+
+	list->extents[list->count] = *ex;
+	list->count++;
+	return 0;
+}
+
+/*
+ * Remove the logical block range given by @del (only e_lblk and e_len
+ * are looked at) from every extent in @list.
+ *
+ * An extent that lies completely inside the range is dropped.  An extent
+ * that overlaps it at one end is trimmed, and an extent that contains the
+ * range in its middle is split in two, the second half being appended to
+ * the list.  Whenever the first logical block of an extent moves, its
+ * physical start moves by the same amount so the mapping of the blocks
+ * that remain is unchanged.  Zero-length pieces are never created, and an
+ * empty @del leaves the list alone.
+ *
+ * Returns 0 on success or the error from growing the list.
+ */
+static int ext2fs_del_extent_from_list(struct extent_list *list,
+				       struct ext2fs_extent *del)
+{
+	struct ext2fs_extent tail;
+	blk64_t del_start, del_end, cur_start, cur_end;
+	int ret, i, j;
+
+	if (del->e_len == 0)
+		return 0;
+
+	/* All ranges below are half open: [start, end) */
+	del_start = del->e_lblk;
+	del_end = del_start + del->e_len;
+
+	i = 0;
+	while (i < list->count) {
+		cur_start = list->extents[i].e_lblk;
+		cur_end = cur_start + list->extents[i].e_len;
+
+		/* No overlap with this extent */
+		if (del_start >= cur_end || del_end <= cur_start) {
+			i++;
+			continue;
+		}
+
+		if (del_start <= cur_start && del_end >= cur_end) {
+			/*
+			 * Extent is fully covered: remove it right away and
+			 * look at whatever slid into index i next.
+			 */
+			for (j = i; j < list->count - 1; j++)
+				list->extents[j] = list->extents[j + 1];
+			list->count--;
+			continue;
+		}
+
+		if (del_end < cur_end) {
+			if (del_start > cur_start) {
+				/* Hole in the middle: split off the tail */
+				tail = list->extents[i];
+				tail.e_lblk = del_end;
+				tail.e_len = cur_end - del_end;
+				tail.e_pblk += del_end - cur_start;
+				ret = ext2fs_add_extent_to_list(list, &tail);
+				if (ret)
+					return ret;
+				cur_end = del_start;
+			} else {
+				/* Head is cut off: physical start follows */
+				list->extents[i].e_pblk += del_end - cur_start;
+				cur_start = del_end;
+			}
+		} else {
+			/* Tail is cut off; del_start > cur_start here */
+			cur_end = del_start;
+		}
+
+		/* The array may have been reallocated above, so index again */
+		list->extents[i].e_lblk = cur_start;
+		list->extents[i].e_len = cur_end - cur_start;
+		i++;
+	}
+
+	return 0;
+}
+
+/*
+ * Convert the on-disk extent @from into the library representation @to.
+ *
+ * Every field of @to is written, including e_flags, so the caller may
+ * pass an uninitialised or previously used structure.  The result is
+ * always flagged as a leaf.  An on-disk length above EXT_INIT_MAX_LEN is
+ * reduced by EXT_INIT_MAX_LEN and the extent is additionally flagged as
+ * uninitialised.
+ */
+static void ext3_to_ext2fs_extent(struct ext2fs_extent *to,
+				  struct ext3_extent *from)
+{
+	to->e_pblk = ext2fs_le32_to_cpu(from->ee_start) +
+		((__u64) ext2fs_le16_to_cpu(from->ee_start_hi)
+			<< 32);
+	to->e_lblk = ext2fs_le32_to_cpu(from->ee_block);
+	to->e_len = ext2fs_le16_to_cpu(from->ee_len);
+	/* Assign rather than OR: nothing from the old contents must survive */
+	to->e_flags = EXT2_EXTENT_FLAGS_LEAF;
+	if (to->e_len > EXT_INIT_MAX_LEN) {
+		to->e_len -= EXT_INIT_MAX_LEN;
+		to->e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+	}
+}
+
+/*
+ * qsort() comparator for extents: order by first logical block, then by
+ * length.  Both keys are compared explicitly; returning a difference of
+ * the lengths would give the wrong sign whenever that difference does
+ * not fit into an int.
+ */
+static int ex_compar(const void *arg1, const void *arg2)
+{
+	const struct ext2fs_extent *ex1 = arg1;
+	const struct ext2fs_extent *ex2 = arg2;
+
+	if (ex1->e_lblk != ex2->e_lblk)
+		return ex1->e_lblk < ex2->e_lblk ? -1 : 1;
+	if (ex1->e_len != ex2->e_len)
+		return ex1->e_len < ex2->e_len ? -1 : 1;
+	return 0;
+}
+
+/*
+ * Sort @list with ex_compar() and fold logically overlapping neighbours
+ * together.  When an extent's last logical block is not below the start
+ * of its successor, the extent is stretched to the farther end of the
+ * two and the successor is removed from the list; the same slot is then
+ * checked against its new successor.
+ */
+static void sort_and_merge_extents(struct extent_list *list)
+{
+	struct ext2fs_extent *cur, *next;
+	blk64_t cur_stop, next_stop, merged_last;
+	int idx, k;
+
+	if (list->count < 2)
+		return;
+
+	qsort(list->extents, list->count, sizeof(list->extents[0]),
+		ex_compar);
+
+	for (idx = 0; idx < list->count - 1; ) {
+		cur = &list->extents[idx];
+		next = &list->extents[idx + 1];
+
+		/* cur ends before next begins: leave both as they are */
+		if (cur->e_lblk + cur->e_len - 1 < next->e_lblk) {
+			idx++;
+			continue;
+		}
+
+		cur_stop = cur->e_lblk + cur->e_len;
+		next_stop = next->e_lblk + next->e_len;
+		merged_last = MAX(cur_stop, next_stop) - 1;
+		cur->e_len = merged_last - cur->e_lblk + 1;
+
+		/* Close the gap left by the absorbed successor */
+		for (k = idx + 1; k < list->count - 1; k++)
+			list->extents[k] = list->extents[k + 1];
+		list->count--;
+	}
+}
+
+/* Set the block bitmap bit of each of the @count blocks starting at @pblk */
+static void mark_blocks_used(ext2_filsys fs, blk64_t pblk, int count)
+{
+	int done;
+
+	for (done = 0; done < count; done++) {
+		/* Only touch bits that are not set yet */
+		if (!ext2fs_test_block_bitmap2(fs->block_map, pblk + done))
+			ext2fs_mark_block_bitmap2(fs->block_map, pblk + done);
+	}
+}
+
+/* Clear the block bitmap bit of each of the @count blocks starting at @pblk */
+static void mark_blocks_free(ext2_filsys fs, blk64_t pblk, int count)
+{
+	int done;
+
+	for (done = 0; done < count; done++) {
+		/* Only touch bits that are currently set */
+		if (ext2fs_test_block_bitmap2(fs->block_map, pblk + done))
+			ext2fs_unmark_block_bitmap2(fs->block_map,
+						    pblk + done);
+	}
+}
+
+/*
+ * Fast commit callback installed on the journal.
+ *
+ * PASS_SCAN is handed to ext4_journal_fc_replay_scan().  Any pass other
+ * than PASS_REPLAY is a no-op.  For PASS_REPLAY the block in @bh is
+ * applied to the filesystem:
+ *  1. the count of outstanding blocks is decremented and replay is
+ *     refused if the scan pass latched an error;
+ *  2. directory entry tlvs are replayed;
+ *  3. the inode's extents are read into a list, ADD_RANGE / DEL_RANGE
+ *     tlvs are applied to that list and to the block bitmap, and the
+ *     sorted, merged list is written back as the inode's extent tree;
+ *  4. the inode copy from the block is written over the on-disk inode.
+ *     Unless the on-disk inode has inline data, the bytes from i_block
+ *     up to i_generation are kept as they are on disk;
+ *  5. after the last outstanding block, bitmaps, summary counters and
+ *     group descriptor checksums are written out and the fs is flushed.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh,
+				     enum passtype pass, int off)
+{
+	struct ext4_fc_commit_hdr *fc_hdr;
+	struct ext4_fc_tl *tl;
+	int i, j, ret, ino;
+	struct ext2_inode *inode = NULL;
+	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
+	struct ext2fs_extent extent;
+	struct extent_list extent_list = {0};
+	struct ext4_fc_lrange *lrange;
+	int inode_len;
+	blk64_t pblk;
+
+	if (pass == PASS_SCAN)
+		return ext4_journal_fc_replay_scan(journal, bh, off);
+	else if (pass != PASS_REPLAY)
+		return 0;
+	ctx->fc_replay_state.fc_num_blks--;
+
+	if (ctx->fc_replay_state.fc_replay_error) {
+		jfs_debug("Scan phase detected error. Aborting replay..\n");
+		return ctx->fc_replay_state.fc_replay_error;
+	}
+
+	ret = ext2fs_read_bitmaps(ctx->fs);
+	if (ret)
+		return ret;
+
+	fc_hdr = (struct ext4_fc_commit_hdr *)
+		  ((__u8 *)bh->b_data + sizeof(journal_header_t));
+	inode_len = get_fc_hdr_inode_len(ctx->fs, fc_hdr);
+	ret = fc_replay_dentries(journal, fc_hdr);
+	if (ret)
+		return ret;
+
+	ino = le32_to_cpu(fc_hdr->fc_ino);
+	extent_list.ino = ino;
+	ret = e2fsck_read_extents(ctx, &extent_list);
+	if (ret)
+		return ret;
+
+	tl = get_first_tl(ctx->fs, fc_hdr);
+	for (i = 0; i < num_tls(fc_hdr); i++) {
+		switch (le16_to_cpu(tl->fc_tag)) {
+		case EXT4_FC_TAG_ADD_RANGE:
+			ext3_to_ext2fs_extent(&extent,
+					      (struct ext3_extent *)(tl + 1));
+			ret = ext2fs_add_extent_to_list(&extent_list, &extent);
+			if (ret)
+				goto out;
+			mark_blocks_used(ctx->fs, extent.e_pblk,  extent.e_len);
+			break;
+		case EXT4_FC_TAG_DEL_RANGE:
+			lrange = (struct ext4_fc_lrange *)(tl + 1);
+			extent.e_lblk = ext2fs_le32_to_cpu(lrange->fc_lblk);
+			/* fc_len is a 32-bit little-endian field */
+			extent.e_len = ext2fs_le32_to_cpu(lrange->fc_len);
+
+			/*
+			 * Find the physical block backing the start of the
+			 * range before the list is modified; 0 means the
+			 * start is not mapped by any extent.
+			 */
+			pblk = 0;
+			for (j = 0; j < extent_list.count; j++) {
+				if (extent.e_lblk >=
+				    extent_list.extents[j].e_lblk &&
+				    extent.e_lblk <
+				    extent_list.extents[j].e_lblk +
+				    extent_list.extents[j].e_len) {
+					pblk = extent_list.extents[j].e_pblk +
+						extent.e_lblk -
+						extent_list.extents[j].e_lblk;
+					break;
+				}
+			}
+			ret = ext2fs_del_extent_from_list(&extent_list,
+							  &extent);
+			if (ret)
+				goto out;
+
+			if (pblk != 0)
+				mark_blocks_free(ctx->fs, pblk, extent.e_len);
+			break;
+		default:
+			break;
+		}
+		tl = get_next_tl(tl);
+	}
+	ext2fs_mark_bb_dirty(ctx->fs);
+	sort_and_merge_extents(&extent_list);
+
+	ret = e2fsck_rewrite_extent_tree(ctx, &extent_list);
+	if (ret)
+		goto out;
+
+	ret = ext2fs_get_mem(inode_len, &inode);
+	if (ret)
+		goto out;
+	ret = ext2fs_read_inode_full(ctx->fs, ino, inode, inode_len);
+	if (ret)
+		goto out;
+
+	if (inode->i_flags & EXT4_INLINE_DATA_FL) {
+		memcpy(inode, fc_hdr + 1, inode_len);
+	} else {
+		/* Skip i_block: it holds the extent tree rewritten above */
+		memcpy(inode, fc_hdr + 1,
+			offsetof(struct ext2_inode_large, i_block));
+		memcpy(&inode->i_generation,
+		       &((struct ext2_inode_large *)(fc_hdr + 1))->i_generation,
+		       inode_len -
+		       offsetof(struct ext2_inode_large, i_generation));
+	}
+
+	ret = ext2fs_write_inode_full(ctx->fs, ino, inode, inode_len);
+	if (ret)
+		goto out;
+
+	if (ctx->fc_replay_state.fc_num_blks == 0) {
+		ext2fs_mark_super_dirty(ctx->fs);
+		ext2fs_write_block_bitmap(ctx->fs);
+		ext2fs_write_inode_bitmap(ctx->fs);
+		ext2fs_calculate_summary_stats(ctx->fs);
+		ext2fs_set_gdt_csum(ctx->fs);
+		ext2fs_flush(ctx->fs);
+	}
+out:
+	/* The inode buffer is only a scratch copy; release it on every path */
+	if (inode)
+		ext2fs_free_mem(&inode);
+	ext2fs_free_mem(&extent_list.extents);
+	return ret;
+}
+
 static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
 {
 	struct process_block_struct pb;
@@ -514,6 +993,10 @@ static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
 
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+	if (ext2fs_has_feature_fast_commit(ctx->fs->super))
+		journal->j_fc_replay_callback = ext4_journal_fc_replay_cb;
+	else
+		journal->j_fc_replay_callback = NULL;
 
 #ifdef USE_INODE_IO
 	if (j_inode)
@@ -688,7 +1171,13 @@ static errcode_t e2fsck_journal_load(journal_t *journal)
 	journal->j_transaction_sequence = journal->j_tail_sequence;
 	journal->j_tail = ntohl(jsb->s_start);
 	journal->j_first = ntohl(jsb->s_first);
-	journal->j_last = ntohl(jsb->s_maxlen);
+	if (jbd2_has_feature_fast_commit(journal)) {
+		journal->j_last_fc = ntohl(jsb->s_maxlen);
+		journal->j_last = journal->j_last_fc - JBD2_FAST_COMMIT_BLOCKS;
+		journal->j_first_fc = journal->j_last + 1;
+	} else {
+		journal->j_last = ntohl(jsb->s_maxlen);
+	}
 
 	return 0;
 }
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 6c20ea77..410db16a 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -490,6 +490,52 @@ struct ext2_inode_large {
 /*9c*/	__u32   i_projid;       /* Project ID */
 };
 
+/* Fast commit stuff */
+/* Ext4 fast commit related info */
+
+/* Magic of fast commit header */
+#define EXT4_FC_MAGIC			0xE2540090
+
+struct ext4_fc_commit_hdr {	/* follows the journal header in a fast commit block */
+	/* Fast commit magic, should be EXT4_FC_MAGIC */
+	__u32 fc_magic;
+	/* Features used by this fast commit block */
+	__u8 fc_features;	/* NOTE(review): a __u8 ahead of a __u16 presumably leaves a padding byte - confirm */
+	/* Number of TLVs in this fast commit block */
+	__u16 fc_num_tlvs;
+	/* Inode number */
+	__u32 fc_ino;
+	/* Csum(hdr+contents) */
+	__u32 fc_csum;
+};
+
+struct ext4_fc_lrange {	/* value of an EXT4_FC_TAG_DEL_RANGE tlv: a logical block range */
+	__le32 fc_lblk;	/* first logical block of the range */
+	__le32 fc_len;	/* number of blocks in the range */
+};
+
+#define EXT4_FC_TAG_ADD_RANGE		0x1
+#define EXT4_FC_TAG_DEL_RANGE		0x2
+#define EXT4_FC_TAG_CREAT_DENTRY	0x3
+#define EXT4_FC_TAG_ADD_DENTRY		0x4
+#define EXT4_FC_TAG_DEL_DENTRY		0x5
+
+struct ext4_fc_tl {	/* tag/length header placed in front of every tlv value */
+	__le16 fc_tag;	/* one of the EXT4_FC_TAG_* values */
+	__le16 fc_len;	/* length of the value in bytes, this header not included */
+};
+
+/* On disk fast commit tlv value structure for dirent tags:
+ *  - EXT4_FC_TAG_CREAT_DENTRY
+ *  - EXT4_FC_TAG_ADD_DENTRY
+ *  - EXT4_FC_TAG_DEL_DENTRY
+ */
+struct ext4_fc_dentry_info {
+	__le32 fc_parent_ino;	/* inode number of the directory holding the entry */
+	__le32 fc_ino;	/* inode number the entry points at */
+	__u8 fc_dname[0];	/* entry name, not NUL terminated; fills the rest of the tlv value */
+};
+
 #define EXT4_INODE_CSUM_HI_EXTRA_END	\
 	(offsetof(struct ext2_inode_large, i_checksum_hi) + sizeof(__u16) - \
 	 EXT2_GOOD_OLD_INODE_SIZE)
-- 
2.25.1.696.g5e7596f4ac-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-19 23:34 [PATCH 0/7] e2fsck: fast commit recovery path e2fsck changes Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 1/7] e2fsck: make recovery.c identical with kernel Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 2/7] e2fsck: allow rewriting extents of a file Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 3/7] ext2fs: make ext2fs_calculate_summary_stats() visible Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 4/7] e2fsck/jbd2: add fast commit feature in jbd2 Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 5/7] e2fsck/jbd2: fast commit recovery changes Harshad Shirwadkar
2020-03-19 23:34 ` [PATCH 6/7] e2fsck: main fast commit replay handler Harshad Shirwadkar

Linux-ext4 Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-ext4/0 linux-ext4/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-ext4 linux-ext4/ https://lore.kernel.org/linux-ext4 \
		linux-ext4@vger.kernel.org
	public-inbox-index linux-ext4

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-ext4


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git