linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Theodore Y. Ts'o" <tytso@mit.edu>
To: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Cc: linux-ext4@vger.kernel.org
Subject: Re: [PATCH v3 10/13] ext4: fast-commit recovery path changes
Date: Thu, 17 Oct 2019 22:07:49 -0400	[thread overview]
Message-ID: <20191018020749.GC21137@mit.edu> (raw)
In-Reply-To: <20191001074101.256523-11-harshadshirwadkar@gmail.com>

On Tue, Oct 01, 2019 at 12:40:59AM -0700, Harshad Shirwadkar wrote:
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index 0b202e00d93f..2433f12d2d88 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -360,7 +360,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
>  				      struct buffer_head *bh)
>  {
>  	ext4_fsblk_t	blk;
> -	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
> +	struct ext4_group_info *grp;
> +
> +	if (EXT4_SB(sb)->s_fc_replay)
> +		return 0;

Instead of adding a bool (s_fc_replay) to sbi, why not just use
sbi->s_mount_state and define a new bit, EXT4_REPLAY_FC (alongside
EXT4_ORPHAN_FS, et al.)?

> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index fd7740372438..12d6e70bf676 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c

> +int ext4_fc_create_inode(struct super_block *sb, struct ext4_inode *raw_inode,
> +			 int ino, unsigned long parent, const char *dname,
> +			 int dlen)
> +{
> +	struct inode *dir = NULL, *inode = NULL;
> +	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
> +	struct qstr qstr_dname = QSTR_INIT(dname, dlen);
> +	struct ext4_dir_entry_2 *res_dir = NULL;
> +	struct buffer_head *dirent_bh;
> +	int ret = 0, inlined;
> +
	...
> +		if (le32_to_cpu(res_dir->inode) != inode->i_ino) {
> +			jbd_debug(1, "Entry exists and mismatched inode nos.");
> +			brelse(dirent_bh);
> +			ret = -EEXIST;
> +			goto out;


We have a number of statements where ret gets set to an error, but
then when we look at what happens after the out label...

> +out:
	...
> +
> +	return 0;
> +}

It always returns 0; I think we should be returning ret?


> +static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh,
> +				     enum passtype pass, int off)
> +{
> +	struct super_block *sb = journal->j_private;
> +	struct ext4_sb_info *sbi = EXT4_SB(sb);
> +	struct ext4_fc_commit_hdr *fc_hdr;
> +	struct ext4_fc_tl *tl;
> +	struct ext4_iloc iloc;
> +	struct ext4_extent *ex;
> +	struct inode *inode;
> +	char *dname = NULL;
> +	int dname_len = 0;
> +	int parent_ino = -1;
> +	int i, j, ret;
> +
> +	if (pass == PASS_SCAN)
> +		return ext4_journal_fc_replay_scan(sb, bh, off);
> +
> +	if (sbi->s_fc_replay_state.fc_replay_error) {
> +		jbd_debug(1, "FC replay error set = %d\n",
> +			  sbi->s_fc_replay_state.fc_replay_error);
> +		return sbi->s_fc_replay_state.fc_replay_error;
> +	}
> +
> +	sbi->s_fc_replay = true;
> +	fc_hdr = (struct ext4_fc_commit_hdr *)
> +		  ((__u8 *)bh->b_data + sizeof(journal_header_t));
> +
> +	jbd_debug(3, "%s: Got FC block for inode %d at [%d,%d]", __func__,
> +		  le32_to_cpu(fc_hdr->fc_ino),
> +		  be32_to_cpu(((journal_header_t *)bh->b_data)->h_sequence),
> +		  le32_to_cpu(fc_hdr->fc_subtid));
> +
> +	tl = (struct ext4_fc_tl *)(fc_hdr + 1);
> +	if (le16_to_cpu(fc_hdr->fc_num_tlvs) >= 2) {
> +		for (i = 0; i < 2; i++) {
> +			switch (le16_to_cpu(tl->fc_tag)) {
> +			case EXT4_FC_TAG_DNAME:
> +				dname = fc_tag_val(tl);
> +				dname_len = fc_tag_len(tl);
> +				break;
> +			case EXT4_FC_TAG_PARENT_INO:
> +				parent_ino = le32_to_cpu(
> +				    *(__le32 *)fc_tag_val(tl));
> +				break;
> +			}
> +			tl = (struct ext4_fc_tl *)(fc_tag_val(tl) +
> +						   fc_tag_len(tl));
> +		}
> +	}
> +
> +	if (parent_ino && dname) {
> +		ret = ext4_fc_create_inode(sb, &fc_hdr->inode,
> +				     le32_to_cpu(fc_hdr->fc_ino), parent_ino,
> +				     dname, dname_len);
> +		if (ret) {
> +			jbd_debug(1, "Failed to create ext4 inode.");
> +			return ret;
> +		}
> +	}
> +
> +	inode = ext4_iget(sb, le32_to_cpu(fc_hdr->fc_ino), EXT4_IGET_NORMAL);
> +	if (IS_ERR(inode))
> +		return 0;
> +
> +	ret = ext4_get_inode_loc(inode, &iloc);
> +	if (ret)
> +		return ret;
> +
> +	inode_lock(inode);
> +	tl = (struct ext4_fc_tl *)(fc_hdr + 1);
> +	for (i = 0; i < le16_to_cpu(fc_hdr->fc_num_tlvs); i++) {
> +		switch (le16_to_cpu(tl->fc_tag)) {
> +		case EXT4_FC_TAG_EXT:
> +			ex = (struct ext4_extent *)(tl + 1);
> +			/*
> +			 * We add block by block because part of extent may
> +			 * already have been added by a previous fast commit
> +			 * replay.
> +			 */
> +			for (j = 0; j < ext4_ext_get_actual_len(ex); j++)
> +				ext4_fc_add_block(inode,
> +						  le32_to_cpu(ex->ee_block) + j,
> +						  ext4_ext_pblock(ex) + j,
> +						  ext4_ext_is_unwritten(ex));
> +			break;
> +		case EXT4_FC_TAG_PARENT_INO:
> +		case EXT4_FC_TAG_DNAME:
> +			break;
> +		default:
> +			jbd_debug(1, "Unknown tag found.\n");
> +		}
> +		tl = (struct ext4_fc_tl *)((__u8 *)tl +
> +					   le16_to_cpu(tl->fc_len) +
> +					   sizeof(*tl));
> +	}
> +	ext4_reserve_inode_write(NULL, inode, &iloc);
> +	inode_unlock(inode);
> +
> +	/*
> +	 * Unless inode contains inline data, copy everything except
> +	 * i_blocks. i_blocks would have been set alright by ext4_fc_add_block
> +	 * call above.
> +	 */
> +	if (ext4_has_inline_data(inode)) {
> +		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
> +		       sizeof(struct ext4_inode));
> +	} else {
> +		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
> +		       offsetof(struct ext4_inode, i_block));
> +		memcpy(&ext4_raw_inode(&iloc)->i_generation,
> +		       &fc_hdr->inode.i_generation,
> +		       sizeof(struct ext4_inode) -
> +		       offsetof(struct ext4_inode, i_generation));
> +	}
> +	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
> +	ext4_reset_inode_seed(inode);
> +
> +	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
> +	ret = ext4_handle_dirty_metadata(NULL, inode, iloc.bh);
> +	brelse(iloc.bh);
> +	iput(inode);
> +	if (!ret)
> +		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
> +
> +	sbi->s_fc_replay = false;
> +
> +	return ret;
> +}
> +
>  void ext4_init_fast_commit(struct super_block *sb, journal_t *journal)
>  {
>  	if (ext4_should_fast_commit(sb)) {
>  		journal->j_fc_commit_callback = ext4_journal_fc_commit_cb;
>  		journal->j_fc_cleanup_callback = ext4_journal_fc_cleanup_cb;
>  	}
> +
> +	/*
> +	 * We set replay callback even if fast commit disabled because we may
> +	 * could still have fast commit blocks that need to be replayed even if
> +	 * fast commit has now been turned off.
> +	 */
> +	journal->j_fc_replay_callback = ext4_journal_fc_replay_cb;
>  }
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index dea4c2632272..d70c09cbbc3f 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2903,9 +2903,11 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
>  	ext_debug("truncate since %u to %u\n", start, end);
>  
>  	/* probably first extent we're gonna free will be last in block */
> -	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
> -	if (IS_ERR(handle))
> -		return PTR_ERR(handle);
> +	if (!sbi->s_fc_replay) {
> +		handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
> +		if (IS_ERR(handle))
> +			return PTR_ERR(handle);
> +	}


I'm curious; what fast commits will result in our needing to call
ext4_ext_remove_space?  I thought we weren't supporting truncate,
punch hole, etc.

> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 47d04a33a3ca..d32dea0757fe 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -287,15 +292,17 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
	...
> +	if (!sbi->s_fc_replay) {
> +		grp = ext4_get_group_info(sb, block_group);
> +		if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
> +			fatal = -EFSCORRUPTED;
> +			goto error_return;

And ditto for ext4_free_inode?

> @@ -758,7 +765,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,

And I'm surprised we'd want to use ext4_new_inode for fast commit,
since for fast commit, we already know what inode number should be
used for a newly created file.  ext4_new_inode() is going to be
searching for what inode to allocate which we wouldn't need to do for
fast_commit, no?

						- Ted

  reply	other threads:[~2019-10-18  4:54 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-01  7:40 [PATCH v3 00/13] ext4: add fast commit support Harshad Shirwadkar
2019-10-01  7:40 ` [PATCH v3 01/13] ext4: add handling for extended mount options Harshad Shirwadkar
2019-10-16  2:14   ` Theodore Y. Ts'o
2019-10-21 20:41     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 02/13] jbd2: fast commit setup and enable Harshad Shirwadkar
2019-10-16 13:03   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 03/13] jbd2: fast-commit commit path changes Harshad Shirwadkar
2019-10-16 16:38   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 04/13] jbd2: fast-commit commit path new APIs Harshad Shirwadkar
2019-10-16 17:20   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 05/13] jbd2: fast-commit recovery path changes Harshad Shirwadkar
2019-10-16 17:30   ` Theodore Y. Ts'o
2019-10-22  0:51     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 06/13] ext4: add fields that are needed to track changed files Harshad Shirwadkar
2019-10-16 18:26   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 07/13] ext4: track changed files for fast commit Harshad Shirwadkar
2019-10-16 20:26   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 08/13] ext4: fast-commit commit range tracking Harshad Shirwadkar
2019-10-16 21:36   ` Theodore Y. Ts'o
2019-10-30  5:12     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 09/13] ext4: fast-commit commit path changes Harshad Shirwadkar
2019-10-16 22:45   ` Theodore Y. Ts'o
     [not found]     ` <CAAJeciXQiE022GqcsTr35jSqjA6eH+zBS2KNvDPj5PovButdYA@mail.gmail.com>
2019-10-23 12:44       ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 10/13] ext4: fast-commit recovery " Harshad Shirwadkar
2019-10-18  2:07   ` Theodore Y. Ts'o [this message]
2019-10-01  7:41 ` [PATCH v3 11/13] ext4: add support for asynchronous fast commits Harshad Shirwadkar
2019-10-25  6:28   ` Xiaoguang Wang
2019-10-01  7:41 ` [PATCH v3 12/13] docs: Add fast commit documentation Harshad Shirwadkar
2019-10-18  1:56   ` Theodore Y. Ts'o
2019-10-18  4:51     ` Andreas Dilger
2019-10-18 13:28       ` Theodore Y. Ts'o
2019-10-31 18:53         ` Andreas Dilger
2019-10-31  5:34     ` harshad shirwadkar
2019-10-31  6:41       ` harshad shirwadkar
2019-10-04 19:12 ` [PATCH v3 00/13] ext4: add fast commit support Theodore Y. Ts'o
2019-10-04 20:11   ` harshad shirwadkar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191018020749.GC21137@mit.edu \
    --to=tytso@mit.edu \
    --cc=harshadshirwadkar@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).