All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Theodore Y. Ts'o" <tytso@mit.edu>
To: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Cc: linux-ext4@vger.kernel.org
Subject: Re: [PATCH v3 10/13] ext4: fast-commit recovery path changes
Date: Thu, 17 Oct 2019 22:07:49 -0400	[thread overview]
Message-ID: <20191018020749.GC21137@mit.edu> (raw)
In-Reply-To: <20191001074101.256523-11-harshadshirwadkar@gmail.com>

On Tue, Oct 01, 2019 at 12:40:59AM -0700, Harshad Shirwadkar wrote:
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index 0b202e00d93f..2433f12d2d88 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -360,7 +360,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
>  				      struct buffer_head *bh)
>  {
>  	ext4_fsblk_t	blk;
> -	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
> +	struct ext4_group_info *grp;
> +
> +	if (EXT4_SB(sb)->s_fc_replay)
> +		return 0;

Instead of adding a bool (s_fc_replay) to sbi, why not just use
sbi->s_mount_state and define a new bit, EXT4_REPLAY_FC (alongside
EXT4_ORPHAN_FS, et. al)?

> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index fd7740372438..12d6e70bf676 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c

> +int ext4_fc_create_inode(struct super_block *sb, struct ext4_inode *raw_inode,
> +			 int ino, unsigned long parent, const char *dname,
> +			 int dlen)
> +{
> +	struct inode *dir = NULL, *inode = NULL;
> +	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
> +	struct qstr qstr_dname = QSTR_INIT(dname, dlen);
> +	struct ext4_dir_entry_2 *res_dir = NULL;
> +	struct buffer_head *dirent_bh;
> +	int ret = 0, inlined;
> +
	...
> +		if (le32_to_cpu(res_dir->inode) != inode->i_ino) {
> +			jbd_debug(1, "Entry exists and mismatched inode nos.");
> +			brelse(dirent_bh);
> +			ret = -EEXIST;
> +			goto out;


We have a number of statements where ret gets set to an error, but
then when look at what happens after the out label...

> +out:
	...
> +
> +	return 0;
> +}

It always returns 0; I think we should be returning ret?


> +static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh,
> +				     enum passtype pass, int off)
> +{
> +	struct super_block *sb = journal->j_private;
> +	struct ext4_sb_info *sbi = EXT4_SB(sb);
> +	struct ext4_fc_commit_hdr *fc_hdr;
> +	struct ext4_fc_tl *tl;
> +	struct ext4_iloc iloc;
> +	struct ext4_extent *ex;
> +	struct inode *inode;
> +	char *dname = NULL;
> +	int dname_len = 0;
> +	int parent_ino = -1;
> +	int i, j, ret;
> +
> +	if (pass == PASS_SCAN)
> +		return ext4_journal_fc_replay_scan(sb, bh, off);
> +
> +	if (sbi->s_fc_replay_state.fc_replay_error) {
> +		jbd_debug(1, "FC replay error set = %d\n",
> +			  sbi->s_fc_replay_state.fc_replay_error);
> +		return sbi->s_fc_replay_state.fc_replay_error;
> +	}
> +
> +	sbi->s_fc_replay = true;
> +	fc_hdr = (struct ext4_fc_commit_hdr *)
> +		  ((__u8 *)bh->b_data + sizeof(journal_header_t));
> +
> +	jbd_debug(3, "%s: Got FC block for inode %d at [%d,%d]", __func__,
> +		  le32_to_cpu(fc_hdr->fc_ino),
> +		  be32_to_cpu(((journal_header_t *)bh->b_data)->h_sequence),
> +		  le32_to_cpu(fc_hdr->fc_subtid));
> +
> +	tl = (struct ext4_fc_tl *)(fc_hdr + 1);
> +	if (le16_to_cpu(fc_hdr->fc_num_tlvs) >= 2) {
> +		for (i = 0; i < 2; i++) {
> +			switch (le16_to_cpu(tl->fc_tag)) {
> +			case EXT4_FC_TAG_DNAME:
> +				dname = fc_tag_val(tl);
> +				dname_len = fc_tag_len(tl);
> +				break;
> +			case EXT4_FC_TAG_PARENT_INO:
> +				parent_ino = le32_to_cpu(
> +				    *(__le32 *)fc_tag_val(tl));
> +				break;
> +			}
> +			tl = (struct ext4_fc_tl *)(fc_tag_val(tl) +
> +						   fc_tag_len(tl));
> +		}
> +	}
> +
> +	if (parent_ino && dname) {
> +		ret = ext4_fc_create_inode(sb, &fc_hdr->inode,
> +				     le32_to_cpu(fc_hdr->fc_ino), parent_ino,
> +				     dname, dname_len);
> +		if (ret) {
> +			jbd_debug(1, "Failed to create ext4 inode.");
> +			return ret;
> +		}
> +	}
> +
> +	inode = ext4_iget(sb, le32_to_cpu(fc_hdr->fc_ino), EXT4_IGET_NORMAL);
> +	if (IS_ERR(inode))
> +		return 0;
> +
> +	ret = ext4_get_inode_loc(inode, &iloc);
> +	if (ret)
> +		return ret;
> +
> +	inode_lock(inode);
> +	tl = (struct ext4_fc_tl *)(fc_hdr + 1);
> +	for (i = 0; i < le16_to_cpu(fc_hdr->fc_num_tlvs); i++) {
> +		switch (le16_to_cpu(tl->fc_tag)) {
> +		case EXT4_FC_TAG_EXT:
> +			ex = (struct ext4_extent *)(tl + 1);
> +			/*
> +			 * We add block by block because part of extent may
> +			 * already have been added by a previous fast commit
> +			 * replay.
> +			 */
> +			for (j = 0; j < ext4_ext_get_actual_len(ex); j++)
> +				ext4_fc_add_block(inode,
> +						  le32_to_cpu(ex->ee_block) + j,
> +						  ext4_ext_pblock(ex) + j,
> +						  ext4_ext_is_unwritten(ex));
> +			break;
> +		case EXT4_FC_TAG_PARENT_INO:
> +		case EXT4_FC_TAG_DNAME:
> +			break;
> +		default:
> +			jbd_debug(1, "Unknown tag found.\n");
> +		}
> +		tl = (struct ext4_fc_tl *)((__u8 *)tl +
> +					   le16_to_cpu(tl->fc_len) +
> +					   sizeof(*tl));
> +	}
> +	ext4_reserve_inode_write(NULL, inode, &iloc);
> +	inode_unlock(inode);
> +
> +	/*
> +	 * Unless inode contains inline data, copy everything except
> +	 * i_blocks. i_blocks would have been set alright by ext4_fc_add_block
> +	 * call above.
> +	 */
> +	if (ext4_has_inline_data(inode)) {
> +		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
> +		       sizeof(struct ext4_inode));
> +	} else {
> +		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
> +		       offsetof(struct ext4_inode, i_block));
> +		memcpy(&ext4_raw_inode(&iloc)->i_generation,
> +		       &fc_hdr->inode.i_generation,
> +		       sizeof(struct ext4_inode) -
> +		       offsetof(struct ext4_inode, i_generation));
> +	}
> +	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
> +	ext4_reset_inode_seed(inode);
> +
> +	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
> +	ret = ext4_handle_dirty_metadata(NULL, inode, iloc.bh);
> +	brelse(iloc.bh);
> +	iput(inode);
> +	if (!ret)
> +		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
> +
> +	sbi->s_fc_replay = false;
> +
> +	return ret;
> +}
> +
>  void ext4_init_fast_commit(struct super_block *sb, journal_t *journal)
>  {
>  	if (ext4_should_fast_commit(sb)) {
>  		journal->j_fc_commit_callback = ext4_journal_fc_commit_cb;
>  		journal->j_fc_cleanup_callback = ext4_journal_fc_cleanup_cb;
>  	}
> +
> +	/*
> +	 * We set replay callback even if fast commit disabled because we may
> +	 * could still have fast commit blocks that need to be replayed even if
> +	 * fast commit has now been turned off.
> +	 */
> +	journal->j_fc_replay_callback = ext4_journal_fc_replay_cb;
>  }
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index dea4c2632272..d70c09cbbc3f 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2903,9 +2903,11 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
>  	ext_debug("truncate since %u to %u\n", start, end);
>  
>  	/* probably first extent we're gonna free will be last in block */
> -	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
> -	if (IS_ERR(handle))
> -		return PTR_ERR(handle);
> +	if (!sbi->s_fc_replay) {
> +		handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
> +		if (IS_ERR(handle))
> +			return PTR_ERR(handle);
> +	}


I'm curious; what fast commits will result in our needing to call
ext4_ext_remove_space?  I thought we weren't supporting truncate,
punch hole, etc.

> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 47d04a33a3ca..d32dea0757fe 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -287,15 +292,17 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
	...
> +	if (!sbi->s_fc_replay) {
> +		grp = ext4_get_group_info(sb, block_group);
> +		if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) {
> +			fatal = -EFSCORRUPTED;
> +			goto error_return;

And ditto for ext4_free_inode?

> @@ -758,7 +765,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,

And I'm surprised we're want to use ext4_new_inode for fast commit,
since for fast commit, we already know what inode number should be
used for a newly created file.  ext4_new_inode() is going to be
searching for what inode to allocate which we wouldn't need to do for
fast_commit, no?

						- Ted

  reply	other threads:[~2019-10-18  4:54 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-01  7:40 [PATCH v3 00/13] ext4: add fast commit support Harshad Shirwadkar
2019-10-01  7:40 ` [PATCH v3 01/13] ext4: add handling for extended mount options Harshad Shirwadkar
2019-10-16  2:14   ` Theodore Y. Ts'o
2019-10-21 20:41     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 02/13] jbd2: fast commit setup and enable Harshad Shirwadkar
2019-10-16 13:03   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 03/13] jbd2: fast-commit commit path changes Harshad Shirwadkar
2019-10-16 16:38   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 04/13] jbd2: fast-commit commit path new APIs Harshad Shirwadkar
2019-10-16 17:20   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 05/13] jbd2: fast-commit recovery path changes Harshad Shirwadkar
2019-10-16 17:30   ` Theodore Y. Ts'o
2019-10-22  0:51     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 06/13] ext4: add fields that are needed to track changed files Harshad Shirwadkar
2019-10-16 18:26   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 07/13] ext4: track changed files for fast commit Harshad Shirwadkar
2019-10-16 20:26   ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 08/13] ext4: fast-commit commit range tracking Harshad Shirwadkar
2019-10-16 21:36   ` Theodore Y. Ts'o
2019-10-30  5:12     ` harshad shirwadkar
2019-10-01  7:40 ` [PATCH v3 09/13] ext4: fast-commit commit path changes Harshad Shirwadkar
2019-10-16 22:45   ` Theodore Y. Ts'o
     [not found]     ` <CAAJeciXQiE022GqcsTr35jSqjA6eH+zBS2KNvDPj5PovButdYA@mail.gmail.com>
2019-10-23 12:44       ` Theodore Y. Ts'o
2019-10-01  7:40 ` [PATCH v3 10/13] ext4: fast-commit recovery " Harshad Shirwadkar
2019-10-18  2:07   ` Theodore Y. Ts'o [this message]
2019-10-01  7:41 ` [PATCH v3 11/13] ext4: add support for asynchronous fast commits Harshad Shirwadkar
2019-10-25  6:28   ` Xiaoguang Wang
2019-10-01  7:41 ` [PATCH v3 12/13] docs: Add fast commit documentation Harshad Shirwadkar
2019-10-18  1:56   ` Theodore Y. Ts'o
2019-10-18  4:51     ` Andreas Dilger
2019-10-18 13:28       ` Theodore Y. Ts'o
2019-10-31 18:53         ` Andreas Dilger
2019-10-31  5:34     ` harshad shirwadkar
2019-10-31  6:41       ` harshad shirwadkar
2019-10-04 19:12 ` [PATCH v3 00/13] ext4: add fast commit support Theodore Y. Ts'o
2019-10-04 20:11   ` harshad shirwadkar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191018020749.GC21137@mit.edu \
    --to=tytso@mit.edu \
    --cc=harshadshirwadkar@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.