linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Sterba <dsterba@suse.cz>
To: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: David Sterba <dsterba@suse.com>,
	Nikolay Borisov <nborisov@suse.com>,
	linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
Date: Fri, 17 Jan 2020 14:38:09 +0100	[thread overview]
Message-ID: <20200117133809.GB3929@twin.jikos.cz> (raw)
In-Reply-To: <20200117125105.20989-3-johannes.thumshirn@wdc.com>

On Fri, Jan 17, 2020 at 09:51:02PM +0900, Johannes Thumshirn wrote:
> Similar to the superblock read path, change the write path to using BIOs
> and pages instead of buffer_heads.
> 
> This is based on a patch originally authored by Nikolay Borisov.
> 
> Co-developed-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
>  1 file changed, 61 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 50c93ffe8d03..51e7b832c8fd 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
>  }
>  ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
>  
> -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +static void btrfs_end_super_write(struct bio *bio)
>  {
> -	if (uptodate) {
> -		set_buffer_uptodate(bh);
> -	} else {
> -		struct btrfs_device *device = (struct btrfs_device *)
> -			bh->b_private;
> -
> -		btrfs_warn_rl_in_rcu(device->fs_info,
> -				"lost page write due to IO error on %s",
> -					  rcu_str_deref(device->name));
> -		/* note, we don't set_buffer_write_io_error because we have
> -		 * our own ways of dealing with the IO errors
> -		 */
> -		clear_buffer_uptodate(bh);
> -		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
> +	struct btrfs_device *device = bio->bi_private;
> +	struct bio_vec *bvec;
> +	struct bvec_iter_all iter_all;
> +	struct page *page;
> +
> +	bio_for_each_segment_all(bvec, bio, iter_all) {
> +		page = bvec->bv_page;
> +
> +		if (blk_status_to_errno(bio->bi_status)) {
> +			btrfs_warn_rl_in_rcu(device->fs_info,
> +					     "lost page write due to IO error on %s",
> +					     rcu_str_deref(device->name));
> +			ClearPageUptodate(page);
> +			SetPageError(page);
> +			btrfs_dev_stat_inc_and_print(device,
> +						     BTRFS_DEV_STAT_WRITE_ERRS);
> +		} else {
> +			SetPageUptodate(page);
> +		}
> +
> +		put_page(page);
> +		unlock_page(page);
>  	}
> -	unlock_buffer(bh);
> -	put_bh(bh);
> +
> +	bio_put(bio);
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> @@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>   * the expected device size at commit time. Note that max_mirrors must be
>   * same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or submission fails.
> + * Return number of errors when page is not found or submission fails.
>   */
>  static int write_dev_supers(struct btrfs_device *device,
>  			    struct btrfs_super_block *sb, int max_mirrors)
>  {
>  	struct btrfs_fs_info *fs_info = device->fs_info;
>  	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
> -	struct buffer_head *bh;
> +	gfp_t gfp_mask;
>  	int i;
> -	int ret;
>  	int errors = 0;
>  	u64 bytenr;
>  	int op_flags;
> @@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
>  
>  	shash->tfm = fs_info->csum_shash;
>  
> +	gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
> +					  ~__GFP_FS) | __GFP_NOFAIL;
> +
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +		struct bio *bio;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
> @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
>  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
>  		crypto_shash_final(shash, sb->csum);
>  
> -		/* One reference for us, and we leave it for the caller */
> -		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> -			      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> +					   bytenr >> PAGE_SHIFT, gfp_mask);

This uses NOFAIL again, but now we're in write_dev_supers, so it has
some implications regarding potentially unbounded waiting.

> +		if (!page) {
>  			btrfs_err(device->fs_info,
> -			    "couldn't get super buffer head for bytenr %llu",
> +			    "couldn't get superblock page for bytenr %llu",
>  			    bytenr);
>  			errors++;
>  			continue;
>  		}
>  
> -		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
> +		/* Bump the refcount for wait_dev_supers() */
> +		get_page(page);
>  
> -		/* one reference for submit_bh */
> -		get_bh(bh);
> -
> -		set_buffer_uptodate(bh);
> -		lock_buffer(bh);
> -		bh->b_end_io = btrfs_end_buffer_write_sync;
> -		bh->b_private = device;
> +		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>  
>  		/*
>  		 * we fua the first super.  The others we allow
> @@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
>  		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
>  		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
>  			op_flags |= REQ_FUA;
> -		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
> -		if (ret)
> -			errors++;
> +
> +		bio = bio_alloc(gfp_mask, 1);

Also, allocating a new bio every time we have to write the superblock is
not very nice. This should do something like device::flush_bio, i.e. use
a bio that can be reused.

> +		bio_set_dev(bio, device->bdev);
> +		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio->bi_private = device;
> +		bio->bi_end_io = btrfs_end_super_write;
> +		bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
> +			     offset_in_page(bytenr));
> +
> +		bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
> +		btrfsic_submit_bio(bio);
>  	}
>  	return errors < i ? 0 : -1;
>  }
> @@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
>   * Wait for write completion of superblocks done by write_dev_supers,
>   * @max_mirrors same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or not marked up to
> + * Return number of errors when page is not found or not marked up to
>   * date.
>   */
>  static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  {
> -	struct buffer_head *bh;
>  	int i;
>  	int errors = 0;
>  	bool primary_failed = false;
> @@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
>  
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
>  			break;
>  
> -		bh = __find_get_block(device->bdev,
> -				      bytenr / BTRFS_BDEV_BLOCKSIZE,
> -				      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_get_page(device->bdev->bd_inode->i_mapping,
> +				     bytenr >> PAGE_SHIFT);
> +		if (!page) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  			continue;
>  		}
> -		wait_on_buffer(bh);
> -		if (!buffer_uptodate(bh)) {
> +		wait_on_page_locked(page);

What locks the page?

> +		if (PageError(page)) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  		}
>  
>  		/* drop our reference */
> -		brelse(bh);
> +		put_page(page);
>  
>  		/* drop the reference from the writing run */
> -		brelse(bh);
> +		put_page(page);
>  	}
>  
>  	/* log error, force error return */
> -- 
> 2.24.1

  reply	other threads:[~2020-01-17 13:38 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
2020-01-17 13:31   ` David Sterba
2020-01-17 14:30   ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
2020-01-17 13:38   ` David Sterba [this message]
2020-01-17 14:51   ` David Sterba
2020-01-17 15:01   ` Nikolay Borisov
2020-01-17 15:11     ` David Sterba
2020-01-22 15:48     ` Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
2020-01-17 15:05   ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
2020-01-17 15:10   ` Nikolay Borisov
2020-01-17 15:13     ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200117133809.GB3929@twin.jikos.cz \
    --to=dsterba@suse.cz \
    --cc=dsterba@suse.com \
    --cc=johannes.thumshirn@wdc.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=nborisov@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).