All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Sterba <dsterba@suse.cz>
To: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: David Sterba <dsterba@suse.com>,
	Nikolay Borisov <nborisov@suse.com>,
	"linux-btrfs @ vger . kernel . org" <linux-btrfs@vger.kernel.org>
Subject: Re: [PATCH v2 2/6] btrfs: remove buffer heads from super block reading
Date: Fri, 24 Jan 2020 15:03:12 +0100	[thread overview]
Message-ID: <20200124140312.GJ3929@twin.jikos.cz> (raw)
In-Reply-To: <20200123081849.23397-3-johannes.thumshirn@wdc.com>

On Thu, Jan 23, 2020 at 05:18:45PM +0900, Johannes Thumshirn wrote:
> Super-block reading in BTRFS is done using buffer_heads. Buffer_heads have
> some drawbacks, like not being able to propagate errors from the lower
> layers.
> 
> Change the buffer_heads to BIOs and utilize the page cache for the page
> allocation. Compared to buffer_heads using BIOs are more lightweight and
> we skip several layers of buffer_head code until we either reach the page
> cache or build a BIO and submit it to read the blocks from disk.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> ---
> Changes to v1:
> - move 'super_page' into for-loop in btrfs_scratch_superblocks() (Nikolay)
> - switch to using pagecahce instead of alloc_pages() (Nikolay, David)
> ---
>  fs/btrfs/disk-io.c | 83 ++++++++++++++++++++++++++++++----------------
>  fs/btrfs/disk-io.h |  4 +--
>  fs/btrfs/volumes.c | 62 ++++++++++++++++++++--------------
>  fs/btrfs/volumes.h |  2 --
>  4 files changed, 94 insertions(+), 57 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index aea48d6ddc0c..b111f32108cc 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2635,11 +2635,12 @@ int __cold open_ctree(struct super_block *sb,
>  	u64 features;
>  	u16 csum_type;
>  	struct btrfs_key location;
> -	struct buffer_head *bh;
>  	struct btrfs_super_block *disk_super;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
>  	struct btrfs_root *tree_root;
>  	struct btrfs_root *chunk_root;
> +	struct page *super_page;
> +	u8 *superblock;
>  	int ret;
>  	int err = -EINVAL;
>  	int clear_free_space_tree = 0;
> @@ -2832,28 +2833,31 @@ int __cold open_ctree(struct super_block *sb,
>  	/*
>  	 * Read super block and check the signature bytes only
>  	 */
> -	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
> -	if (IS_ERR(bh)) {
> -		err = PTR_ERR(bh);
> +	ret = btrfs_read_dev_super(fs_devices->latest_bdev, &super_page);
> +	if (ret) {
> +		err = ret;
>  		goto fail_alloc;
>  	}
>  
> +	superblock = kmap(super_page);
>  	/*
>  	 * Verify the type first, if that or the the checksum value are
>  	 * corrupted, we'll find out
>  	 */
> -	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
> +	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)
> +					  superblock);
>  	if (!btrfs_supported_super_csum(csum_type)) {
>  		btrfs_err(fs_info, "unsupported checksum algorithm: %u",
>  			  csum_type);
>  		err = -EINVAL;
> -		brelse(bh);
> +		btrfs_release_disk_super(super_page);
>  		goto fail_alloc;
>  	}
>  
>  	ret = btrfs_init_csum_hash(fs_info, csum_type);
>  	if (ret) {
>  		err = ret;
> +		btrfs_release_disk_super(super_page);
>  		goto fail_alloc;
>  	}
>  
> @@ -2861,10 +2865,10 @@ int __cold open_ctree(struct super_block *sb,
>  	 * We want to check superblock checksum, the type is stored inside.
>  	 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
>  	 */
> -	if (btrfs_check_super_csum(fs_info, bh->b_data)) {
> +	if (btrfs_check_super_csum(fs_info, superblock)) {
>  		btrfs_err(fs_info, "superblock checksum mismatch");
>  		err = -EINVAL;
> -		brelse(bh);
> +		btrfs_release_disk_super(super_page);
>  		goto fail_csum;
>  	}
>  
> @@ -2873,8 +2877,8 @@ int __cold open_ctree(struct super_block *sb,
>  	 * following bytes up to INFO_SIZE, the checksum is calculated from
>  	 * the whole block of INFO_SIZE
>  	 */
> -	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
> -	brelse(bh);
> +	memcpy(fs_info->super_copy, superblock, sizeof(*fs_info->super_copy));
> +	btrfs_release_disk_super(super_page);
>  
>  	disk_super = fs_info->super_copy;
>  
> @@ -3374,40 +3378,60 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> -			struct buffer_head **bh_ret)
> +			struct page **super_page)
>  {
> -	struct buffer_head *bh;
>  	struct btrfs_super_block *super;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
> +	struct page *page;
>  	u64 bytenr;
> +	struct address_space *mapping = bdev->bd_inode->i_mapping;
> +	gfp_t gfp_mask;
> +	int ret;
>  
>  	bytenr = btrfs_sb_offset(copy_num);
>  	if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
>  		return -EINVAL;
>  
> -	bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
> +	gfp_mask = mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL;
> +	page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
> +	if (!page)
> +		return -ENOMEM;
> +
> +	bio_init(&bio, &bio_vec, 1);
> +	bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +	bio_set_dev(&bio, bdev);
> +	bio_set_op_attrs(&bio, REQ_OP_READ, 0);

The comment in blk_types.h says:

390 /* obsolete, don't use in new code */
391 static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
392                 unsigned op_flags)

please open code it. Other uses of bio_set_op_attrs have been cleaned up
already.

> +	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> +		     offset_in_page(bytenr));

For a fresh bio this never fails right?

> +
> +	ret = submit_bio_wait(&bio);
> +	unlock_page(page);
>  	/*
>  	 * If we fail to read from the underlying devices, as of now
>  	 * the best option we have is to mark it EIO.
>  	 */
> -	if (!bh)
> +	if (ret) {
> +		put_page(page);
>  		return -EIO;
> +	}
>  
> -	super = (struct btrfs_super_block *)bh->b_data;
> +	super = kmap(page);
>  	if (btrfs_super_bytenr(super) != bytenr ||
>  		    btrfs_super_magic(super) != BTRFS_MAGIC) {
> -		brelse(bh);
> +		btrfs_release_disk_super(page);
>  		return -EINVAL;
>  	}
> +	kunmap(page);
>  
> -	*bh_ret = bh;
> +	*super_page = page;
>  	return 0;
>  }
>  
>  
> -struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
> +int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>  {
> -	struct buffer_head *bh;
> -	struct buffer_head *latest = NULL;
> +	struct page *latest = NULL;
>  	struct btrfs_super_block *super;
>  	int i;
>  	u64 transid = 0;
> @@ -3419,25 +3443,28 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
>  	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
>  	 */
>  	for (i = 0; i < 1; i++) {
> -		ret = btrfs_read_dev_one_super(bdev, i, &bh);
> +		ret = btrfs_read_dev_one_super(bdev, i, page);
>  		if (ret)
>  			continue;
>  
> -		super = (struct btrfs_super_block *)bh->b_data;
> +		super = kmap(*page);
>  
>  		if (!latest || btrfs_super_generation(super) > transid) {
> -			brelse(latest);
> -			latest = bh;
> +			if (latest)
> +				btrfs_release_disk_super(latest);
> +			latest = *page;
>  			transid = btrfs_super_generation(super);
>  		} else {
> -			brelse(bh);
> +			btrfs_release_disk_super(*page);
>  		}
> +
> +		kunmap(*page);

This looks like double unmap, once in btrfs_release_disk_super and then
unconditinally as kunmap.

The first part of the if/else block also calls kunmap, but depending on
latest.

So another point for opencoding btrfs_release_disk_super, this kind of
mistakes is too easy.

>  	}
>  
> -	if (!latest)
> -		return ERR_PTR(ret);
> +	if (ret)
> +		return ret;
>  
> -	return latest;
> +	return 0;
>  }
>  
>  /*

  parent reply	other threads:[~2020-01-24 14:03 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-23  8:18 [PATCH v2 0/6] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
2020-01-23  8:18 ` [PATCH v2 1/6] btrfs: export btrfs_release_disk_super Johannes Thumshirn
2020-01-23 13:42   ` Josef Bacik
2020-01-23  8:18 ` [PATCH v2 2/6] btrfs: remove buffer heads from super block reading Johannes Thumshirn
2020-01-23 13:51   ` Josef Bacik
2020-01-23 13:53     ` Johannes Thumshirn
2020-01-23 14:13   ` Nikolay Borisov
2020-01-23 16:45     ` Johannes Thumshirn
2020-01-23 22:25   ` David Sterba
2020-01-24 13:54   ` David Sterba
2020-01-24 14:03   ` David Sterba [this message]
2020-01-23  8:18 ` [PATCH v2 3/6] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
2020-01-23 13:59   ` Josef Bacik
2020-01-23 14:19   ` Nikolay Borisov
2020-01-24 14:15   ` David Sterba
2020-01-23  8:18 ` [PATCH v2 4/6] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
2020-01-23 14:00   ` Josef Bacik
2020-01-23  8:18 ` [PATCH v2 5/6] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
2020-01-23 14:00   ` Josef Bacik
2020-01-23  8:18 ` [PATCH v2 6/6] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
2020-01-23 14:03   ` Josef Bacik
2020-01-23 14:23   ` Nikolay Borisov
2020-01-24 14:22   ` David Sterba
2020-01-24 14:59     ` Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200124140312.GJ3929@twin.jikos.cz \
    --to=dsterba@suse.cz \
    --cc=dsterba@suse.com \
    --cc=johannes.thumshirn@wdc.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=nborisov@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.