From: David Sterba <dsterba@suse.cz>
To: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: David Sterba <dsterba@suse.com>,
Nikolay Borisov <nborisov@suse.com>,
linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
Date: Fri, 17 Jan 2020 14:38:09 +0100 [thread overview]
Message-ID: <20200117133809.GB3929@twin.jikos.cz> (raw)
In-Reply-To: <20200117125105.20989-3-johannes.thumshirn@wdc.com>
On Fri, Jan 17, 2020 at 09:51:02PM +0900, Johannes Thumshirn wrote:
> Similar to the superblock read path, change the write path to using BIOs
> and pages instead of buffer_heads.
>
> This is based on a patch originally authored by Nikolay Borisov.
>
> Co-developed-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
> 1 file changed, 61 insertions(+), 46 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 50c93ffe8d03..51e7b832c8fd 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
> }
> ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
>
> -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +static void btrfs_end_super_write(struct bio *bio)
> {
> - if (uptodate) {
> - set_buffer_uptodate(bh);
> - } else {
> - struct btrfs_device *device = (struct btrfs_device *)
> - bh->b_private;
> -
> - btrfs_warn_rl_in_rcu(device->fs_info,
> - "lost page write due to IO error on %s",
> - rcu_str_deref(device->name));
> - /* note, we don't set_buffer_write_io_error because we have
> - * our own ways of dealing with the IO errors
> - */
> - clear_buffer_uptodate(bh);
> - btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
> + struct btrfs_device *device = bio->bi_private;
> + struct bio_vec *bvec;
> + struct bvec_iter_all iter_all;
> + struct page *page;
> +
> + bio_for_each_segment_all(bvec, bio, iter_all) {
> + page = bvec->bv_page;
> +
> + if (blk_status_to_errno(bio->bi_status)) {
> + btrfs_warn_rl_in_rcu(device->fs_info,
> + "lost page write due to IO error on %s",
> + rcu_str_deref(device->name));
> + ClearPageUptodate(page);
> + SetPageError(page);
> + btrfs_dev_stat_inc_and_print(device,
> + BTRFS_DEV_STAT_WRITE_ERRS);
> + } else {
> + SetPageUptodate(page);
> + }
> +
> + put_page(page);
> + unlock_page(page);
> }
> - unlock_buffer(bh);
> - put_bh(bh);
> +
> + bio_put(bio);
> }
>
> int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> @@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
> * the expected device size at commit time. Note that max_mirrors must be
> * same for write and wait phases.
> *
> - * Return number of errors when buffer head is not found or submission fails.
> + * Return number of errors when page is not found or submission fails.
> */
> static int write_dev_supers(struct btrfs_device *device,
> struct btrfs_super_block *sb, int max_mirrors)
> {
> struct btrfs_fs_info *fs_info = device->fs_info;
> SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
> - struct buffer_head *bh;
> + gfp_t gfp_mask;
> int i;
> - int ret;
> int errors = 0;
> u64 bytenr;
> int op_flags;
> @@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
>
> shash->tfm = fs_info->csum_shash;
>
> + gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
> + ~__GFP_FS) | __GFP_NOFAIL;
> +
> for (i = 0; i < max_mirrors; i++) {
> + struct page *page;
> + struct bio *bio;
> +
> bytenr = btrfs_sb_offset(i);
> if (bytenr + BTRFS_SUPER_INFO_SIZE >=
> device->commit_total_bytes)
> @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
> BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
> crypto_shash_final(shash, sb->csum);
>
> - /* One reference for us, and we leave it for the caller */
> - bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> - BTRFS_SUPER_INFO_SIZE);
> - if (!bh) {
> + page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> + bytenr >> PAGE_SHIFT, gfp_mask);
This has NOFAIL again, but now we're in write_dev_supers, so this has
some implications regarding potentially unbounded waiting.
> + if (!page) {
> btrfs_err(device->fs_info,
> - "couldn't get super buffer head for bytenr %llu",
> + "couldn't get superblock page for bytenr %llu",
> bytenr);
> errors++;
> continue;
> }
>
> - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
> + /* Bump the refcount for wait_dev_supers() */
> + get_page(page);
>
> - /* one reference for submit_bh */
> - get_bh(bh);
> -
> - set_buffer_uptodate(bh);
> - lock_buffer(bh);
> - bh->b_end_io = btrfs_end_buffer_write_sync;
> - bh->b_private = device;
> + memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>
> /*
> * we fua the first super. The others we allow
> @@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
> op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
> if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
> op_flags |= REQ_FUA;
> - ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
> - if (ret)
> - errors++;
> +
> + bio = bio_alloc(gfp_mask, 1);
And allocating a new bio when we have to write the superblock is also
not very nice. This should do something like device::flush_bio, i.e. use
a bio that can be reused.
> + bio_set_dev(bio, device->bdev);
> + bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> + bio->bi_private = device;
> + bio->bi_end_io = btrfs_end_super_write;
> + bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
> + offset_in_page(bytenr));
> +
> + bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
> + btrfsic_submit_bio(bio);
> }
> return errors < i ? 0 : -1;
> }
> @@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
> * Wait for write completion of superblocks done by write_dev_supers,
> * @max_mirrors same for write and wait phases.
> *
> - * Return number of errors when buffer head is not found or not marked up to
> + * Return number of errors when page is not found or not marked up to
> * date.
> */
> static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
> {
> - struct buffer_head *bh;
> int i;
> int errors = 0;
> bool primary_failed = false;
> @@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
> max_mirrors = BTRFS_SUPER_MIRROR_MAX;
>
> for (i = 0; i < max_mirrors; i++) {
> + struct page *page;
> +
> bytenr = btrfs_sb_offset(i);
> if (bytenr + BTRFS_SUPER_INFO_SIZE >=
> device->commit_total_bytes)
> break;
>
> - bh = __find_get_block(device->bdev,
> - bytenr / BTRFS_BDEV_BLOCKSIZE,
> - BTRFS_SUPER_INFO_SIZE);
> - if (!bh) {
> + page = find_get_page(device->bdev->bd_inode->i_mapping,
> + bytenr >> PAGE_SHIFT);
> + if (!page) {
> errors++;
> if (i == 0)
> primary_failed = true;
> continue;
> }
> - wait_on_buffer(bh);
> - if (!buffer_uptodate(bh)) {
> + wait_on_page_locked(page);
What locks the page?
> + if (PageError(page)) {
> errors++;
> if (i == 0)
> primary_failed = true;
> }
>
> /* drop our reference */
> - brelse(bh);
> + put_page(page);
>
> /* drop the reference from the writing run */
> - brelse(bh);
> + put_page(page);
> }
>
> /* log error, force error return */
> --
> 2.24.1
next prev parent reply other threads:[~2020-01-17 13:38 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
2020-01-17 13:31 ` David Sterba
2020-01-17 14:30 ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
2020-01-17 13:38 ` David Sterba [this message]
2020-01-17 14:51 ` David Sterba
2020-01-17 15:01 ` Nikolay Borisov
2020-01-17 15:11 ` David Sterba
2020-01-22 15:48 ` Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
2020-01-17 15:05 ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
2020-01-17 15:10 ` Nikolay Borisov
2020-01-17 15:13 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200117133809.GB3929@twin.jikos.cz \
--to=dsterba@suse.cz \
--cc=dsterba@suse.com \
--cc=johannes.thumshirn@wdc.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=nborisov@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).