From: Jan Kara <jack@suse.cz>
To: Yu Kuai <yukuai1@huaweicloud.com>
Cc: axboe@kernel.dk, roger.pau@citrix.com, colyli@suse.de,
kent.overstreet@gmail.com, joern@lazybastard.org,
miquel.raynal@bootlin.com, richard@nod.at, vigneshr@ti.com,
sth@linux.ibm.com, hoeppner@linux.ibm.com, hca@linux.ibm.com,
gor@linux.ibm.com, agordeev@linux.ibm.com, jejb@linux.ibm.com,
martin.petersen@oracle.com, clm@fb.com, josef@toxicpanda.com,
dsterba@suse.com, viro@zeniv.linux.org.uk, brauner@kernel.org,
nico@fluxnic.net, xiang@kernel.org, chao@kernel.org,
tytso@mit.edu, adilger.kernel@dilger.ca, jack@suse.com,
konishi.ryusuke@gmail.com, willy@infradead.org,
akpm@linux-foundation.org, hare@suse.de, p.raghav@samsung.com,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
xen-devel@lists.xenproject.org, linux-bcache@vger.kernel.org,
linux-mtd@lists.infradead.org, linux-s390@vger.kernel.org,
linux-scsi@vger.kernel.org, linux-bcachefs@vger.kernel.org,
linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-erofs@lists.ozlabs.org, linux-ext4@vger.kernel.org,
linux-nilfs@vger.kernel.org, yukuai3@huawei.com,
yi.zhang@huawei.com, yangerkun@huawei.com
Subject: Re: [PATCH RFC v3 for-6.8/block 14/17] buffer: add a new helper to read sb block
Date: Thu, 4 Jan 2024 13:22:14 +0100 [thread overview]
Message-ID: <20240104122214.jndsqygnmljxmj5d@quack3> (raw)
In-Reply-To: <20231221085853.1770062-1-yukuai1@huaweicloud.com>
On Thu 21-12-23 16:58:53, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Unlike __bread_gfp(), ext4 has special handling while reading sb block:
>
> 1) __GFP_NOFAIL is not set, and memory allocation can fail;
> 2) If buffer write failed before, set buffer uptodate and don't read
> block from disk;
> 3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
> 4) If failed, return error ptr instead of NULL;
>
> This patch adds a new helper __bread_gfp2() that will match 2 and 3 above
> (1 will be used, and 4 will still be encapsulated by ext4), and prepares to
> prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
> in ext4.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
I'm not enthusiastic about this but I guess it is as good as it gets
without larger cleanups in this area. So feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> fs/buffer.c | 68 ++++++++++++++++++++++++++-----------
> include/linux/buffer_head.h | 18 +++++++++-
> 2 files changed, 65 insertions(+), 21 deletions(-)
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 967f34b70aa8..188bd36c9fea 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh)
> }
> EXPORT_SYMBOL(__bforget);
>
> -static struct buffer_head *__bread_slow(struct buffer_head *bh)
> +static struct buffer_head *__bread_slow(struct buffer_head *bh,
> + blk_opf_t op_flags,
> + bool check_write_error)
> {
> lock_buffer(bh);
> - if (buffer_uptodate(bh)) {
> + if (buffer_uptodate(bh) ||
> + (check_write_error && buffer_uptodate_or_error(bh))) {
> unlock_buffer(bh);
> return bh;
> } else {
> get_bh(bh);
> bh->b_end_io = end_buffer_read_sync;
> - submit_bh(REQ_OP_READ, bh);
> + submit_bh(REQ_OP_READ | op_flags, bh);
> wait_on_buffer(bh);
> if (buffer_uptodate(bh))
> return bh;
> @@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
> }
> EXPORT_SYMBOL(__breadahead);
>
> +static struct buffer_head *
> +bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
> + blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
> +{
> + struct buffer_head *bh;
> +
> + gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
> +
> + /*
> + * Prefer looping in the allocator rather than here, at least that
> + * code knows what it's doing.
> + */
> + gfp |= __GFP_NOFAIL;
> +
> + bh = bdev_getblk(bdev, block, size, gfp);
> + if (unlikely(!bh))
> + return NULL;
> +
> + if (buffer_uptodate(bh) ||
> + (check_write_error && buffer_uptodate_or_error(bh)))
> + return bh;
> +
> + return __bread_slow(bh, op_flags, check_write_error);
> +}
> +
> /**
> * __bread_gfp() - reads a specified block and returns the bh
> * @bdev: the block_device to read from
> @@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead);
> * It returns NULL if the block was unreadable.
> */
> struct buffer_head *
> -__bread_gfp(struct block_device *bdev, sector_t block,
> - unsigned size, gfp_t gfp)
> +__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
> + gfp_t gfp)
> {
> - struct buffer_head *bh;
> -
> - gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
> -
> - /*
> - * Prefer looping in the allocator rather than here, at least that
> - * code knows what it's doing.
> - */
> - gfp |= __GFP_NOFAIL;
> -
> - bh = bdev_getblk(bdev, block, size, gfp);
> -
> - if (likely(bh) && !buffer_uptodate(bh))
> - bh = __bread_slow(bh);
> - return bh;
> + return bread_gfp(bdev, block, size, 0, gfp, false);
> }
> EXPORT_SYMBOL(__bread_gfp);
>
> +/*
> + * This works like __bread_gfp() except:
> + * 1) If buffer write failed before, set buffer uptodate and don't read
> + * block from disk;
> + * 2) Caller can pass in additional op_flags like REQ_META;
> + */
> +struct buffer_head *
> +__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
> + blk_opf_t op_flags, gfp_t gfp)
> +{
> + return bread_gfp(bdev, block, size, op_flags, gfp, true);
> +}
> +EXPORT_SYMBOL(__bread_gfp2);
> +
> static void __invalidate_bh_lrus(struct bh_lru *b)
> {
> int i;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index 5f23ee599889..751b2744b4ae 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
> return test_bit_acquire(BH_Uptodate, &bh->b_state);
> }
>
> +static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
> +{
> + /*
> + * If the buffer has the write error flag, data was failed to write
> + * out in the block. In this case, set buffer uptodate to prevent
> + * reading old data.
> + */
> + if (buffer_write_io_error(bh))
> + set_buffer_uptodate(bh);
> + return buffer_uptodate(bh);
> +}
> +
> static inline unsigned long bh_offset(const struct buffer_head *bh)
> {
> return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
> @@ -231,7 +243,11 @@ void __brelse(struct buffer_head *);
> void __bforget(struct buffer_head *);
> void __breadahead(struct block_device *, sector_t block, unsigned int size);
> struct buffer_head *__bread_gfp(struct block_device *,
> - sector_t block, unsigned size, gfp_t gfp);
> + sector_t block, unsigned int size, gfp_t gfp);
> +struct buffer_head *__bread_gfp2(struct block_device *bdev, sector_t block,
> + unsigned int size, blk_opf_t op_flags,
> + gfp_t gfp);
> +
> struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
> void free_buffer_head(struct buffer_head * bh);
> void unlock_buffer(struct buffer_head *bh);
> --
> 2.39.2
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
next prev parent reply other threads:[~2024-01-04 12:22 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-21 8:56 [PATCH RFC v3 for-6.8/block 00/17] block: don't access bd_inode directly from other modules Yu Kuai
2023-12-21 8:56 ` [PATCH RFC v3 for-6.8/block 01/17] block: add some bdev apis Yu Kuai
2023-12-21 8:56 ` [PATCH RFC v3 for-6.8/block 02/17] xen/blkback: use bdev api in xen_update_blkif_status() Yu Kuai
2024-01-04 11:06 ` Jan Kara
2024-01-04 12:19 ` Yu Kuai
2024-01-04 15:16 ` Jan Kara
2024-01-05 6:08 ` Christoph Hellwig
2023-12-21 8:56 ` [PATCH RFC v3 for-6.8/block 03/17] bcache: use bdev api in read_super() Yu Kuai
2023-12-21 8:56 ` [PATCH RFC v3 for-6.8/block 04/17] mtd: block2mtd: use bdev apis Yu Kuai
2024-01-04 11:28 ` Jan Kara
2024-01-04 12:22 ` Yu Kuai
2024-01-05 6:10 ` Christoph Hellwig
2024-01-05 10:31 ` Yu Kuai
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 05/17] s390/dasd: use bdev api in dasd_format() Yu Kuai
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 06/17] scsicam: use bdev api in scsi_bios_ptable() Yu Kuai
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 07/17] bcachefs: remove dead function bdev_sectors() Yu Kuai
2024-04-11 17:49 ` Kent Overstreet
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 08/17] bio: export bio_add_folio_nofail() Yu Kuai
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 09/17] btrfs: use bdev apis Yu Kuai
2023-12-23 17:31 ` Matthew Wilcox
2023-12-23 18:39 ` Kent Overstreet
2024-01-04 11:49 ` Jan Kara
2024-04-10 17:28 ` David Sterba
2023-12-21 8:57 ` [PATCH RFC v3 for-6.8/block 10/17] cramfs: use bdev apis in cramfs_blkdev_read() Yu Kuai
2023-12-21 8:58 ` [PATCH RFC v3 for-6.8/block 11/17] erofs: use bdev api Yu Kuai
2024-01-04 12:02 ` Jan Kara
2024-01-04 12:32 ` Yu Kuai
2024-01-05 4:43 ` Gao Xiang
2023-12-21 8:58 ` [PATCH RFC v3 for-6.8/block 12/17] nilfs2: use bdev api in nilfs_attach_log_writer() Yu Kuai
2023-12-21 14:54 ` Ryusuke Konishi
2023-12-21 8:58 ` [PATCH RFC v3 for-6.8/block 13/17] jbd2: use bdev apis Yu Kuai
2024-01-04 12:11 ` Jan Kara
2023-12-21 8:58 ` [PATCH RFC v3 for-6.8/block 14/17] buffer: add a new helper to read sb block Yu Kuai
2024-01-04 12:22 ` Jan Kara [this message]
2023-12-21 8:58 ` [PATCH RFC v3 for-6.8/block 15/17] ext4: use " Yu Kuai
2023-12-21 8:59 ` [PATCH RFC v3 for-6.8/block 16/17] ext4: remove block_device_ejected() Yu Kuai
2023-12-21 8:59 ` [PATCH RFC v3 for-6.8/block 17/17] ext4: use bdev apis Yu Kuai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240104122214.jndsqygnmljxmj5d@quack3 \
--to=jack@suse.cz \
--cc=adilger.kernel@dilger.ca \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=chao@kernel.org \
--cc=clm@fb.com \
--cc=colyli@suse.de \
--cc=dsterba@suse.com \
--cc=gor@linux.ibm.com \
--cc=hare@suse.de \
--cc=hca@linux.ibm.com \
--cc=hoeppner@linux.ibm.com \
--cc=jack@suse.com \
--cc=jejb@linux.ibm.com \
--cc=joern@lazybastard.org \
--cc=josef@toxicpanda.com \
--cc=kent.overstreet@gmail.com \
--cc=konishi.ryusuke@gmail.com \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-bcachefs@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-erofs@lists.ozlabs.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mtd@lists.infradead.org \
--cc=linux-nilfs@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=miquel.raynal@bootlin.com \
--cc=nico@fluxnic.net \
--cc=p.raghav@samsung.com \
--cc=richard@nod.at \
--cc=roger.pau@citrix.com \
--cc=sth@linux.ibm.com \
--cc=tytso@mit.edu \
--cc=vigneshr@ti.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
--cc=xen-devel@lists.xenproject.org \
--cc=xiang@kernel.org \
--cc=yangerkun@huawei.com \
--cc=yi.zhang@huawei.com \
--cc=yukuai1@huaweicloud.com \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).