From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v2 06/17] btrfs: replace btrfs_dio_private::refs with btrfs_dio_private::pending_bytes
Date: Mon, 6 Dec 2021 10:29:26 +0800 [thread overview]
Message-ID: <20211206022937.26465-7-wqu@suse.com> (raw)
In-Reply-To: <20211206022937.26465-1-wqu@suse.com>
This mostly follows the behavior of compressed_bio::pending_sectors.
The point here is that dip::refs is not split-bio friendly: if a bio has
its bi_private = dip and that bio gets split, we can easily underflow
dip::refs.
By using the same sector based solution as compressed_bio, dio can
handle both unsplit and split bios.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/btrfs_inode.h | 10 +++----
fs/btrfs/inode.c | 67 +++++++++++++++++++++---------------------
2 files changed, 38 insertions(+), 39 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b3e46aabc3d8..196f74ee102e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -358,11 +358,11 @@ struct btrfs_dio_private {
/* Used for bio::bi_size */
u32 bytes;
- /*
- * References to this structure. There is one reference per in-flight
- * bio plus one while we're still setting up.
- */
- refcount_t refs;
+ /* Hit any error for the whole DIO bio */
+ bool errors;
+
+ /* How many bytes are still under IO or not submitted */
+ atomic_t pending_bytes;
/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 007a20a9b076..1aa060de917c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8053,20 +8053,28 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
return ret;
}
-static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
+static bool dec_and_test_dio_private(struct btrfs_dio_private *dip, bool error,
+ u32 bytes)
{
- /*
- * This implies a barrier so that stores to dio_bio->bi_status before
- * this and loads of dio_bio->bi_status after this are fully ordered.
- */
- if (!refcount_dec_and_test(&dip->refs))
+ ASSERT(bytes <= dip->bytes);
+ ASSERT(bytes <= atomic_read(&dip->pending_bytes));
+
+ if (error)
+ dip->errors = true;
+ return atomic_sub_and_test(bytes, &dip->pending_bytes);
+}
+
+static void dio_private_finish(struct btrfs_dio_private *dip, bool error,
+ u32 bytes)
+{
+ if (!dec_and_test_dio_private(dip, error, bytes))
return;
if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
__endio_write_update_ordered(BTRFS_I(dip->inode),
dip->file_offset,
dip->bytes,
- !dip->dio_bio->bi_status);
+ !dip->errors);
} else {
unlock_extent(&BTRFS_I(dip->inode)->io_tree,
dip->file_offset,
@@ -8087,10 +8095,10 @@ static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
BUG_ON(bio_op(bio) == REQ_OP_WRITE);
- refcount_inc(&dip->refs);
+ atomic_add(bio->bi_iter.bi_size, &dip->pending_bytes);
ret = btrfs_map_bio(fs_info, bio, mirror_num);
if (ret)
- refcount_dec(&dip->refs);
+ atomic_sub(bio->bi_iter.bi_size, &dip->pending_bytes);
return ret;
}
@@ -8166,20 +8174,20 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
+ struct bvec_iter iter;
+ struct bio_vec bvec;
+ u32 bi_size = 0;
blk_status_t err = bio->bi_status;
- if (err) {
- struct bvec_iter_all iter_all;
- struct bio_vec *bvec;
- u32 bi_size = 0;
-
- bio_for_each_segment_all(bvec, bio, iter_all)
- bi_size += bvec->bv_len;
+ __bio_for_each_segment(bvec, bio, iter, btrfs_bio(bio)->iter)
+ bi_size += bvec.bv_len;
+ if (err) {
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
"direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
bio->bi_opf, bio->bi_iter.bi_sector, bi_size, err);
+ dip->errors = true;
}
if (bio_op(bio) == REQ_OP_READ)
@@ -8191,7 +8199,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
bio_put(bio);
- btrfs_dio_private_put(dip);
+ dio_private_finish(dip, err, bi_size);
}
static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
@@ -8250,7 +8258,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
*/
static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
struct inode *inode,
- loff_t file_offset)
+ loff_t file_offset,
+ u32 length)
{
const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
@@ -8270,12 +8279,12 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
if (!dip)
return NULL;
+ atomic_set(&dip->pending_bytes, length);
dip->inode = inode;
dip->file_offset = file_offset;
dip->bytes = dio_bio->bi_iter.bi_size;
dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
dip->dio_bio = dio_bio;
- refcount_set(&dip->refs, 1);
return dip;
}
@@ -8289,6 +8298,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
BTRFS_BLOCK_GROUP_RAID56_MASK);
struct btrfs_dio_private *dip;
struct bio *bio;
+ const u32 length = dio_bio->bi_iter.bi_size;
+ u32 submitted_bytes = 0;
u64 start_sector;
int async_submit = 0;
u64 submit_len;
@@ -8301,7 +8312,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
struct btrfs_dio_data *dio_data = iter->iomap.private;
struct extent_map *em = NULL;
- dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
+ dip = btrfs_create_dio_private(dio_bio, inode, file_offset, length);
if (!dip) {
if (!write) {
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
@@ -8311,7 +8322,6 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
bio_endio(dio_bio);
return;
}
-
if (!write) {
/*
* Load the csums up front to reduce csum tree searches and
@@ -8365,17 +8375,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
ASSERT(submit_len >= clone_len);
submit_len -= clone_len;
- /*
- * Increase the count before we submit the bio so we know
- * the end IO handler won't happen before we increase the
- * count. Otherwise, the dip might get freed before we're
- * done setting it up.
- *
- * We transfer the initial reference to the last bio, so we
- * don't need to increment the reference count for the last one.
- */
if (submit_len > 0) {
- refcount_inc(&dip->refs);
/*
* If we are submitting more than one bio, submit them
* all asynchronously. The exception is RAID 5 or 6, as
@@ -8390,11 +8390,10 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
async_submit);
if (status) {
bio_put(bio);
- if (submit_len > 0)
- refcount_dec(&dip->refs);
goto out_err_em;
}
+ submitted_bytes += clone_len;
dio_data->submitted += clone_len;
clone_offset += clone_len;
start_sector += clone_len >> 9;
@@ -8408,7 +8407,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
free_extent_map(em);
out_err:
dip->dio_bio->bi_status = status;
- btrfs_dio_private_put(dip);
+ dio_private_finish(dip, status, length - submitted_bytes);
}
const struct iomap_ops btrfs_dio_iomap_ops = {
--
2.34.1
next prev parent reply other threads:[~2021-12-06 2:30 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-06 2:29 [PATCH v2 00/17] btrfs: split bio at btrfs_map_bio() time Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 01/17] btrfs: update an stale comment on btrfs_submit_bio_hook() Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 02/17] btrfs: save bio::bi_iter into btrfs_bio::iter before any endio Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 03/17] btrfs: use correct bio size for error message in btrfs_end_dio_bio() Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 04/17] btrfs: refactor btrfs_map_bio() Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 05/17] btrfs: move btrfs_bio_wq_end_io() calls into submit_stripe_bio() Qu Wenruo
2021-12-06 2:29 ` Qu Wenruo [this message]
2021-12-09 10:02 ` [PATCH v2 06/17] btrfs: replace btrfs_dio_private::refs with btrfs_dio_private::pending_bytes Johannes Thumshirn
2021-12-09 10:35 ` Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 07/17] btrfs: introduce btrfs_bio_split() helper Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 08/17] btrfs: make data buffered read path to handle split bio properly Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 09/17] btrfs: make data buffered write endio function to be split bio compatible Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 10/17] btrfs: make metadata write endio functions " Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 11/17] btrfs: make dec_and_test_compressed_bio() " Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 12/17] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 13/17] btrfs: allow btrfs_map_bio() to split bio according to chunk stripe boundaries Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 14/17] btrfs: remove buffered IO stripe boundary calculation Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 15/17] btrfs: remove stripe boundary calculation for compressed IO Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 16/17] btrfs: remove the stripe boundary calculation for direct IO Qu Wenruo
2021-12-06 2:29 ` [PATCH v2 17/17] btrfs: unexport btrfs_get_io_geometry() Qu Wenruo
2021-12-09 10:06 ` [PATCH v2 00/17] btrfs: split bio at btrfs_map_bio() time Johannes Thumshirn
2021-12-09 10:52 ` Johannes Thumshirn
2021-12-09 11:08 ` Qu Wenruo
2021-12-09 11:13 ` Johannes Thumshirn
2022-01-12 0:33 ` Qu Wenruo
2022-01-12 9:00 ` Johannes Thumshirn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211206022937.26465-7-wqu@suse.com \
--to=wqu@suse.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).