linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v2 06/17] btrfs: replace btrfs_dio_private::refs with btrfs_dio_private::pending_bytes
Date: Mon,  6 Dec 2021 10:29:26 +0800	[thread overview]
Message-ID: <20211206022937.26465-7-wqu@suse.com> (raw)
In-Reply-To: <20211206022937.26465-1-wqu@suse.com>

This mostly follows the behavior of compressed_bio::pending_sectors.

The point here is, dip::refs is not split bio friendly, as if a bio with
its bi_private = dip, and the bio get split, we can easily underflow
dip::refs.

By using the same sector based solution as compressed_bio, dio can
handle both unsplit and split bios.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/btrfs_inode.h | 10 +++----
 fs/btrfs/inode.c       | 67 +++++++++++++++++++++---------------------
 2 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b3e46aabc3d8..196f74ee102e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -358,11 +358,11 @@ struct btrfs_dio_private {
 	/* Used for bio::bi_size */
 	u32 bytes;
 
-	/*
-	 * References to this structure. There is one reference per in-flight
-	 * bio plus one while we're still setting up.
-	 */
-	refcount_t refs;
+	/* Hit any error for the whole DIO bio */
+	bool errors;
+
+	/* How many bytes are still under IO or not submitted */
+	atomic_t pending_bytes;
 
 	/* dio_bio came from fs/direct-io.c */
 	struct bio *dio_bio;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 007a20a9b076..1aa060de917c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8053,20 +8053,28 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 	return ret;
 }
 
-static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
+static bool dec_and_test_dio_private(struct btrfs_dio_private *dip, bool error,
+	    			     u32 bytes)
 {
-	/*
-	 * This implies a barrier so that stores to dio_bio->bi_status before
-	 * this and loads of dio_bio->bi_status after this are fully ordered.
-	 */
-	if (!refcount_dec_and_test(&dip->refs))
+	ASSERT(bytes <= dip->bytes);
+	ASSERT(bytes <= atomic_read(&dip->pending_bytes));
+
+	if (error)
+		dip->errors = true;
+	return atomic_sub_and_test(bytes, &dip->pending_bytes);
+}
+
+static void dio_private_finish(struct btrfs_dio_private *dip, bool error,
+			       u32 bytes)
+{
+	if (!dec_and_test_dio_private(dip, error, bytes))
 		return;
 
 	if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
 		__endio_write_update_ordered(BTRFS_I(dip->inode),
 					     dip->file_offset,
 					     dip->bytes,
-					     !dip->dio_bio->bi_status);
+					     !dip->errors);
 	} else {
 		unlock_extent(&BTRFS_I(dip->inode)->io_tree,
 			      dip->file_offset,
@@ -8087,10 +8095,10 @@ static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 
 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
-	refcount_inc(&dip->refs);
+	atomic_add(bio->bi_iter.bi_size, &dip->pending_bytes);
 	ret = btrfs_map_bio(fs_info, bio, mirror_num);
 	if (ret)
-		refcount_dec(&dip->refs);
+		atomic_sub(bio->bi_iter.bi_size, &dip->pending_bytes);
 	return ret;
 }
 
@@ -8166,20 +8174,20 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
 	struct btrfs_dio_private *dip = bio->bi_private;
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	u32 bi_size = 0;
 	blk_status_t err = bio->bi_status;
 
-	if (err) {
-		struct bvec_iter_all iter_all;
-		struct bio_vec *bvec;
-		u32 bi_size = 0;
-
-		bio_for_each_segment_all(bvec, bio, iter_all)
-			bi_size += bvec->bv_len;
+	__bio_for_each_segment(bvec, bio, iter, btrfs_bio(bio)->iter)
+		bi_size += bvec.bv_len;
 
+	if (err) {
 		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
 			   "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
 			   btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
 			   bio->bi_opf, bio->bi_iter.bi_sector, bi_size, err);
+		dip->errors = true;
 	}
 
 	if (bio_op(bio) == REQ_OP_READ)
@@ -8191,7 +8199,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 	btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
 
 	bio_put(bio);
-	btrfs_dio_private_put(dip);
+	dio_private_finish(dip, err, bi_size);
 }
 
 static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
@@ -8250,7 +8258,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
  */
 static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
 							  struct inode *inode,
-							  loff_t file_offset)
+							  loff_t file_offset,
+							  u32 length)
 {
 	const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
 	const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
@@ -8270,12 +8279,12 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
 	if (!dip)
 		return NULL;
 
+	atomic_set(&dip->pending_bytes, length);
 	dip->inode = inode;
 	dip->file_offset = file_offset;
 	dip->bytes = dio_bio->bi_iter.bi_size;
 	dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
 	dip->dio_bio = dio_bio;
-	refcount_set(&dip->refs, 1);
 	return dip;
 }
 
@@ -8289,6 +8298,8 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 			     BTRFS_BLOCK_GROUP_RAID56_MASK);
 	struct btrfs_dio_private *dip;
 	struct bio *bio;
+	const u32 length = dio_bio->bi_iter.bi_size;
+	u32 submitted_bytes = 0;
 	u64 start_sector;
 	int async_submit = 0;
 	u64 submit_len;
@@ -8301,7 +8312,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 	struct btrfs_dio_data *dio_data = iter->iomap.private;
 	struct extent_map *em = NULL;
 
-	dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
+	dip = btrfs_create_dio_private(dio_bio, inode, file_offset, length);
 	if (!dip) {
 		if (!write) {
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
@@ -8311,7 +8322,6 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 		bio_endio(dio_bio);
 		return;
 	}
-
 	if (!write) {
 		/*
 		 * Load the csums up front to reduce csum tree searches and
@@ -8365,17 +8375,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 		ASSERT(submit_len >= clone_len);
 		submit_len -= clone_len;
 
-		/*
-		 * Increase the count before we submit the bio so we know
-		 * the end IO handler won't happen before we increase the
-		 * count. Otherwise, the dip might get freed before we're
-		 * done setting it up.
-		 *
-		 * We transfer the initial reference to the last bio, so we
-		 * don't need to increment the reference count for the last one.
-		 */
 		if (submit_len > 0) {
-			refcount_inc(&dip->refs);
 			/*
 			 * If we are submitting more than one bio, submit them
 			 * all asynchronously. The exception is RAID 5 or 6, as
@@ -8390,11 +8390,10 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 						async_submit);
 		if (status) {
 			bio_put(bio);
-			if (submit_len > 0)
-				refcount_dec(&dip->refs);
 			goto out_err_em;
 		}
 
+		submitted_bytes += clone_len;
 		dio_data->submitted += clone_len;
 		clone_offset += clone_len;
 		start_sector += clone_len >> 9;
@@ -8408,7 +8407,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 	free_extent_map(em);
 out_err:
 	dip->dio_bio->bi_status = status;
-	btrfs_dio_private_put(dip);
+	dio_private_finish(dip, status, length - submitted_bytes);
 }
 
 const struct iomap_ops btrfs_dio_iomap_ops = {
-- 
2.34.1


  parent reply	other threads:[~2021-12-06  2:30 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-06  2:29 [PATCH v2 00/17] btrfs: split bio at btrfs_map_bio() time Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 01/17] btrfs: update an stale comment on btrfs_submit_bio_hook() Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 02/17] btrfs: save bio::bi_iter into btrfs_bio::iter before any endio Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 03/17] btrfs: use correct bio size for error message in btrfs_end_dio_bio() Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 04/17] btrfs: refactor btrfs_map_bio() Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 05/17] btrfs: move btrfs_bio_wq_end_io() calls into submit_stripe_bio() Qu Wenruo
2021-12-06  2:29 ` Qu Wenruo [this message]
2021-12-09 10:02   ` [PATCH v2 06/17] btrfs: replace btrfs_dio_private::refs with btrfs_dio_private::pending_bytes Johannes Thumshirn
2021-12-09 10:35     ` Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 07/17] btrfs: introduce btrfs_bio_split() helper Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 08/17] btrfs: make data buffered read path to handle split bio properly Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 09/17] btrfs: make data buffered write endio function to be split bio compatible Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 10/17] btrfs: make metadata write endio functions " Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 11/17] btrfs: make dec_and_test_compressed_bio() " Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 12/17] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 13/17] btrfs: allow btrfs_map_bio() to split bio according to chunk stripe boundaries Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 14/17] btrfs: remove buffered IO stripe boundary calculation Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 15/17] btrfs: remove stripe boundary calculation for compressed IO Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 16/17] btrfs: remove the stripe boundary calculation for direct IO Qu Wenruo
2021-12-06  2:29 ` [PATCH v2 17/17] btrfs: unexport btrfs_get_io_geometry() Qu Wenruo
2021-12-09 10:06 ` [PATCH v2 00/17] btrfs: split bio at btrfs_map_bio() time Johannes Thumshirn
2021-12-09 10:52   ` Johannes Thumshirn
2021-12-09 11:08     ` Qu Wenruo
2021-12-09 11:13       ` Johannes Thumshirn
2022-01-12  0:33         ` Qu Wenruo
2022-01-12  9:00           ` Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211206022937.26465-7-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).