* cleanup btrfs bio submission v2
@ 2022-06-17 10:04 Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 01/10] btrfs: remove a bunch of pointless stripe_len arguments Christoph Hellwig
                   ` (11 more replies)
  0 siblings, 12 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Hi all,

this series removes a bunch of pointlessly passed arguments in the
raid56 code and then cleans up the btrfs_bio submission to always
return errors through the end I/O handler.
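
To make the target model concrete, here is a minimal sketch of the
submission contract the series converges on (example_submit() and
do_setup_work() are hypothetical names, not functions from the series):

	/*
	 * Sketch: a submission helper that consumes the bio on error
	 * instead of returning an error code to the caller.
	 */
	static void example_submit(struct bio *bio)
	{
		int ret = do_setup_work(bio);	/* hypothetical setup step */

		if (ret) {
			/* the error is delivered through the end I/O handler */
			bio->bi_status = errno_to_blk_status(ret);
			bio_endio(bio);
			return;
		}
		submit_bio(bio);
	}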

Changes since v1:
 - split the raid submission patches up a little better
 - fix a whitespace issue
 - add a new patch to properly deal with memory allocation failures in
   btrfs_wq_submit_bio
 - add more cleanups to prepare for and go along with the above new change

Diffstat:
 compression.c |    8 ---
 disk-io.c     |   46 +++++++++----------
 disk-io.h     |    6 +-
 inode.c       |   86 +++++++++++++++---------------------
 raid56.c      |  127 ++++++++++++++++++++++++-----------------------------
 raid56.h      |   14 ++---
 scrub.c       |   19 ++-----
 volumes.c     |  138 ++++++++++++++++++++++++++++------------------------------
 volumes.h     |    5 --
 9 files changed, 202 insertions(+), 247 deletions(-)


* [PATCH 01/10] btrfs: remove a bunch of pointless stripe_len arguments
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-20 17:16   ` David Sterba
  2022-06-17 10:04 ` [PATCH 02/10] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Christoph Hellwig
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs, Johannes Thumshirn

The raid56 code assumes a fixed stripe length.  To start simplifying the
bio mapping code, remove a few arguments and explicitly hard code the
fixed stripe length.

Partially based on a patch from Qu Wenruo.
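
For orientation, once the length is a compile-time constant the per-rbio
geometry no longer needs a parameter at all; condensed from the
alloc_rbio() hunk below (relying, as the code does, on BTRFS_STRIPE_LEN
being page and sector aligned):

	/* Per-rbio geometry derived from the fixed constant. */
	const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
	const unsigned int stripe_nsectors =
		BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
	const unsigned int num_pages = stripe_npages * real_stripes;
	const unsigned int num_sectors = stripe_nsectors * real_stripes;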

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/raid56.c  | 61 ++++++++++++++++++++--------------------------
 fs/btrfs/raid56.h  | 12 +++------
 fs/btrfs/scrub.c   |  9 +++----
 fs/btrfs/volumes.c | 13 ++++------
 4 files changed, 39 insertions(+), 56 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0f0368e63e5af..233dcef6ce3ad 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -474,9 +474,9 @@ static int rbio_is_full(struct btrfs_raid_bio *rbio)
 	int ret = 1;
 
 	spin_lock_irqsave(&rbio->bio_list_lock, flags);
-	if (size != rbio->nr_data * rbio->stripe_len)
+	if (size != rbio->nr_data * BTRFS_STRIPE_LEN)
 		ret = 0;
-	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
+	BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN);
 	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
 
 	return ret;
@@ -913,18 +913,17 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
  * this does not allocate any pages for rbio->pages.
  */
 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
-					 struct btrfs_io_context *bioc,
-					 u32 stripe_len)
+					 struct btrfs_io_context *bioc)
 {
 	const unsigned int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
-	const unsigned int stripe_npages = stripe_len >> PAGE_SHIFT;
+	const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
 	const unsigned int num_pages = stripe_npages * real_stripes;
-	const unsigned int stripe_nsectors = stripe_len >> fs_info->sectorsize_bits;
+	const unsigned int stripe_nsectors =
+		BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
 	const unsigned int num_sectors = stripe_nsectors * real_stripes;
 	struct btrfs_raid_bio *rbio;
 	void *p;
 
-	ASSERT(IS_ALIGNED(stripe_len, PAGE_SIZE));
 	/* PAGE_SIZE must also be aligned to sectorsize for subpage support */
 	ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
 	/*
@@ -948,7 +947,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	INIT_LIST_HEAD(&rbio->stripe_cache);
 	INIT_LIST_HEAD(&rbio->hash_list);
 	rbio->bioc = bioc;
-	rbio->stripe_len = stripe_len;
 	rbio->nr_pages = num_pages;
 	rbio->nr_sectors = num_sectors;
 	rbio->real_stripes = real_stripes;
@@ -1020,7 +1018,6 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
 			      struct sector_ptr *sector,
 			      unsigned int stripe_nr,
 			      unsigned int sector_nr,
-			      unsigned long bio_max_len,
 			      unsigned int opf)
 {
 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
@@ -1065,7 +1062,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
 	}
 
 	/* put a new bio on the list */
-	bio = bio_alloc(stripe->dev->bdev, max(bio_max_len >> PAGE_SHIFT, 1UL),
+	bio = bio_alloc(stripe->dev->bdev,
+			max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
 			opf, GFP_NOFS);
 	bio->bi_iter.bi_sector = disk_start >> 9;
 	bio->bi_private = rbio;
@@ -1287,8 +1285,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 		}
 
 		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
-					 sectornr, rbio->stripe_len,
-					 REQ_OP_WRITE);
+					 sectornr, REQ_OP_WRITE);
 		if (ret)
 			goto cleanup;
 	}
@@ -1327,8 +1324,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 
 		ret = rbio_add_io_sector(rbio, &bio_list, sector,
 					 rbio->bioc->tgtdev_map[stripe],
-					 sectornr, rbio->stripe_len,
-					 REQ_OP_WRITE);
+					 sectornr, REQ_OP_WRITE);
 		if (ret)
 			goto cleanup;
 	}
@@ -1373,7 +1369,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 
 	for (i = 0; i < rbio->bioc->num_stripes; i++) {
 		stripe = &rbio->bioc->stripes[i];
-		if (in_range(physical, stripe->physical, rbio->stripe_len) &&
+		if (in_range(physical, stripe->physical, BTRFS_STRIPE_LEN) &&
 		    stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
 			return i;
 		}
@@ -1395,7 +1391,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
 	for (i = 0; i < rbio->nr_data; i++) {
 		u64 stripe_start = rbio->bioc->raid_map[i];
 
-		if (in_range(logical, stripe_start, rbio->stripe_len))
+		if (in_range(logical, stripe_start, BTRFS_STRIPE_LEN))
 			return i;
 	}
 	return -1;
@@ -1580,8 +1576,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 			continue;
 
 		ret = rbio_add_io_sector(rbio, &bio_list, sector,
-			       stripe, sectornr, rbio->stripe_len,
-			       REQ_OP_READ);
+			       stripe, sectornr, REQ_OP_READ);
 		if (ret)
 			goto cleanup;
 	}
@@ -1790,7 +1785,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
 
 	ASSERT(orig_logical >= full_stripe_start &&
 	       orig_logical + orig_len <= full_stripe_start +
-	       rbio->nr_data * rbio->stripe_len);
+	       rbio->nr_data * BTRFS_STRIPE_LEN);
 
 	bio_list_add(&rbio->bio_list, orig_bio);
 	rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
@@ -1808,7 +1803,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
 /*
  * our main entry point for writes from the rest of the FS.
  */
-int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len)
+int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
@@ -1816,7 +1811,7 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stri
 	struct blk_plug_cb *cb;
 	int ret;
 
-	rbio = alloc_rbio(fs_info, bioc, stripe_len);
+	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio)) {
 		btrfs_put_bioc(bioc);
 		return PTR_ERR(rbio);
@@ -2141,8 +2136,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 			continue;
 
 		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
-					 sectornr, rbio->stripe_len,
-					 REQ_OP_READ);
+					 sectornr, REQ_OP_READ);
 		if (ret < 0)
 			goto cleanup;
 	}
@@ -2200,7 +2194,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
  * of the drive.
  */
 int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
-			  u32 stripe_len, int mirror_num, int generic_io)
+			  int mirror_num, int generic_io)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
@@ -2211,7 +2205,7 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 		btrfs_bio(bio)->mirror_num = mirror_num;
 	}
 
-	rbio = alloc_rbio(fs_info, bioc, stripe_len);
+	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio)) {
 		if (generic_io)
 			btrfs_put_bioc(bioc);
@@ -2305,14 +2299,14 @@ static void read_rebuild_work(struct work_struct *work)
 
 struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
 				struct btrfs_io_context *bioc,
-				u32 stripe_len, struct btrfs_device *scrub_dev,
+				struct btrfs_device *scrub_dev,
 				unsigned long *dbitmap, int stripe_nsectors)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
 	int i;
 
-	rbio = alloc_rbio(fs_info, bioc, stripe_len);
+	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio))
 		return NULL;
 	bio_list_add(&rbio->bio_list, bio);
@@ -2357,7 +2351,7 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
 
 	ASSERT(logical >= rbio->bioc->raid_map[0]);
 	ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
-				rbio->stripe_len * rbio->nr_data);
+				BTRFS_STRIPE_LEN * rbio->nr_data);
 	stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
 	index = stripe_offset / sectorsize;
 	rbio->bio_sectors[index].page = page;
@@ -2513,7 +2507,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 
 		sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
 		ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
-					 sectornr, rbio->stripe_len, REQ_OP_WRITE);
+					 sectornr, REQ_OP_WRITE);
 		if (ret)
 			goto cleanup;
 	}
@@ -2527,7 +2521,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 		sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
 		ret = rbio_add_io_sector(rbio, &bio_list, sector,
 				       bioc->tgtdev_map[rbio->scrubp],
-				       sectornr, rbio->stripe_len, REQ_OP_WRITE);
+				       sectornr, REQ_OP_WRITE);
 		if (ret)
 			goto cleanup;
 	}
@@ -2694,7 +2688,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 			continue;
 
 		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
-					 sectornr, rbio->stripe_len, REQ_OP_READ);
+					 sectornr, REQ_OP_READ);
 		if (ret)
 			goto cleanup;
 	}
@@ -2759,13 +2753,12 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
 /* The following code is used for dev replace of a missing RAID 5/6 device. */
 
 struct btrfs_raid_bio *
-raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
-			  u64 length)
+raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
 
-	rbio = alloc_rbio(fs_info, bioc, length);
+	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio))
 		return NULL;
 
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index c73bceb2b4616..1dce205b79bf9 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -56,9 +56,6 @@ struct btrfs_raid_bio {
 	 */
 	enum btrfs_rbio_ops operation;
 
-	/* Size of each individual stripe on disk */
-	u32 stripe_len;
-
 	/* How many pages there are for the full stripe including P/Q */
 	u16 nr_pages;
 
@@ -169,21 +166,20 @@ static inline int nr_data_stripes(const struct map_lookup *map)
 struct btrfs_device;
 
 int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
-			  u32 stripe_len, int mirror_num, int generic_io);
-int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, u32 stripe_len);
+			  int mirror_num, int generic_io);
+int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
 
 void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
 			    unsigned int pgoff, u64 logical);
 
 struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
-				struct btrfs_io_context *bioc, u32 stripe_len,
+				struct btrfs_io_context *bioc,
 				struct btrfs_device *scrub_dev,
 				unsigned long *dbitmap, int stripe_nsectors);
 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
 
 struct btrfs_raid_bio *
-raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
-			  u64 length);
+raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc);
 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
 
 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a0c45e92bd6cb..ad7958d18158f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1216,7 +1216,6 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
 
 static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
 						 u64 *raid_map,
-						 u64 mapped_length,
 						 int nstripes, int mirror,
 						 int *stripe_index,
 						 u64 *stripe_offset)
@@ -1231,7 +1230,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
 				continue;
 
 			if (logical >= raid_map[i] &&
-			    logical < raid_map[i] + mapped_length)
+			    logical < raid_map[i] + BTRFS_STRIPE_LEN)
 				break;
 		}
 
@@ -1335,7 +1334,6 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 			scrub_stripe_index_and_offset(logical,
 						      bioc->map_type,
 						      bioc->raid_map,
-						      mapped_length,
 						      bioc->num_stripes -
 						      bioc->num_tgtdevs,
 						      mirror_index,
@@ -1387,7 +1385,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 
 	mirror_num = sector->sblock->sectors[0]->mirror_num;
 	ret = raid56_parity_recover(bio, sector->recover->bioc,
-				    sector->recover->map_length,
 				    mirror_num, 0);
 	if (ret)
 		return ret;
@@ -2195,7 +2192,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
 	bio->bi_private = sblock;
 	bio->bi_end_io = scrub_missing_raid56_end_io;
 
-	rbio = raid56_alloc_missing_rbio(bio, bioc, length);
+	rbio = raid56_alloc_missing_rbio(bio, bioc);
 	if (!rbio)
 		goto rbio_out;
 
@@ -2829,7 +2826,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	bio->bi_private = sparity;
 	bio->bi_end_io = scrub_parity_bio_endio;
 
-	rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length,
+	rbio = raid56_parity_alloc_scrub_rbio(bio, bioc,
 					      sparity->scrub_dev,
 					      &sparity->dbitmap,
 					      sparity->nsectors);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 80636fbf28b7a..556a18dca6e9f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6466,6 +6466,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		ASSERT(map->stripe_len == BTRFS_STRIPE_LEN);
 		if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
 			/* push stripe_nr back to the start of the full stripe */
 			stripe_nr = div64_u64(raid56_full_stripe_start,
@@ -6763,14 +6764,10 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
-		/* In this case, map_length has been set to the length of
-		   a single stripe; not the whole write */
-		if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
-			ret = raid56_parity_write(bio, bioc, map_length);
-		} else {
-			ret = raid56_parity_recover(bio, bioc, map_length,
-						    mirror_num, 1);
-		}
+		if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+			ret = raid56_parity_write(bio, bioc);
+		else
+			ret = raid56_parity_recover(bio, bioc, mirror_num, 1);
 		goto out_dec;
 	}
 
-- 
2.30.2



* [PATCH 02/10] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block()
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 01/10] btrfs: remove a bunch of pointless stripe_len arguments Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 03/10] btrfs: remove the btrfs_map_bio return value Christoph Hellwig
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs, Johannes Thumshirn

From: Qu Wenruo <wqu@suse.com>

For profiles other than RAID56, __btrfs_map_block() returns @map_length
as min(stripe_end, logical + *length), which matches the result from
btrfs_get_io_geometry().

But for RAID56, __btrfs_map_block() returns @map_length as stripe_len.

This strange behavior is going to hurt the incoming bio split at
btrfs_map_bio() time, as we will use @map_length as the bio split size.

Fix this behavior by returning @map_length using the same calculation as
for the other profiles.
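
As a worked example with hypothetical numbers (RAID5, data_stripes = 3,
64K stripe_len, a 256K bio starting at the full stripe start):

	/* Old behavior: *length = stripe_len = 64K, regardless of the bio. */
	/* New behavior: return the length up to the full stripe end:       */
	*length = min(raid56_full_stripe_start + em->start +
		      data_stripes * stripe_len,  /* full stripe end: +192K */
		      logical + *length)          /* bio end: +256K */
		  - logical;                      /* = 192K */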

Signed-off-by: Qu Wenruo <wqu@suse.com>
[hch: rebased]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/volumes.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 556a18dca6e9f..943ac43f58e9e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6476,7 +6476,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			num_stripes = map->num_stripes;
 			max_errors = btrfs_chunk_max_errors(map);
 
-			*length = map->stripe_len;
+			/* Return the length to the full stripe end */
+			*length = min(raid56_full_stripe_start + em->start +
+				      data_stripes * stripe_len,
+				      logical + *length) - logical;
 			stripe_index = 0;
 			stripe_offset = 0;
 		} else {
-- 
2.30.2



* [PATCH 03/10] btrfs: remove the btrfs_map_bio return value
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 01/10] btrfs: remove a bunch of pointless stripe_len arguments Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 02/10] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 04/10] btrfs: remove the raid56_parity_write " Christoph Hellwig
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs, Johannes Thumshirn

Always consume the bio and call the end_io handler on error instead of
returning an error and letting the caller handle it.  This matches
what the block layer submission does and avoids any confusion on who
needs to handle errors.

As this requires touching all the callers, rename the function to
btrfs_submit_bio, which describes the functionality much better.
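
The caller-side effect can be condensed as a before/after sketch
(paraphrasing the hunks below):

	/* Before: every caller had to complete the bio on failure. */
	ret = btrfs_map_bio(fs_info, bio, mirror_num);
	if (ret) {
		bio->bi_status = ret;
		bio_endio(bio);
	}

	/* After: submission consumes the bio unconditionally. */
	btrfs_submit_bio(fs_info, bio, mirror_num);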

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/compression.c |  8 ++------
 fs/btrfs/disk-io.c     | 21 ++++++++++-----------
 fs/btrfs/inode.c       | 25 ++++++++++---------------
 fs/btrfs/volumes.c     | 13 ++++++++-----
 fs/btrfs/volumes.h     |  4 ++--
 5 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 63d542961b78a..907fc8a4c092c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -593,9 +593,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			}
 
 			ASSERT(bio->bi_iter.bi_size);
-			ret = btrfs_map_bio(fs_info, bio, 0);
-			if (ret)
-				goto finish_cb;
+			btrfs_submit_bio(fs_info, bio, 0);
 			bio = NULL;
 		}
 		cond_resched();
@@ -931,9 +929,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			sums += fs_info->csum_size * nr_sectors;
 
 			ASSERT(comp_bio->bi_iter.bi_size);
-			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
-			if (ret)
-				goto finish_cb;
+			btrfs_submit_bio(fs_info, comp_bio, mirror_num);
 			comp_bio = NULL;
 		}
 	}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 800ad3a9c68ed..5df6865428a5c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -728,7 +728,6 @@ static void run_one_async_done(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
 	struct inode *inode;
-	blk_status_t ret;
 
 	async = container_of(work, struct  async_submit_bio, work);
 	inode = async->inode;
@@ -746,11 +745,7 @@ static void run_one_async_done(struct btrfs_work *work)
 	 * This changes nothing when cgroups aren't in use.
 	 */
 	async->bio->bi_opf |= REQ_CGROUP_PUNT;
-	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
-	if (ret) {
-		async->bio->bi_status = ret;
-		bio_endio(async->bio);
-	}
+	btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -814,7 +809,7 @@ static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio,
 {
 	/*
 	 * when we're called for a write, we're already in the async
-	 * submission context.  Just jump into btrfs_map_bio
+	 * submission context.  Just jump into btrfs_submit_bio.
 	 */
 	return btree_csum_one_bio(bio);
 }
@@ -839,11 +834,15 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 	bio->bi_opf |= REQ_META;
 
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-		ret = btrfs_map_bio(fs_info, bio, mirror_num);
-	} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
+		btrfs_submit_bio(fs_info, bio, mirror_num);
+		return;
+	}
+	if (!should_async_write(fs_info, BTRFS_I(inode))) {
 		ret = btree_csum_one_bio(bio);
-		if (!ret)
-			ret = btrfs_map_bio(fs_info, bio, mirror_num);
+		if (!ret) {
+			btrfs_submit_bio(fs_info, bio, mirror_num);
+			return;
+		}
 	} else {
 		/*
 		 * kthread helpers are used to submit writes so that
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 33ba4d22e1430..0f8513d3a8a88 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2617,7 +2617,8 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
 			goto out;
 		}
 	}
-	ret = btrfs_map_bio(fs_info, bio, mirror_num);
+	btrfs_submit_bio(fs_info, bio, mirror_num);
+	return;
 out:
 	if (ret) {
 		bio->bi_status = ret;
@@ -2645,14 +2646,13 @@ void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
 	 * not, which is why we ignore skip_sum here.
 	 */
 	ret = btrfs_lookup_bio_sums(inode, bio, NULL);
-	if (ret)
-		goto out;
-	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-out:
 	if (ret) {
 		bio->bi_status = ret;
 		bio_endio(bio);
+		return;
 	}
+
+	btrfs_submit_bio(fs_info, bio, mirror_num);
 }
 
 /*
@@ -7863,8 +7863,7 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
 	BUG_ON(bio_op(bio) == REQ_OP_WRITE);
 
 	refcount_inc(&dip->refs);
-	if (btrfs_map_bio(fs_info, bio, mirror_num))
-		refcount_dec(&dip->refs);
+	btrfs_submit_bio(fs_info, bio, mirror_num);
 }
 
 static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
@@ -7972,7 +7971,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 						      file_offset - dip->file_offset);
 	}
 map:
-	return btrfs_map_bio(fs_info, bio, 0);
+	btrfs_submit_bio(fs_info, bio, 0);
+	return BLK_STS_OK;
 }
 
 static void btrfs_submit_direct(const struct iomap_iter *iter,
@@ -10277,7 +10277,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 					    struct bio *bio, int mirror_num)
 {
 	struct btrfs_encoded_read_private *priv = bio->bi_private;
-	struct btrfs_bio *bbio = btrfs_bio(bio);
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	blk_status_t ret;
 
@@ -10288,12 +10287,8 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
 	}
 
 	atomic_inc(&priv->pending);
-	ret = btrfs_map_bio(fs_info, bio, mirror_num);
-	if (ret) {
-		atomic_dec(&priv->pending);
-		btrfs_bio_free_csum(bbio);
-	}
-	return ret;
+	btrfs_submit_bio(fs_info, bio, mirror_num);
+	return BLK_STS_OK;
 }
 
 static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 943ac43f58e9e..bdcefc19cd51e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6731,8 +6731,8 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 		}
 	}
 	btrfs_debug_in_rcu(fs_info,
-	"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
-		bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
+	"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+		__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
 		(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
 		dev->devid, bio->bi_iter.bi_size);
 
@@ -6742,8 +6742,8 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	submit_bio(bio);
 }
 
-blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
-			   int mirror_num)
+void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+		      int mirror_num)
 {
 	u64 logical = bio->bi_iter.bi_sector << 9;
 	u64 length = bio->bi_iter.bi_size;
@@ -6788,7 +6788,10 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	}
 out_dec:
 	btrfs_bio_counter_dec(fs_info);
-	return errno_to_blk_status(ret);
+	if (ret) {
+		bio->bi_status = errno_to_blk_status(ret);
+		bio_endio(bio);
+	}
 }
 
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b61508723d5d2..cc9966fe0e517 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -573,8 +573,8 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
 struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
 					    u64 type);
 void btrfs_mapping_tree_free(struct extent_map_tree *tree);
-blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
-			   int mirror_num);
+void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+		      int mirror_num);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
-- 
2.30.2



* [PATCH 04/10] btrfs: remove the raid56_parity_write return value
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 03/10] btrfs: remove the btrfs_map_bio return value Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-17 10:38   ` Qu Wenruo
  2022-06-18 11:04   ` Johannes Thumshirn
  2022-06-17 10:04 ` [PATCH 05/10] btrfs: remove the raid56_parity_recover " Christoph Hellwig
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Always consume the bio and call the end_io handler on error instead of
returning an error and letting the caller handle it.  This matches what
the block layer submission does and avoids any confusion on who
needs to handle errors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/raid56.c  | 23 +++++++++++++++--------
 fs/btrfs/raid56.h  |  2 +-
 fs/btrfs/volumes.c |  2 +-
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 233dcef6ce3ad..c0cc2d99509c9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1803,18 +1803,19 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
 /*
  * our main entry point for writes from the rest of the FS.
  */
-int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
+void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
 	struct btrfs_plug_cb *plug = NULL;
 	struct blk_plug_cb *cb;
-	int ret;
+	int ret = 0;
 
 	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio)) {
 		btrfs_put_bioc(bioc);
-		return PTR_ERR(rbio);
+		ret = PTR_ERR(rbio);
+		goto out;
 	}
 	rbio->operation = BTRFS_RBIO_WRITE;
 	rbio_add_bio(rbio, bio);
@@ -1829,8 +1830,8 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 	if (rbio_is_full(rbio)) {
 		ret = full_stripe_write(rbio);
 		if (ret)
-			btrfs_bio_counter_dec(fs_info);
-		return ret;
+			goto out_dec_counter;
+		return;
 	}
 
 	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
@@ -1841,13 +1842,19 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 			INIT_LIST_HEAD(&plug->rbio_list);
 		}
 		list_add_tail(&rbio->plug_list, &plug->rbio_list);
-		ret = 0;
 	} else {
 		ret = __raid56_parity_write(rbio);
 		if (ret)
-			btrfs_bio_counter_dec(fs_info);
+			goto out_dec_counter;
 	}
-	return ret;
+
+	return;
+
+out_dec_counter:
+	btrfs_bio_counter_dec(fs_info);
+out:
+	bio->bi_status = errno_to_blk_status(ret);
+	bio_endio(bio);
 }
 
 /*
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 1dce205b79bf9..3f223ae39462a 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -167,7 +167,7 @@ struct btrfs_device;
 
 int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 			  int mirror_num, int generic_io);
-int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
+void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
 
 void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
 			    unsigned int pgoff, u64 logical);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bdcefc19cd51e..b30d4449ef18d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6768,7 +6768,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
 		if (btrfs_op(bio) == BTRFS_MAP_WRITE)
-			ret = raid56_parity_write(bio, bioc);
+			raid56_parity_write(bio, bioc);
 		else
 			ret = raid56_parity_recover(bio, bioc, mirror_num, 1);
 		goto out_dec;
-- 
2.30.2



* [PATCH 05/10] btrfs: remove the raid56_parity_recover return value
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 04/10] btrfs: remove the raid56_parity_write " Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-18 11:06   ` Johannes Thumshirn
  2022-06-19 10:35   ` Qu Wenruo
  2022-06-17 10:04 ` [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers Christoph Hellwig
                   ` (6 subsequent siblings)
  11 siblings, 2 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Always consume the bio and call the end_io handler on error instead of
returning an error and letting the caller handle it.  This matches what
the block layer submission does and avoids any confusion on who
needs to handle errors.

Also use the proper bool type for the generic_io argument.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/raid56.c  | 39 ++++++++++++++++-----------------------
 fs/btrfs/raid56.h  |  4 ++--
 fs/btrfs/scrub.c   | 10 ++--------
 fs/btrfs/volumes.c |  2 +-
 4 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index c0cc2d99509c9..cd39c233dfdeb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2200,12 +2200,11 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
  * so we assume the bio they send down corresponds to a failed part
  * of the drive.
  */
-int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
-			  int mirror_num, int generic_io)
+void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
+			   int mirror_num, bool generic_io)
 {
 	struct btrfs_fs_info *fs_info = bioc->fs_info;
 	struct btrfs_raid_bio *rbio;
-	int ret;
 
 	if (generic_io) {
 		ASSERT(bioc->mirror_num == mirror_num);
@@ -2214,9 +2213,8 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 
 	rbio = alloc_rbio(fs_info, bioc);
 	if (IS_ERR(rbio)) {
-		if (generic_io)
-			btrfs_put_bioc(bioc);
-		return PTR_ERR(rbio);
+		bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
+		goto out_end_bio;
 	}
 
 	rbio->operation = BTRFS_RBIO_READ_REBUILD;
@@ -2228,10 +2226,9 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
 			   __func__, bio->bi_iter.bi_sector << 9,
 			   (u64)bio->bi_iter.bi_size, bioc->map_type);
-		if (generic_io)
-			btrfs_put_bioc(bioc);
 		kfree(rbio);
-		return -EIO;
+		bio->bi_status = BLK_STS_IOERR;
+		goto out_end_bio;
 	}
 
 	if (generic_io) {
@@ -2258,24 +2255,20 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 			rbio->failb--;
 	}
 
-	ret = lock_stripe_add(rbio);
+	if (lock_stripe_add(rbio))
+		return;
 
 	/*
-	 * __raid56_parity_recover will end the bio with
-	 * any errors it hits.  We don't want to return
-	 * its error value up the stack because our caller
-	 * will end up calling bio_endio with any nonzero
-	 * return
+	 * This adds our rbio to the list of rbios that will be handled after
+	 * the current lock owner is done.
 	 */
-	if (ret == 0)
-		__raid56_parity_recover(rbio);
-	/*
-	 * our rbio has been added to the list of
-	 * rbios that will be handled after the
-	 * currently lock owner is done
-	 */
-	return 0;
+	__raid56_parity_recover(rbio);
+	return;
 
+out_end_bio:
+	if (generic_io)
+		btrfs_put_bioc(bioc);
+	bio_endio(bio);
 }
 
 static void rmw_work(struct work_struct *work)
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 3f223ae39462a..6f48f9e4c8694 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -165,8 +165,8 @@ static inline int nr_data_stripes(const struct map_lookup *map)
 
 struct btrfs_device;
 
-int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
-			  int mirror_num, int generic_io);
+void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
+			   int mirror_num, bool generic_io);
 void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
 
 void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ad7958d18158f..3afe5fa50a631 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1376,18 +1376,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 					struct scrub_sector *sector)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
-	int ret;
-	int mirror_num;
 
 	bio->bi_iter.bi_sector = sector->logical >> 9;
 	bio->bi_private = &done;
 	bio->bi_end_io = scrub_bio_wait_endio;
-
-	mirror_num = sector->sblock->sectors[0]->mirror_num;
-	ret = raid56_parity_recover(bio, sector->recover->bioc,
-				    mirror_num, 0);
-	if (ret)
-		return ret;
+	raid56_parity_recover(bio, sector->recover->bioc,
+			      sector->sblock->sectors[0]->mirror_num, false);
 
 	wait_for_completion_io(&done);
 	return blk_status_to_errno(bio->bi_status);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b30d4449ef18d..844ad637a0269 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6770,7 +6770,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		if (btrfs_op(bio) == BTRFS_MAP_WRITE)
 			raid56_parity_write(bio, bioc);
 		else
-			ret = raid56_parity_recover(bio, bioc, mirror_num, 1);
+			raid56_parity_recover(bio, bioc, mirror_num, true);
 		goto out_dec;
 	}
 
-- 
2.30.2



* [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 05/10] btrfs: remove the raid56_parity_recover " Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-19 10:45   ` Qu Wenruo
  2022-06-17 10:04 ` [PATCH 07/10] btrfs: simplify the reloc root check in btrfs_submit_data_write_bio Christoph Hellwig
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Transfer the bio counter reference acquired by btrfs_submit_bio to
raid56_parity_write and raid56_parity_recover together with the bio that
the reference was acquired for, instead of acquiring another reference
in those helpers and dropping the original one in btrfs_submit_bio.
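
Sketched as a reference-flow rule (paraphrasing the hunks below, not
literal code):

	/* btrfs_submit_bio() acquires the counter reference ... */
	btrfs_bio_counter_inc_blocked(fs_info);
	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
				&map_length, &bioc, mirror_num, 1);

	/*
	 * ... and for RAID56 that reference now travels with the bio into
	 * the helper, which calls btrfs_bio_counter_dec() on its error
	 * paths instead of taking and dropping a reference of its own.
	 */
	raid56_parity_write(bio, bioc);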

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/raid56.c  | 16 ++++++----------
 fs/btrfs/volumes.c | 15 +++++++--------
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index cd39c233dfdeb..00a0a2d472d88 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1815,12 +1815,11 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 	if (IS_ERR(rbio)) {
 		btrfs_put_bioc(bioc);
 		ret = PTR_ERR(rbio);
-		goto out;
+		goto out_dec_counter;
 	}
 	rbio->operation = BTRFS_RBIO_WRITE;
 	rbio_add_bio(rbio, bio);
 
-	btrfs_bio_counter_inc_noblocked(fs_info);
 	rbio->generic_bio_cnt = 1;
 
 	/*
@@ -1852,7 +1851,6 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 
 out_dec_counter:
 	btrfs_bio_counter_dec(fs_info);
-out:
 	bio->bi_status = errno_to_blk_status(ret);
 	bio_endio(bio);
 }
@@ -2209,6 +2207,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 	if (generic_io) {
 		ASSERT(bioc->mirror_num == mirror_num);
 		btrfs_bio(bio)->mirror_num = mirror_num;
+	} else {
+		btrfs_get_bioc(bioc);
 	}
 
 	rbio = alloc_rbio(fs_info, bioc);
@@ -2231,12 +2231,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 		goto out_end_bio;
 	}
 
-	if (generic_io) {
-		btrfs_bio_counter_inc_noblocked(fs_info);
+	if (generic_io)
 		rbio->generic_bio_cnt = 1;
-	} else {
-		btrfs_get_bioc(bioc);
-	}
 
 	/*
 	 * Loop retry:
@@ -2266,8 +2262,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
 	return;
 
 out_end_bio:
-	if (generic_io)
-		btrfs_put_bioc(bioc);
+	btrfs_bio_counter_dec(fs_info);
+	btrfs_put_bioc(bioc);
 	bio_endio(bio);
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 844ad637a0269..fea139d628c04 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6756,8 +6756,12 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
 				&map_length, &bioc, mirror_num, 1);
-	if (ret)
-		goto out_dec;
+	if (ret) {
+		btrfs_bio_counter_dec(fs_info);
+		bio->bi_status = errno_to_blk_status(ret);
+		bio_endio(bio);
+		return;
+	}
 
 	total_devs = bioc->num_stripes;
 	bioc->orig_bio = bio;
@@ -6771,7 +6775,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			raid56_parity_write(bio, bioc);
 		else
 			raid56_parity_recover(bio, bioc, mirror_num, true);
-		goto out_dec;
+		return;
 	}
 
 	if (map_length < length) {
@@ -6786,12 +6790,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
 		submit_stripe_bio(bioc, bio, dev_nr, should_clone);
 	}
-out_dec:
 	btrfs_bio_counter_dec(fs_info);
-	if (ret) {
-		bio->bi_status = errno_to_blk_status(ret);
-		bio_endio(bio);
-	}
 }
 
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
-- 
2.30.2



* [PATCH 07/10] btrfs: simplify the reloc root check in btrfs_submit_data_write_bio
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully Christoph Hellwig
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

btrfs_submit_data_write_bio special-cases the reloc root because its
checksums are preloaded, but only does so for the !sync case.  The sync
case can't happen for data relocation, but handling it more generally
significantly simplifies the logic.
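
The resulting submission decision, condensed from the hunk below
(csum_needed, queue_async_csum, csum_inline and submit are shorthand,
not real identifiers):

	if (csum_needed) {	/* !NODATASUM, !NO_CSUMS, !reloc root */
		if (!atomic_read(&bi->sync_writers))
			return queue_async_csum(bio);	/* workqueue */
		csum_inline(bio);	/* fsync and friends */
	}
	submit(bio);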

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0f8513d3a8a88..5a90fc129aea9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2594,28 +2594,25 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
 	}
 
 	/*
-	 * Rules for async/sync submit:
-	 *   a) write without checksum:			sync submit
-	 *   b) write with checksum:
-	 *      b-1) if bio is issued by fsync:		sync submit
-	 *           (sync_writers != 0)
-	 *      b-2) if root is reloc root:		sync submit
-	 *           (only in case of buffered IO)
-	 *      b-3) otherwise:				async submit
+	 * If we need to checksum, and the I/O is not issued by fsync and
+	 * friends, that is ->sync_writers != 0, defer the submission to a
+	 * workqueue to parallelize it.
+	 *
+	 * Csum items for reloc roots have already been cloned at this point,
+	 * so they are handled as part of the no-checksum case.
 	 */
 	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
-	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
-		if (atomic_read(&bi->sync_writers)) {
-			ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
-			if (ret)
-				goto out;
-		} else if (btrfs_is_data_reloc_root(bi->root)) {
-			; /* Csum items have already been cloned */
-		} else {
+	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
+	    !btrfs_is_data_reloc_root(bi->root)) {
+		if (!atomic_read(&bi->sync_writers)) {
 			ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
 						  btrfs_submit_bio_start);
 			goto out;
 		}
+
+		ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
+		if (ret)
+			goto out;
 	}
 	btrfs_submit_bio(fs_info, bio, mirror_num);
 	return;
-- 
2.30.2



* [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 07/10] btrfs: simplify the reloc root check in btrfs_submit_data_write_bio Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-28 15:20   ` Boris Burkov
  2022-06-17 10:04 ` [PATCH 09/10] btrfs: remove the btrfs_submit_dio_bio return value Christoph Hellwig
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

btrfs_wq_submit_bio is used for writeback under memory pressure.  Instead
of failing the I/O when we can't allocate the async_submit_bio, just
punt back to the synchronous submission path.
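
Usage-wise, the new bool return lets callers treat an allocation failure
exactly like "async path not taken" (condensed from the disk-io.c hunk
below):

	/* Try to queue for async checksumming; false means kmalloc failed. */
	if (should_async_write(fs_info, BTRFS_I(inode)) &&
	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
				btree_submit_bio_start))
		return;

	/* Fall back: checksum and submit synchronously. */
	ret = btree_csum_one_bio(bio);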

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/disk-io.c | 37 ++++++++++++++++++-------------------
 fs/btrfs/disk-io.h |  6 +++---
 fs/btrfs/inode.c   | 17 +++++++++--------
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5df6865428a5c..eaa643f38783c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -756,16 +756,16 @@ static void run_one_async_free(struct btrfs_work *work)
 	kfree(async);
 }
 
-blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
-				 int mirror_num, u64 dio_file_offset,
-				 extent_submit_bio_start_t *submit_bio_start)
+bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
+			 u64 dio_file_offset,
+			 extent_submit_bio_start_t *submit_bio_start)
 {
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	struct async_submit_bio *async;
 
 	async = kmalloc(sizeof(*async), GFP_NOFS);
 	if (!async)
-		return BLK_STS_RESOURCE;
+		return false;
 
 	async->inode = inode;
 	async->bio = bio;
@@ -783,7 +783,7 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
 		btrfs_queue_work(fs_info->hipri_workers, &async->work);
 	else
 		btrfs_queue_work(fs_info->workers, &async->work);
-	return 0;
+	return true;
 }
 
 static blk_status_t btree_csum_one_bio(struct bio *bio)
@@ -837,25 +837,24 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
 		btrfs_submit_bio(fs_info, bio, mirror_num);
 		return;
 	}
-	if (!should_async_write(fs_info, BTRFS_I(inode))) {
-		ret = btree_csum_one_bio(bio);
-		if (!ret) {
-			btrfs_submit_bio(fs_info, bio, mirror_num);
-			return;
-		}
-	} else {
-		/*
-		 * kthread helpers are used to submit writes so that
-		 * checksumming can happen in parallel across all CPUs
-		 */
-		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
-					  btree_submit_bio_start);
-	}
 
+	/*
+	 * Kthread helpers are used to submit writes so that checksumming can
+	 * happen in parallel across all CPUs
+	 */
+	if (should_async_write(fs_info, BTRFS_I(inode)) &&
+	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
+				btree_submit_bio_start))
+		return;
+
+	ret = btree_csum_one_bio(bio);
 	if (ret) {
 		bio->bi_status = ret;
 		bio_endio(bio);
+		return;
 	}
+
+	btrfs_submit_bio(fs_info, bio, mirror_num);
 }
 
 #ifdef CONFIG_MIGRATION
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 05e779a41a997..8993b428e09ce 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -114,9 +114,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 			  int atomic);
 int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
 			     int level, struct btrfs_key *first_key);
-blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
-				 int mirror_num, u64 dio_file_offset,
-				 extent_submit_bio_start_t *submit_bio_start);
+bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
+			 u64 dio_file_offset,
+			 extent_submit_bio_start_t *submit_bio_start);
 blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
 			  int mirror_num);
 int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5a90fc129aea9..38af980d1cf1f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2604,11 +2604,10 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
 	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
 	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
 	    !btrfs_is_data_reloc_root(bi->root)) {
-		if (!atomic_read(&bi->sync_writers)) {
-			ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
-						  btrfs_submit_bio_start);
-			goto out;
-		}
+		if (!atomic_read(&bi->sync_writers) &&
+		    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
+					btrfs_submit_bio_start))
+			return;
 
 		ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
 		if (ret)
@@ -7953,9 +7952,11 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 
 	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
 		/* Check btrfs_submit_data_write_bio() for async submit rules */
-		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
-			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
-					btrfs_submit_bio_start_direct_io);
+		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers) &&
+		    btrfs_wq_submit_bio(inode, bio, 0, file_offset,
+					btrfs_submit_bio_start_direct_io))
+			return BLK_STS_OK;
+
 		/*
 		 * If we aren't doing async submit, calculate the csum of the
 		 * bio now.
-- 
2.30.2



* [PATCH 09/10] btrfs: remove the btrfs_submit_dio_bio return value
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-17 10:04 ` [PATCH 10/10] btrfs: remove bioc->stripes_pending Christoph Hellwig
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Always consume the bio and call the end_io handler on error instead of
returning an error and letting the caller handle it.  This matches what
the block layer submission and the other btrfs bio submission handlers do
and avoids any confusion on who needs to handle errors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 38af980d1cf1f..0c2ef87af302f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7940,8 +7940,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
 	btrfs_dio_private_put(dip);
 }
 
-static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
-		struct inode *inode, u64 file_offset, int async_submit)
+static inline void btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
+		u64 file_offset, int async_submit)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
@@ -7955,22 +7955,24 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers) &&
 		    btrfs_wq_submit_bio(inode, bio, 0, file_offset,
 					btrfs_submit_bio_start_direct_io))
-			return BLK_STS_OK;
+			return;
 
 		/*
 		 * If we aren't doing async submit, calculate the csum of the
 		 * bio now.
 		 */
 		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
-		if (ret)
-			return ret;
+		if (ret) {
+			bio->bi_status = ret;
+			bio_endio(bio);
+			return;
+		}
 	} else {
 		btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
 						      file_offset - dip->file_offset);
 	}
 map:
 	btrfs_submit_bio(fs_info, bio, 0);
-	return BLK_STS_OK;
 }
 
 static void btrfs_submit_direct(const struct iomap_iter *iter,
@@ -8083,14 +8085,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
 				async_submit = 1;
 		}
 
-		status = btrfs_submit_dio_bio(bio, inode, file_offset,
-						async_submit);
-		if (status) {
-			bio_put(bio);
-			if (submit_len > 0)
-				refcount_dec(&dip->refs);
-			goto out_err_em;
-		}
+		btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
 
 		dio_data->submitted += clone_len;
 		clone_offset += clone_len;
-- 
2.30.2



* [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 09/10] btrfs: remove the btrfs_submit_dio_bio return value Christoph Hellwig
@ 2022-06-17 10:04 ` Christoph Hellwig
  2022-06-20  8:18   ` Nikolay Borisov
  2022-06-22 16:07   ` David Sterba
  2022-06-20 13:04 ` cleanup btrfs bio submission v2 Nikolay Borisov
  2022-07-07 18:35 ` David Sterba
  11 siblings, 2 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-17 10:04 UTC (permalink / raw)
  To: David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Replace the stripes_pending field with the pending counter in the bio.
This avoids an extra field and prepares for splitting the btrfs_bio at
the stripe boundary.
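
For background, the block layer counter being reused here works as
follows (generic bio semantics, sketched rather than quoted from btrfs):

	bio_inc_remaining(orig_bio);	/* bumps __bi_remaining */
	...
	bio_endio(orig_bio);	/* drops __bi_remaining, does not complete */
	...
	bio_endio(orig_bio);	/* final call runs orig_bio->bi_end_io() */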

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/volumes.c | 100 ++++++++++++++++++++++-----------------------
 fs/btrfs/volumes.h |   1 -
 2 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fea139d628c04..c1497bde713ad 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5896,7 +5896,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
 		sizeof(u64) * (total_stripes),
 		GFP_NOFS|__GFP_NOFAIL);
 
-	atomic_set(&bioc->error, 0);
 	refcount_set(&bioc->refs, 1);
 
 	bioc->fs_info = fs_info;
@@ -6626,46 +6625,21 @@ static void btrfs_end_bio_work(struct work_struct *work)
 	struct btrfs_bio *bbio =
 		container_of(work, struct btrfs_bio, end_io_work);
 
-	bio_endio(&bbio->bio);
-}
-
-static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
-{
-	struct bio *orig_bio = bioc->orig_bio;
-	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
-
-	bbio->mirror_num = bioc->mirror_num;
-	orig_bio->bi_private = bioc->private;
-	orig_bio->bi_end_io = bioc->end_io;
-
-	/*
-	 * Only send an error to the higher layers if it is beyond the tolerance
-	 * threshold.
-	 */
-	if (atomic_read(&bioc->error) > bioc->max_errors)
-		orig_bio->bi_status = BLK_STS_IOERR;
-	else
-		orig_bio->bi_status = BLK_STS_OK;
-
-	if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
-		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
-		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
-	} else {
-		bio_endio(orig_bio);
-	}
-
-	btrfs_put_bioc(bioc);
+	bbio->bio.bi_end_io(&bbio->bio);
 }
 
 static void btrfs_end_bio(struct bio *bio)
 {
 	struct btrfs_io_stripe *stripe = bio->bi_private;
 	struct btrfs_io_context *bioc = stripe->bioc;
+	struct bio *orig_bio = bioc->orig_bio;
+	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
 
 	if (bio->bi_status) {
 		atomic_inc(&bioc->error);
-		if (bio->bi_status == BLK_STS_IOERR ||
-		    bio->bi_status == BLK_STS_TARGET) {
+		if (stripe->dev && stripe->dev->bdev &&
+		    (bio->bi_status == BLK_STS_IOERR ||
+		     bio->bi_status == BLK_STS_TARGET)) {
 			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
 				btrfs_dev_stat_inc_and_print(stripe->dev,
 						BTRFS_DEV_STAT_WRITE_ERRS);
@@ -6678,12 +6652,35 @@ static void btrfs_end_bio(struct bio *bio)
 		}
 	}
 
-	if (bio != bioc->orig_bio)
+	btrfs_bio_counter_dec(bioc->fs_info);
+
+	if (bio != orig_bio) {
+		bio_endio(orig_bio);
 		bio_put(bio);
+		return;
+	}
 
-	btrfs_bio_counter_dec(bioc->fs_info);
-	if (atomic_dec_and_test(&bioc->stripes_pending))
-		btrfs_end_bioc(bioc, true);
+	/*
+	 * Only send an error to the higher layers if it is beyond the tolerance
+	 * threshold.
+	 */
+	if (atomic_read(&bioc->error) > bioc->max_errors)
+		orig_bio->bi_status = BLK_STS_IOERR;
+	else
+		orig_bio->bi_status = BLK_STS_OK;
+
+	bbio->mirror_num = bioc->mirror_num;
+	orig_bio->bi_end_io = bioc->end_io;
+	orig_bio->bi_private = bioc->private;
+	if (btrfs_op(orig_bio) == BTRFS_MAP_READ) {
+		bbio->device = stripe->dev;
+		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
+		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
+	} else {
+		orig_bio->bi_end_io(orig_bio);
+	}
+
+	btrfs_put_bioc(bioc);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc,
@@ -6694,28 +6691,30 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	u64 physical = bioc->stripes[dev_nr].physical;
 	struct bio *bio;
 
-	if (!dev || !dev->bdev ||
-	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
-	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
-	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-		atomic_inc(&bioc->error);
-		if (atomic_dec_and_test(&bioc->stripes_pending))
-			btrfs_end_bioc(bioc, false);
-		return;
-	}
-
 	if (clone) {
-		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
+		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+		bio_inc_remaining(orig_bio);
 	} else {
 		bio = orig_bio;
-		bio_set_dev(bio, dev->bdev);
-		btrfs_bio(bio)->device = dev;
 	}
 
 	bioc->stripes[dev_nr].bioc = bioc;
 	bio->bi_private = &bioc->stripes[dev_nr];
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
+
+	btrfs_bio_counter_inc_noblocked(fs_info);
+
+	if (!dev || !dev->bdev ||
+	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
+	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
+	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+		bio_io_error(bio);
+		return;
+	}
+
+	bio_set_dev(bio, dev->bdev);
+
 	/*
 	 * For zone append writing, bi_sector must point the beginning of the
 	 * zone
@@ -6736,8 +6735,6 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 		(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
 		dev->devid, bio->bi_iter.bi_size);
 
-	btrfs_bio_counter_inc_noblocked(fs_info);
-
 	btrfsic_check_bio(bio);
 	submit_bio(bio);
 }
@@ -6767,7 +6764,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	bioc->orig_bio = bio;
 	bioc->private = bio->bi_private;
 	bioc->end_io = bio->bi_end_io;
-	atomic_set(&bioc->stripes_pending, total_devs);
 
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cc9966fe0e517..05713adbbf499 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -444,7 +444,6 @@ struct btrfs_discard_stripe {
  */
 struct btrfs_io_context {
 	refcount_t refs;
-	atomic_t stripes_pending;
 	struct btrfs_fs_info *fs_info;
 	u64 map_type; /* get from map_lookup->type */
 	bio_end_io_t *end_io;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 04/10] btrfs: remove the raid56_parity_write return value
  2022-06-17 10:04 ` [PATCH 04/10] btrfs: remove the raid56_parity_write " Christoph Hellwig
@ 2022-06-17 10:38   ` Qu Wenruo
  2022-06-18 11:04   ` Johannes Thumshirn
  1 sibling, 0 replies; 40+ messages in thread
From: Qu Wenruo @ 2022-06-17 10:38 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 2022/6/17 18:04, Christoph Hellwig wrote:
> Always consume the bio and call the end_io handler on error instead of
> returning an error and letting the caller handle it.  This matches what
> the block layer submission does and avoids any confusion on who
> needs to handle errors.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
> ---
>   fs/btrfs/raid56.c  | 23 +++++++++++++++--------
>   fs/btrfs/raid56.h  |  2 +-
>   fs/btrfs/volumes.c |  2 +-
>   3 files changed, 17 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index 233dcef6ce3ad..c0cc2d99509c9 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -1803,18 +1803,19 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
>   /*
>    * our main entry point for writes from the rest of the FS.
>    */
> -int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
> +void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
>   {
>   	struct btrfs_fs_info *fs_info = bioc->fs_info;
>   	struct btrfs_raid_bio *rbio;
>   	struct btrfs_plug_cb *plug = NULL;
>   	struct blk_plug_cb *cb;
> -	int ret;
> +	int ret = 0;
>
>   	rbio = alloc_rbio(fs_info, bioc);
>   	if (IS_ERR(rbio)) {
>   		btrfs_put_bioc(bioc);
> -		return PTR_ERR(rbio);
> +		ret = PTR_ERR(rbio);
> +		goto out;
>   	}
>   	rbio->operation = BTRFS_RBIO_WRITE;
>   	rbio_add_bio(rbio, bio);
> @@ -1829,8 +1830,8 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
>   	if (rbio_is_full(rbio)) {
>   		ret = full_stripe_write(rbio);
>   		if (ret)
> -			btrfs_bio_counter_dec(fs_info);
> -		return ret;
> +			goto out_dec_counter;
> +		return;
>   	}
>
>   	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
> @@ -1841,13 +1842,19 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
>   			INIT_LIST_HEAD(&plug->rbio_list);
>   		}
>   		list_add_tail(&rbio->plug_list, &plug->rbio_list);
> -		ret = 0;
>   	} else {
>   		ret = __raid56_parity_write(rbio);
>   		if (ret)
> -			btrfs_bio_counter_dec(fs_info);
> +			goto out_dec_counter;
>   	}
> -	return ret;
> +
> +	return;
> +
> +out_dec_counter:
> +	btrfs_bio_counter_dec(fs_info);
> +out:
> +	bio->bi_status = errno_to_blk_status(ret);
> +	bio_endio(bio);
>   }
>
>   /*
> diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
> index 1dce205b79bf9..3f223ae39462a 100644
> --- a/fs/btrfs/raid56.h
> +++ b/fs/btrfs/raid56.h
> @@ -167,7 +167,7 @@ struct btrfs_device;
>
>   int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   			  int mirror_num, int generic_io);
> -int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
> +void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
>
>   void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
>   			    unsigned int pgoff, u64 logical);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index bdcefc19cd51e..b30d4449ef18d 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6768,7 +6768,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
>   	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
>   		if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> -			ret = raid56_parity_write(bio, bioc);
> +			raid56_parity_write(bio, bioc);
>   		else
>   			ret = raid56_parity_recover(bio, bioc, mirror_num, 1);
>   		goto out_dec;

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 04/10] btrfs: remove the raid56_parity_write return value
  2022-06-17 10:04 ` [PATCH 04/10] btrfs: remove the raid56_parity_write " Christoph Hellwig
  2022-06-17 10:38   ` Qu Wenruo
@ 2022-06-18 11:04   ` Johannes Thumshirn
  1 sibling, 0 replies; 40+ messages in thread
From: Johannes Thumshirn @ 2022-06-18 11:04 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/10] btrfs: remove the raid56_parity_recover return value
  2022-06-17 10:04 ` [PATCH 05/10] btrfs: remove the raid56_parity_recover " Christoph Hellwig
@ 2022-06-18 11:06   ` Johannes Thumshirn
  2022-06-19  6:35     ` Christoph Hellwig
  2022-06-19 10:35   ` Qu Wenruo
  1 sibling, 1 reply; 40+ messages in thread
From: Johannes Thumshirn @ 2022-06-18 11:06 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs

On 17.06.22 12:04, Christoph Hellwig wrote:
> -	ret = lock_stripe_add(rbio);
> +	if (lock_stripe_add(rbio))
> +		return;
>  

I kinda have the feeling lock_stripe_add() should return bool,
but that's out of scope for this patch.
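
Something like this, as a sketch of the suggested signature change
(hypothetical, not part of this series):

	/*
	 * Same behavior, clearer type: returns true if the rbio was queued
	 * behind the current full stripe lock owner (so the caller must not
	 * start the I/O itself), false if the caller now owns the lock.
	 */
	static bool lock_stripe_add(struct btrfs_raid_bio *rbio);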

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/10] btrfs: remove the raid56_parity_recover return value
  2022-06-18 11:06   ` Johannes Thumshirn
@ 2022-06-19  6:35     ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-19  6:35 UTC (permalink / raw)
  To: Johannes Thumshirn
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Sat, Jun 18, 2022 at 11:06:06AM +0000, Johannes Thumshirn wrote:
> On 17.06.22 12:04, Christoph Hellwig wrote:
> > -	ret = lock_stripe_add(rbio);
> > +	if (lock_stripe_add(rbio))
> > +		return;
> >  
> 
> I kinda have the feeling lock_stripe_add() should return bool,
> but that's out of scope for this patch.

Yes, it should.  But it does not fit this patch, and even the series
seems big enough already as-is.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/10] btrfs: remove the raid56_parity_recover return value
  2022-06-17 10:04 ` [PATCH 05/10] btrfs: remove the raid56_parity_recover " Christoph Hellwig
  2022-06-18 11:06   ` Johannes Thumshirn
@ 2022-06-19 10:35   ` Qu Wenruo
  1 sibling, 0 replies; 40+ messages in thread
From: Qu Wenruo @ 2022-06-19 10:35 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 2022/6/17 18:04, Christoph Hellwig wrote:
> Always consume the bio and call the end_io handler on error instead of
> returning an error and letting the caller handle it.  This matches what
> the block layer submission does and avoids any confusion on who
> needs to handle errors.
>
> Also use the proper bool type for the generic_io argument.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
> ---
>   fs/btrfs/raid56.c  | 39 ++++++++++++++++-----------------------
>   fs/btrfs/raid56.h  |  4 ++--
>   fs/btrfs/scrub.c   | 10 ++--------
>   fs/btrfs/volumes.c |  2 +-
>   4 files changed, 21 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index c0cc2d99509c9..cd39c233dfdeb 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -2200,12 +2200,11 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
>    * so we assume the bio they send down corresponds to a failed part
>    * of the drive.
>    */
> -int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
> -			  int mirror_num, int generic_io)
> +void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
> +			   int mirror_num, bool generic_io)
>   {
>   	struct btrfs_fs_info *fs_info = bioc->fs_info;
>   	struct btrfs_raid_bio *rbio;
> -	int ret;
>
>   	if (generic_io) {
>   		ASSERT(bioc->mirror_num == mirror_num);
> @@ -2214,9 +2213,8 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>
>   	rbio = alloc_rbio(fs_info, bioc);
>   	if (IS_ERR(rbio)) {
> -		if (generic_io)
> -			btrfs_put_bioc(bioc);
> -		return PTR_ERR(rbio);
> +		bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
> +		goto out_end_bio;
>   	}
>
>   	rbio->operation = BTRFS_RBIO_READ_REBUILD;
> @@ -2228,10 +2226,9 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
>   			   __func__, bio->bi_iter.bi_sector << 9,
>   			   (u64)bio->bi_iter.bi_size, bioc->map_type);
> -		if (generic_io)
> -			btrfs_put_bioc(bioc);
>   		kfree(rbio);
> -		return -EIO;
> +		bio->bi_status = BLK_STS_IOERR;
> +		goto out_end_bio;
>   	}
>
>   	if (generic_io) {
> @@ -2258,24 +2255,20 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   			rbio->failb--;
>   	}
>
> -	ret = lock_stripe_add(rbio);
> +	if (lock_stripe_add(rbio))
> +		return;
>
>   	/*
> -	 * __raid56_parity_recover will end the bio with
> -	 * any errors it hits.  We don't want to return
> -	 * its error value up the stack because our caller
> -	 * will end up calling bio_endio with any nonzero
> -	 * return
> +	 * This adds our rbio to the list of rbios that will be handled after
> +	 * the current lock owner is done.
>   	 */
> -	if (ret == 0)
> -		__raid56_parity_recover(rbio);
> -	/*
> -	 * our rbio has been added to the list of
> -	 * rbios that will be handled after the
> -	 * currently lock owner is done
> -	 */
> -	return 0;
> +	__raid56_parity_recover(rbio);
> +	return;
>
> +out_end_bio:
> +	if (generic_io)
> +		btrfs_put_bioc(bioc);
> +	bio_endio(bio);
>   }
>
>   static void rmw_work(struct work_struct *work)
> diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
> index 3f223ae39462a..6f48f9e4c8694 100644
> --- a/fs/btrfs/raid56.h
> +++ b/fs/btrfs/raid56.h
> @@ -165,8 +165,8 @@ static inline int nr_data_stripes(const struct map_lookup *map)
>
>   struct btrfs_device;
>
> -int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
> -			  int mirror_num, int generic_io);
> +void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
> +			   int mirror_num, bool generic_io);
>   void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
>
>   void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index ad7958d18158f..3afe5fa50a631 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -1376,18 +1376,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
>   					struct scrub_sector *sector)
>   {
>   	DECLARE_COMPLETION_ONSTACK(done);
> -	int ret;
> -	int mirror_num;
>
>   	bio->bi_iter.bi_sector = sector->logical >> 9;
>   	bio->bi_private = &done;
>   	bio->bi_end_io = scrub_bio_wait_endio;
> -
> -	mirror_num = sector->sblock->sectors[0]->mirror_num;
> -	ret = raid56_parity_recover(bio, sector->recover->bioc,
> -				    mirror_num, 0);
> -	if (ret)
> -		return ret;
> +	raid56_parity_recover(bio, sector->recover->bioc,
> +			      sector->sblock->sectors[0]->mirror_num, false);
>
>   	wait_for_completion_io(&done);
>   	return blk_status_to_errno(bio->bi_status);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b30d4449ef18d..844ad637a0269 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6770,7 +6770,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   		if (btrfs_op(bio) == BTRFS_MAP_WRITE)
>   			raid56_parity_write(bio, bioc);
>   		else
> -			ret = raid56_parity_recover(bio, bioc, mirror_num, 1);
> +			raid56_parity_recover(bio, bioc, mirror_num, true);
>   		goto out_dec;
>   	}
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-17 10:04 ` [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers Christoph Hellwig
@ 2022-06-19 10:45   ` Qu Wenruo
  2022-06-19 21:50     ` Qu Wenruo
  2022-06-20  7:37     ` Christoph Hellwig
  0 siblings, 2 replies; 40+ messages in thread
From: Qu Wenruo @ 2022-06-19 10:45 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 2022/6/17 18:04, Christoph Hellwig wrote:
> Transfer the bio counter reference acquired by btrfs_submit_bio to
> raid56_parity_write and raid56_parity_recover together with the bio that
> the reference was acquired for instead of acquiring another reference in
> those helpers and dropping the original one in btrfs_submit_bio.

Btrfs_submit_bio() has called btrfs_bio_counter_inc_blocked(), then calls
btrfs_bio_counter_dec() in its out_dec: tag.

Thus the bio counter is already paired.

Then why do we want to dec the counter again in the RAID56 path?

Or did I miss some patches in the past modifying the behavior?

Thanks,
Qu

>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/raid56.c  | 16 ++++++----------
>   fs/btrfs/volumes.c | 15 +++++++--------
>   2 files changed, 13 insertions(+), 18 deletions(-)
>
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index cd39c233dfdeb..00a0a2d472d88 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -1815,12 +1815,11 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
>   	if (IS_ERR(rbio)) {
>   		btrfs_put_bioc(bioc);
>   		ret = PTR_ERR(rbio);
> -		goto out;
> +		goto out_dec_counter;
>   	}
>   	rbio->operation = BTRFS_RBIO_WRITE;
>   	rbio_add_bio(rbio, bio);
>
> -	btrfs_bio_counter_inc_noblocked(fs_info);
>   	rbio->generic_bio_cnt = 1;
>
>   	/*
> @@ -1852,7 +1851,6 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
>
>   out_dec_counter:
>   	btrfs_bio_counter_dec(fs_info);
> -out:
>   	bio->bi_status = errno_to_blk_status(ret);
>   	bio_endio(bio);
>   }
> @@ -2209,6 +2207,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   	if (generic_io) {
>   		ASSERT(bioc->mirror_num == mirror_num);
>   		btrfs_bio(bio)->mirror_num = mirror_num;
> +	} else {
> +		btrfs_get_bioc(bioc);
>   	}
>
>   	rbio = alloc_rbio(fs_info, bioc);
> @@ -2231,12 +2231,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   		goto out_end_bio;
>   	}
>
> -	if (generic_io) {
> -		btrfs_bio_counter_inc_noblocked(fs_info);
> +	if (generic_io)
>   		rbio->generic_bio_cnt = 1;
> -	} else {
> -		btrfs_get_bioc(bioc);
> -	}
>
>   	/*
>   	 * Loop retry:
> @@ -2266,8 +2262,8 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
>   	return;
>
>   out_end_bio:
> -	if (generic_io)
> -		btrfs_put_bioc(bioc);
> +	btrfs_bio_counter_dec(fs_info);
> +	btrfs_put_bioc(bioc);
>   	bio_endio(bio);
>   }
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 844ad637a0269..fea139d628c04 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6756,8 +6756,12 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   	btrfs_bio_counter_inc_blocked(fs_info);
>   	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
>   				&map_length, &bioc, mirror_num, 1);
> -	if (ret)
> -		goto out_dec;
> +	if (ret) {
> +		btrfs_bio_counter_dec(fs_info);
> +		bio->bi_status = errno_to_blk_status(ret);
> +		bio_endio(bio);
> +		return;
> +	}
>
>   	total_devs = bioc->num_stripes;
>   	bioc->orig_bio = bio;
> @@ -6771,7 +6775,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   			raid56_parity_write(bio, bioc);
>   		else
>   			raid56_parity_recover(bio, bioc, mirror_num, true);
> -		goto out_dec;
> +		return;
>   	}
>
>   	if (map_length < length) {
> @@ -6786,12 +6790,7 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>
>   		submit_stripe_bio(bioc, bio, dev_nr, should_clone);
>   	}
> -out_dec:
>   	btrfs_bio_counter_dec(fs_info);
> -	if (ret) {
> -		bio->bi_status = errno_to_blk_status(ret);
> -		bio_endio(bio);
> -	}
>   }
>
>   static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-19 10:45   ` Qu Wenruo
@ 2022-06-19 21:50     ` Qu Wenruo
  2022-06-20  7:47       ` Christoph Hellwig
  2022-06-20  7:37     ` Christoph Hellwig
  1 sibling, 1 reply; 40+ messages in thread
From: Qu Wenruo @ 2022-06-19 21:50 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 2022/6/19 18:45, Qu Wenruo wrote:
>
>
> On 2022/6/17 18:04, Christoph Hellwig wrote:
>> Transfer the bio counter reference acquired by btrfs_submit_bio to
>> raid56_parity_write and raid56_parity_recovery together with the bio that
>> the reference was acuired for instead of acquiring another reference in
>> those helpers and dropping the original one in btrfs_submit_bio.
>
> Btrfs_submit_bio() has called btrfs_bio_counter_inc_blocked(), then call
> btrfs_bio_counter_dec() in its out_dec: tag.
>
> Thus the bio counter is already paired.
>
> Then why we want to dec the counter again in RAID56 path?
>
> Or did I miss some patches in the past modifying the behavior?

In fact, the bio counter for btrfs_map_bio() is increased just to allow
the real bios (either the RAID56 code or submit_stripe_bio()) to grab an
extra counter covering the full lifespan of the real bio.

Thus I don't think there is any bio counter to be "transferred" here.

Yes, you can argue that in that case btrfs_map_bio() should not grab the
counter at all, but unfortunately I'm not able to answer whether that's
the case.

But for now the existing call pattern is that the real bio submission
code grabs the counter to cover the full lifespan of the bios, while the
counter from btrfs_map_bio() really only covers its own execution time
(which does not cover the underlying bios)

Thanks,
Qu
>
> Thanks,
> Qu
>
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>   fs/btrfs/raid56.c  | 16 ++++++----------
>>   fs/btrfs/volumes.c | 15 +++++++--------
>>   2 files changed, 13 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
>> index cd39c233dfdeb..00a0a2d472d88 100644
>> --- a/fs/btrfs/raid56.c
>> +++ b/fs/btrfs/raid56.c
>> @@ -1815,12 +1815,11 @@ void raid56_parity_write(struct bio *bio,
>> struct btrfs_io_context *bioc)
>>       if (IS_ERR(rbio)) {
>>           btrfs_put_bioc(bioc);
>>           ret = PTR_ERR(rbio);
>> -        goto out;
>> +        goto out_dec_counter;
>>       }
>>       rbio->operation = BTRFS_RBIO_WRITE;
>>       rbio_add_bio(rbio, bio);
>>
>> -    btrfs_bio_counter_inc_noblocked(fs_info);
>>       rbio->generic_bio_cnt = 1;
>>
>>       /*
>> @@ -1852,7 +1851,6 @@ void raid56_parity_write(struct bio *bio, struct
>> btrfs_io_context *bioc)
>>
>>   out_dec_counter:
>>       btrfs_bio_counter_dec(fs_info);
>> -out:
>>       bio->bi_status = errno_to_blk_status(ret);
>>       bio_endio(bio);
>>   }
>> @@ -2209,6 +2207,8 @@ void raid56_parity_recover(struct bio *bio,
>> struct btrfs_io_context *bioc,
>>       if (generic_io) {
>>           ASSERT(bioc->mirror_num == mirror_num);
>>           btrfs_bio(bio)->mirror_num = mirror_num;
>> +    } else {
>> +        btrfs_get_bioc(bioc);
>>       }
>>
>>       rbio = alloc_rbio(fs_info, bioc);
>> @@ -2231,12 +2231,8 @@ void raid56_parity_recover(struct bio *bio,
>> struct btrfs_io_context *bioc,
>>           goto out_end_bio;
>>       }
>>
>> -    if (generic_io) {
>> -        btrfs_bio_counter_inc_noblocked(fs_info);
>> +    if (generic_io)
>>           rbio->generic_bio_cnt = 1;
>> -    } else {
>> -        btrfs_get_bioc(bioc);
>> -    }
>>
>>       /*
>>        * Loop retry:
>> @@ -2266,8 +2262,8 @@ void raid56_parity_recover(struct bio *bio,
>> struct btrfs_io_context *bioc,
>>       return;
>>
>>   out_end_bio:
>> -    if (generic_io)
>> -        btrfs_put_bioc(bioc);
>> +    btrfs_bio_counter_dec(fs_info);
>> +    btrfs_put_bioc(bioc);
>>       bio_endio(bio);
>>   }
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 844ad637a0269..fea139d628c04 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -6756,8 +6756,12 @@ void btrfs_submit_bio(struct btrfs_fs_info
>> *fs_info, struct bio *bio,
>>       btrfs_bio_counter_inc_blocked(fs_info);
>>       ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
>>                   &map_length, &bioc, mirror_num, 1);
>> -    if (ret)
>> -        goto out_dec;
>> +    if (ret) {
>> +        btrfs_bio_counter_dec(fs_info);
>> +        bio->bi_status = errno_to_blk_status(ret);
>> +        bio_endio(bio);
>> +        return;
>> +    }
>>
>>       total_devs = bioc->num_stripes;
>>       bioc->orig_bio = bio;
>> @@ -6771,7 +6775,7 @@ void btrfs_submit_bio(struct btrfs_fs_info
>> *fs_info, struct bio *bio,
>>               raid56_parity_write(bio, bioc);
>>           else
>>               raid56_parity_recover(bio, bioc, mirror_num, true);
>> -        goto out_dec;
>> +        return;
>>       }
>>
>>       if (map_length < length) {
>> @@ -6786,12 +6790,7 @@ void btrfs_submit_bio(struct btrfs_fs_info
>> *fs_info, struct bio *bio,
>>
>>           submit_stripe_bio(bioc, bio, dev_nr, should_clone);
>>       }
>> -out_dec:
>>       btrfs_bio_counter_dec(fs_info);
>> -    if (ret) {
>> -        bio->bi_status = errno_to_blk_status(ret);
>> -        bio_endio(bio);
>> -    }
>>   }
>>
>>   static bool dev_args_match_fs_devices(const struct
>> btrfs_dev_lookup_args *args,

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-19 10:45   ` Qu Wenruo
  2022-06-19 21:50     ` Qu Wenruo
@ 2022-06-20  7:37     ` Christoph Hellwig
  2022-06-20  7:45       ` Qu Wenruo
  1 sibling, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20  7:37 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Sun, Jun 19, 2022 at 06:45:11PM +0800, Qu Wenruo wrote:
>> Transfer the bio counter reference acquired by btrfs_submit_bio to
>> raid56_parity_write and raid56_parity_recover together with the bio that
>> the reference was acquired for instead of acquiring another reference in
>> those helpers and dropping the original one in btrfs_submit_bio.
>
> Btrfs_submit_bio() has called btrfs_bio_counter_inc_blocked(), then calls
> btrfs_bio_counter_dec() in its out_dec: tag.
>
> Thus the bio counter is already paired.
>
> Then why do we want to dec the counter again in the RAID56 path?
>
> Or did I miss some patches in the past modifying the behavior?

The behaviour before this patch is:

btrfs_submit_bio does:

	btrfs_bio_counter_inc_blocked
	call raid56_parity_write / raid56_parity_recover
	btrfs_bio_counter_dec

raid56_parity_write / raid56_parity_recover then do:

	btrfs_bio_counter_inc_noblocked
	btrfs_bio_counter_dec on error only

The new behavior is:

btrfs_submit_bio does:

	btrfs_bio_counter_inc_blocked
	call raid56_parity_write / raid56_parity_recover
	return

raid56_parity_write / raid56_parity_recover then do:

	btrfs_bio_counter_dec on error only

so no change in the final number of references, but one less inc/dec
pair for the successful submission fast path.
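
As a rough sketch of just the reference ownership (commented shape only,
hypothetical layout - the real functions do a lot more than this):

	/* before this patch */
	btrfs_submit_bio()
	    btrfs_bio_counter_inc_blocked()                /* ref A */
	    raid56_parity_write() / raid56_parity_recover()
	        btrfs_bio_counter_inc_noblocked()          /* ref B */
	        /* ref B dropped at rbio completion, or on error */
	    btrfs_bio_counter_dec()                        /* ref A dropped */

	/* with this patch */
	btrfs_submit_bio()
	    btrfs_bio_counter_inc_blocked()                /* ref A */
	    raid56_parity_write() / raid56_parity_recover()
	        /* ref A transferred: dropped at rbio completion,
	           or on error */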

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-20  7:37     ` Christoph Hellwig
@ 2022-06-20  7:45       ` Qu Wenruo
  2022-06-20  7:49         ` Christoph Hellwig
  0 siblings, 1 reply; 40+ messages in thread
From: Qu Wenruo @ 2022-06-20  7:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs



On 2022/6/20 15:37, Christoph Hellwig wrote:
> On Sun, Jun 19, 2022 at 06:45:11PM +0800, Qu Wenruo wrote:
>>> Transfer the bio counter reference acquired by btrfs_submit_bio to
>>> raid56_parity_write and raid56_parity_recover together with the bio that
>>> the reference was acquired for instead of acquiring another reference in
>>> those helpers and dropping the original one in btrfs_submit_bio.
>>
>> Btrfs_submit_bio() has called btrfs_bio_counter_inc_blocked(), then calls
>> btrfs_bio_counter_dec() in its out_dec: tag.
>>
>> Thus the bio counter is already paired.
>>
>> Then why do we want to dec the counter again in the RAID56 path?
>>
>> Or did I miss some patches in the past modifying the behavior?
>
> The behaviour before this patch is:
>
> btrfs_submit_bio does:
>
> 	btrfs_bio_counter_inc_blocked
> 	call raid56_parity_write / raid56_parity_recover
> 	btrfs_bio_counter_dec
>
> raid56_parity_write / raid56_parity_recover then do:
>
> 	btrfs_bio_counter_inc_noblocked
> 	btrfs_bio_counter_dec on error only
>
> The new behavior is:
>
> btrfs_submit_bio does:
>
> 	btrfs_bio_counter_inc_blocked
> 	call raid56_parity_write / raid56_parity_recover
> 	return
>
> raid56_parity_write / raid56_parity_recover then do:
>
> 	btrfs_bio_counter_dec on error only
>
> so no change in the final number of references, but one less inc/dec
> pair for the successful submission fast path.

Oh, I see now, the patch only modifies the lifespan of the counter for
the RAID56 path, while the other profiles still use the old lifespan.

So the counter is still correct.

For the sake of consistency, is it possible that the other profiles also
follow the same behavior?

I guess it will need at least a counter to track how many bios are
pending in btrfs_io_context, so the bio counter is only decreased for
the last bio?

Thanks,
Qu

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-19 21:50     ` Qu Wenruo
@ 2022-06-20  7:47       ` Christoph Hellwig
  2022-06-20  8:03         ` Qu Wenruo
  0 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20  7:47 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Mon, Jun 20, 2022 at 05:50:53AM +0800, Qu Wenruo wrote:
> In fact, the bio counter for btrfs_map_bio() is increased just to allow
> the real bios (either the RAID56 code or submit_stripe_bio()) to grab an
> extra counter covering the full lifespan of the real bio.
>
> Thus I don't think there is any bio counter to be "transferred" here.

What is the real bio?

In the parity raid case there is:

 1) the upper level btrfs_bio, which is handed off to
    raid56_parity_write / raid56_parity_recover.
    It is then added to the bios list of the rbio and is eventually completed
 2) lower-level RAID bios, which have no direct connection to the
    btrfs_bio, as they are all driven off the rbio-based state
    machine

For the non-parity case we have

 1) the upper level btrfs_bio, which is submitted to the actual device
    as the last mirror (write case) or the only bio (read case)
 2) the clones for the non-last mirror writes

btrfs_submit_bio calls btrfs_bio_counter_inc_blocked before
__btrfs_map_block to protect against device replace operations,
and that protection needs to last until the last bio using the
mapping returned from __btrfs_map_block has completed.
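
In other words the counter acts as a drain barrier.  A minimal userspace
model of that protocol - illustrative names only, and the kernel version
uses a percpu counter plus a wait queue instead of spinning:

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_int bio_counter;      /* in-flight users of a mapping */
	static atomic_bool replace_running; /* dev-replace critical phase */

	/* models btrfs_bio_counter_inc_blocked() */
	static void counter_inc_blocked(void)
	{
	        for (;;) {
	                atomic_fetch_add(&bio_counter, 1);
	                if (!atomic_load(&replace_running))
	                        return;
	                /* back off while a replace is running */
	                atomic_fetch_sub(&bio_counter, 1);
	        }
	}

	/* models btrfs_bio_counter_dec() */
	static void counter_dec(void)
	{
	        atomic_fetch_sub(&bio_counter, 1);
	}

	/* dev-replace side: block new users, then drain the old ones */
	static void dev_replace_wait_for_bios(void)
	{
	        atomic_store(&replace_running, true);
	        while (atomic_load(&bio_counter) > 0)
	                ; /* the kernel sleeps on a wait queue here */
	}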

So we don't need an extra count for the parity case.  In fact we
don't really need an extra count either for the non-parity case
except for additional mirror writes.  So maybe things are cleaned
up if we also add this patch (which relies on the previous ones in
this series):

---
From 6351835b133ce00e2d65a6b2a398678b45426947 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Jun 2022 09:43:48 +0200
Subject: btrfs: don't take a bio_counter reference for cloned bios

There is no need for multiple bio_counter references for a single I/O.
Just release the reference when completing the bio and avoid additional
counter roundtrips.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/ctree.h       | 1 -
 fs/btrfs/dev-replace.c | 5 -----
 fs/btrfs/volumes.c     | 7 ++-----
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1347c92234a56..6857897c77108 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3972,7 +3972,6 @@ static inline void btrfs_init_full_stripe_locks_tree(
 
 /* dev-replace.c */
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
-void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
 void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
 
 static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a7dd6ba25e990..aa435d04e8ef3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -1288,11 +1288,6 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
 	return 1;
 }
 
-void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
-{
-	percpu_counter_inc(&fs_info->dev_replace.bio_counter);
-}
-
 void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 {
 	percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 33a232c897d14..86e200d2000f9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6647,14 +6647,14 @@ static void btrfs_end_bio(struct bio *bio)
 		}
 	}
 
-	btrfs_bio_counter_dec(bioc->fs_info);
-
 	if (bio != orig_bio) {
 		bio_endio(orig_bio);
 		bio_put(bio);
 		return;
 	}
 
+	btrfs_bio_counter_dec(bioc->fs_info);
+
 	/*
 	 * Only send an error to the higher layers if it is beyond the tolerance
 	 * threshold.
@@ -6698,8 +6698,6 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
 
-	btrfs_bio_counter_inc_noblocked(fs_info);
-
 	if (!dev || !dev->bdev ||
 	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
 	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
@@ -6781,7 +6779,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
 		submit_stripe_bio(bioc, bio, dev_nr, should_clone);
 	}
-	btrfs_bio_counter_dec(fs_info);
 }
 
 static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-20  7:45       ` Qu Wenruo
@ 2022-06-20  7:49         ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20  7:49 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Mon, Jun 20, 2022 at 03:45:45PM +0800, Qu Wenruo wrote:
> For the sake of consistency, is it possible that the other profiles also
> follow the same behavior?

Yes.  I actually just wrote the patch for that after reading your
other mail.  Still needs more testing of course.

> I guess it will need at least a counter to track how many pending bios
> in btrfs_io_context so bio counter is only decreased for the last bio?

We already do that using the __bi_remaining counter in the bio.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-20  7:47       ` Christoph Hellwig
@ 2022-06-20  8:03         ` Qu Wenruo
  2022-06-20  8:09           ` Christoph Hellwig
  0 siblings, 1 reply; 40+ messages in thread
From: Qu Wenruo @ 2022-06-20  8:03 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs



On 2022/6/20 15:47, Christoph Hellwig wrote:
> On Mon, Jun 20, 2022 at 05:50:53AM +0800, Qu Wenruo wrote:
>> In fact, the bio counter for btrfs_map_bio() is increased just to allow
>> the real bios (either the RAID56 code or submit_stripe_bio()) to grab an
>> extra counter covering the full lifespan of the real bio.
>>
>> Thus I don't think there is any bio counter to be "transferred" here.

Well, your reply already makes it very clear that that question is incorrect.

>
> What is the real bio?

At least to me, it is more like a meta bio representing the write for a
logical address.

E.g. if we have a write for a logical address and the underlying chunk
is RAID1, then when both copies have finished their endio, the bio
counter should be 0.
(For the current code base, during bio assembly/submission the counter
can easily go beyond 1, which can be cleaned up in the future.)

>
> In the parity raid case there is:
>
>   1) the upper level btrfs_bio, which is handed off to
>      raid56_parity_write / raid56_parity_recover.
>      It is then added to the bios list of the rbio and is eventually completed
>   2) lower-level RAID bios, which have no direct connection to the
>      btrfs_bio, as they are all driven off the rbio-based state
>      machine
>
> For the non-parity case we have
>
>   1) the upper level btrfs_bio, which is submitted to the actual device
>      as the last mirror (write case) or the only bio (read case)
>   2) the clones for the non-last mirror writes
>
> btrfs_submit_bio calls btrfs_bio_counter_inc_blocked before
> __btrfs_map_block to protect against device replace operations,
> and that protection needs to last until the last bio using the
> mapping returned from __btrfs_map_block has completed.
>
> So we don't need an extra count for the parity case.

Yep, I can totally agree now.

>  In fact we
> don't really need an extra count either for the non-parity case
> except for additional mirror writes.  So maybe things are cleaned
> up if we also add this patch (which relies on the previous ones in
> this series):

Sure, the series is already a little large, so we can definitely do more
cleanup later.

But in that case, mind putting this patch into a series that does all
the bio counter cleanups?

AFAIK there is no dependency in the patchset on this patch; it would fit
much better in a series doing the bio counter work.

Thanks,
Qu
>
> ---
>  From 6351835b133ce00e2d65a6b2a398678b45426947 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Mon, 20 Jun 2022 09:43:48 +0200
> Subject: btrfs: don't take a bio_counter reference for cloned bios
>
> There is no need for multiple bio_counter references for a single I/O.
> Just release the reference when completing the bio and avoid additional
> counter roundtrips.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/ctree.h       | 1 -
>   fs/btrfs/dev-replace.c | 5 -----
>   fs/btrfs/volumes.c     | 7 ++-----
>   3 files changed, 2 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 1347c92234a56..6857897c77108 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3972,7 +3972,6 @@ static inline void btrfs_init_full_stripe_locks_tree(
>
>   /* dev-replace.c */
>   void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
> -void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
>   void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
>
>   static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
> diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
> index a7dd6ba25e990..aa435d04e8ef3 100644
> --- a/fs/btrfs/dev-replace.c
> +++ b/fs/btrfs/dev-replace.c
> @@ -1288,11 +1288,6 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
>   	return 1;
>   }
>
> -void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
> -{
> -	percpu_counter_inc(&fs_info->dev_replace.bio_counter);
> -}
> -
>   void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
>   {
>   	percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 33a232c897d14..86e200d2000f9 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6647,14 +6647,14 @@ static void btrfs_end_bio(struct bio *bio)
>   		}
>   	}
>
> -	btrfs_bio_counter_dec(bioc->fs_info);
> -
>   	if (bio != orig_bio) {
>   		bio_endio(orig_bio);
>   		bio_put(bio);
>   		return;
>   	}
>
> +	btrfs_bio_counter_dec(bioc->fs_info);
> +
>   	/*
>   	 * Only send an error to the higher layers if it is beyond the tolerance
>   	 * threshold.
> @@ -6698,8 +6698,6 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
>   	bio->bi_end_io = btrfs_end_bio;
>   	bio->bi_iter.bi_sector = physical >> 9;
>
> -	btrfs_bio_counter_inc_noblocked(fs_info);
> -
>   	if (!dev || !dev->bdev ||
>   	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
>   	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
> @@ -6781,7 +6779,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>
>   		submit_stripe_bio(bioc, bio, dev_nr, should_clone);
>   	}
> -	btrfs_bio_counter_dec(fs_info);
>   }
>
>   static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers
  2022-06-20  8:03         ` Qu Wenruo
@ 2022-06-20  8:09           ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20  8:09 UTC (permalink / raw)
  To: Qu Wenruo
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Mon, Jun 20, 2022 at 04:03:28PM +0800, Qu Wenruo wrote:
> But in that case, mind to put this patch into the series do all the bio
> counter cleanups?
>
> AFAIK there is no dependency in the patchset on this patch, it would be
> much better to be in a series doing bio counters.

It heavily depends on the previous changes.  Both on the fact that
all cleanup is done through the endio handler, and the removal of the
stripes_pending field.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-17 10:04 ` [PATCH 10/10] btrfs: remove bioc->stripes_pending Christoph Hellwig
@ 2022-06-20  8:18   ` Nikolay Borisov
  2022-06-20  8:34     ` Nikolay Borisov
  2022-06-20  8:53     ` Christoph Hellwig
  2022-06-22 16:07   ` David Sterba
  1 sibling, 2 replies; 40+ messages in thread
From: Nikolay Borisov @ 2022-06-20  8:18 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 17.06.22 г. 13:04 ч., Christoph Hellwig wrote:
> Replace the stripes_pending field with the pending counter in the bio.
> This avoids an extra field and prepares for splitting the btrfs_bio at the
> stripe boundary.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/btrfs/volumes.c | 100 ++++++++++++++++++++++-----------------------
>   fs/btrfs/volumes.h |   1 -
>   2 files changed, 48 insertions(+), 53 deletions(-)
> 

<snip>

>   
>   static void btrfs_end_bio(struct bio *bio)
>   {
>   	struct btrfs_io_stripe *stripe = bio->bi_private;
>   	struct btrfs_io_context *bioc = stripe->bioc;
> +	struct bio *orig_bio = bioc->orig_bio;
> +	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
>   
>   	if (bio->bi_status) {
>   		atomic_inc(&bioc->error);
> -		if (bio->bi_status == BLK_STS_IOERR ||
> -		    bio->bi_status == BLK_STS_TARGET) {
> +		if (stripe->dev && stripe->dev->bdev &&
> +		    (bio->bi_status == BLK_STS_IOERR ||
> +		     bio->bi_status == BLK_STS_TARGET)) {
>   			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
>   				btrfs_dev_stat_inc_and_print(stripe->dev,
>   						BTRFS_DEV_STAT_WRITE_ERRS);
> @@ -6678,12 +6652,35 @@ static void btrfs_end_bio(struct bio *bio)
>   		}
>   	}
>   
> -	if (bio != bioc->orig_bio)
> +	btrfs_bio_counter_dec(bioc->fs_info);
> +
> +	if (bio != orig_bio) {
> +		bio_endio(orig_bio);
>   		bio_put(bio);
> +		return;
> +	}

What if the currently completed stripe bio is the last one - then 
bio_endio would be called for orig_bio, which will execute bi_end_io() 
in softirq context, but for reads we want to execute this in process 
context (as per the code below)?


>   
> -	btrfs_bio_counter_dec(bioc->fs_info);
> -	if (atomic_dec_and_test(&bioc->stripes_pending))
> -		btrfs_end_bioc(bioc, true);
> +	/*
> +	 * Only send an error to the higher layers if it is beyond the tolerance
> +	 * threshold.
> +	 */
> +	if (atomic_read(&bioc->error) > bioc->max_errors)
> +		orig_bio->bi_status = BLK_STS_IOERR;
> +	else
> +		orig_bio->bi_status = BLK_STS_OK;
> +
> +	bbio->mirror_num = bioc->mirror_num;
> +	orig_bio->bi_end_io = bioc->end_io;
> +	orig_bio->bi_private = bioc->private;
> +	if (btrfs_op(orig_bio) == BTRFS_MAP_READ) {
> +		bbio->device = stripe->dev;
> +		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
> +		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
> +	} else {
> +		orig_bio->bi_end_io(orig_bio);
> +	}
> +
> +	btrfs_put_bioc(bioc);

The old code guaranteed that btrfs_end_bioc() is executed when the last 
stripe bio was completed. With this new scheme, say we have 3 bios - 2 
cloned, 1 being the orig - what guarantees that the orig_bio won't be 
finished before the remaining 2 (or 1) cloned/stripe bios, thus calling 
btrfs_end_bio() on the orig bio with __bi_remaining potentially being 2 
or 1, and finally calling orig_bio->bi_end_io() again with __bi_remaining 
still elevated?

>   }
>   
>   static void submit_stripe_bio(struct btrfs_io_context *bioc,
> @@ -6694,28 +6691,30 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
>   	u64 physical = bioc->stripes[dev_nr].physical;
>   	struct bio *bio;
>   
> -	if (!dev || !dev->bdev ||
> -	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
> -	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
> -	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
> -		atomic_inc(&bioc->error);
> -		if (atomic_dec_and_test(&bioc->stripes_pending))
> -			btrfs_end_bioc(bioc, false);
> -		return;
> -	}
> -
>   	if (clone) {
> -		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
> +		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
> +		bio_inc_remaining(orig_bio);

When cloning, why aren't you passing dev->bdev but instead setting it 
after the checks via bio_set_dev()? Is it because of the

if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION))

check inside bio_endio in case bio_io_error is called in submit_stripe_bio?

<snip>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-20  8:18   ` Nikolay Borisov
@ 2022-06-20  8:34     ` Nikolay Borisov
  2022-06-20  8:53     ` Christoph Hellwig
  1 sibling, 0 replies; 40+ messages in thread
From: Nikolay Borisov @ 2022-06-20  8:34 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 20.06.22 г. 11:18 ч., Nikolay Borisov wrote:
> 
> 
> On 17.06.22 г. 13:04 ч., Christoph Hellwig wrote:
>> Replace the stripes_pending field with the pending counter in the bio.
>> This avoids an extra field and prepares for splitting the btrfs_bio at the
>> stripe boundary.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>   fs/btrfs/volumes.c | 100 ++++++++++++++++++++++-----------------------
>>   fs/btrfs/volumes.h |   1 -
>>   2 files changed, 48 insertions(+), 53 deletions(-)
>>
> 
> <snip>
>> @@ -6678,12 +6652,35 @@ static void btrfs_end_bio(struct bio *bio)
>>           }
>>       }
>> -    if (bio != bioc->orig_bio)
>> +    btrfs_bio_counter_dec(bioc->fs_info);
>> +
>> +    if (bio != orig_bio) {
>> +        bio_endio(orig_bio);
>>           bio_put(bio);
>> +        return;
>> +    }
> 
> What if the currently completed stripe bio is the last one - then 
> bio_endio would be called for orig_bio, which will execute bi_end_io() 
> in softirq context, but for reads we want to execute this in process 
> context (as per the code below)?

Ok, that's moot since for reads we have 1 bio, meaning this if() is 
always a no-op in the read case.

> 
> 
>> -    btrfs_bio_counter_dec(bioc->fs_info);
>> -    if (atomic_dec_and_test(&bioc->stripes_pending))
>> -        btrfs_end_bioc(bioc, true);
>> +    /*
>> +     * Only send an error to the higher layers if it is beyond the 
>> tolerance
>> +     * threshold.
>> +     */
>> +    if (atomic_read(&bioc->error) > bioc->max_errors)
>> +        orig_bio->bi_status = BLK_STS_IOERR;
>> +    else
>> +        orig_bio->bi_status = BLK_STS_OK;
>> +
>> +    bbio->mirror_num = bioc->mirror_num;
>> +    orig_bio->bi_end_io = bioc->end_io;
>> +    orig_bio->bi_private = bioc->private;
>> +    if (btrfs_op(orig_bio) == BTRFS_MAP_READ) {
>> +        bbio->device = stripe->dev;
>> +        INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
>> +        queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
>> +    } else {
>> +        orig_bio->bi_end_io(orig_bio);
>> +    }
>> +
>> +    btrfs_put_bioc(bioc);
> 
> The old code guaranteed that btrfs_end_bioc() is executed when the last 
> stripe bio was completed. With this new scheme, say we have 3 bios - 2 
> cloned, 1 being the orig, what guarantees that the orig_bio won't be 
> finished before the remaining 2 (or 1) cloned/stripe bios thus calling 
> btrfs_end_bio() on orig bio with __bi_remaining potentially being 2 or 1 
> and finally calling orig_bio->bi_end_io() again with __bi_remaining 
> being elevated?

However this still stands.

> 
>>   }
>>   static void submit_stripe_bio(struct btrfs_io_context *bioc,
>> @@ -6694,28 +6691,30 @@ static void submit_stripe_bio(struct 
>> btrfs_io_context *bioc,
>>       u64 physical = bioc->stripes[dev_nr].physical;
>>       struct bio *bio;
>> -    if (!dev || !dev->bdev ||
>> -        test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
>> -        (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
>> -         !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
>> -        atomic_inc(&bioc->error);
>> -        if (atomic_dec_and_test(&bioc->stripes_pending))
>> -            btrfs_end_bioc(bioc, false);
>> -        return;
>> -    }
>> -
>>       if (clone) {
>> -        bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, 
>> &fs_bio_set);
>> +        bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
>> +        bio_inc_remaining(orig_bio);
> 
> When cloning why aren't you passing dev->bdev but instead set it after 
> the checks via bio_set_dev ? Is it because of the
> 
> if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION))
> 
> check inside bio_endio in case bio_io_error is called in submit_stripe_bio?
> 
> <snip>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-20  8:18   ` Nikolay Borisov
  2022-06-20  8:34     ` Nikolay Borisov
@ 2022-06-20  8:53     ` Christoph Hellwig
  2022-06-20  9:34       ` Nikolay Borisov
  1 sibling, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20  8:53 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Mon, Jun 20, 2022 at 11:18:03AM +0300, Nikolay Borisov wrote:
> What if the currently completed stripe bio is the last one - then bio_endio 
> would be called for orig_bio, which will execute bi_end_io() in softirq 
> context, but for reads we want to execute this in process context (as per 
> the code below)?

> The old code guaranteed that btrfs_end_bioc() is executed when the last 
> stripe bio was completed. With this new scheme, say we have 3 bios - 2 
> cloned, 1 being the orig, what guarantees that the orig_bio won't be 
> finished before the remaining 2 (or 1) cloned/stripe bios thus calling 
> btrfs_end_bio() on orig bio with __bi_remaining potentially being 2 or 1 
> and finally calling orig_bio->bi_end_io() again with __bi_remaining being 
> elevated?

This is what the bio_inc_remaining after the bio_alloc_clone takes care
of.  With that the remaining count in the original btrfs_bio is
incremented, which ensures that ->end_io for the original bio is only
called when all other bios and the original one have completed.

Take a look at bio_endio and bio_remaining_done for the gory details.
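
A compilable userspace model of just that accounting, assuming only the
semantics described above (the real code in block/bio.c also handles bio
chaining and various flags):

	#include <stdatomic.h>
	#include <stdio.h>

	struct bio {
	        atomic_int __bi_remaining;      /* starts at 1 */
	        void (*bi_end_io)(struct bio *bio);
	};

	/* models bio_inc_remaining(): one more completion must arrive */
	static void bio_inc_remaining(struct bio *bio)
	{
	        atomic_fetch_add(&bio->__bi_remaining, 1);
	}

	/* models bio_endio()/bio_remaining_done(): only the completion
	 * that drops the count to zero runs ->bi_end_io */
	static void bio_endio(struct bio *bio)
	{
	        if (atomic_fetch_sub(&bio->__bi_remaining, 1) == 1)
	                bio->bi_end_io(bio);
	}

	static void end_io(struct bio *bio)
	{
	        printf("->bi_end_io runs exactly once\n");
	}

	int main(void)
	{
	        struct bio orig = { .__bi_remaining = 1, .bi_end_io = end_io };

	        bio_inc_remaining(&orig); /* one clone in flight: count == 2 */
	        bio_endio(&orig);         /* first completion: 2 -> 1, early out */
	        bio_endio(&orig);         /* last completion: 1 -> 0, end_io runs */
	        return 0;
	}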

>>   	if (clone) {
>> -		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
>> +		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
>> +		bio_inc_remaining(orig_bio);
>
> When cloning why aren't you passing dev->bdev but instead set it after the 
> checks via bio_set_dev ? Is it because of the
>
> if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION))
>
> check inside bio_endio in case bio_io_error is called in submit_stripe_bio?

It is because we don't know if we have a valid bdev until after
the checks a few lines down that call bio_io_error.  We need those
checks to be done later now so that all error handling goes through
the bio end_io handler.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-20  8:53     ` Christoph Hellwig
@ 2022-06-20  9:34       ` Nikolay Borisov
  2022-06-20 11:23         ` Christoph Hellwig
  0 siblings, 1 reply; 40+ messages in thread
From: Nikolay Borisov @ 2022-06-20  9:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs



On 20.06.22 г. 11:53 ч., Christoph Hellwig wrote:
> On Mon, Jun 20, 2022 at 11:18:03AM +0300, Nikolay Borisov wrote:
>> What if the currently completed stripe bio is the last one - then bio_endio
>> would be called for orig_bio, which will execute bi_end_io() in softirq
>> context, but for reads we want to execute this in process context (as per
>> the code below)?
> 
>> The old code guaranteed that btrfs_end_bioc() is executed when the last
>> stripe bio was completed. With this new scheme, say we have 3 bios - 2
>> cloned, 1 being the orig, what guarantees that the orig_bio won't be
>> finished before the remaining 2 (or 1) cloned/stripe bios thus calling
>> btrfs_end_bio() on orig bio with __bi_remaining potentially being 2 or 1
>> and finally calling orig_bio->bi_end_io() again with __bi_remaining being
>> elevated?
> 
> This is what the bio_inc_remaining after the bio_alloc_clone takes care
> of.  With that the remaining count in the original btrfs_bio is
> incremented, which ensures that ->end_io for the original bio is only
> called when all other bios and the original one have completed.
> 
> Take a look at bio_endio and bio_remaining_done for the gory details.

I get what the main idea of bio_remaining_done is. However, say we have 
a write to a RAID1 fs. So we have to do 2 writes - 1 of the orig bio and 
1 of a stripe bio - so we make 1 clone of the orig bio, call 
bio_inc_remaining, and orig_bio->__bi_remaining is 2. So the 2 bios get 
submitted.

If the stripe bio is completed first, the 'if (bio != orig_bio) {' check 
in btrfs_end_bio ensures that the function is really a no-op. 
Subsequently, when orig_bio is completed, it proceeds to execute:

orig_bio->bi_end_io(orig_bio);


Consider the same scenario, but this time if orig_bio is completed first 
then we proceed directly to calling orig_bio->bi_end_io(orig_bio), 
bypassing bio_endio and the __bi_remaining checks etc. So shouldn't this
orig_bio->bi_end_io(orig_bio) be converted to a bio_endio call?

> 
>>>    	if (clone) {
>>> -		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
>>> +		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
>>> +		bio_inc_remaining(orig_bio);
>>
>> When cloning why aren't you passing dev->bdev but instead set it after the
>> checks via bio_set_dev ? Is it because of the
>>
>> if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION))
>>
>> check inside bio_endio in case bio_io_error is called in submit_stripe_bio?
> 
> It is because we don't know if we have a valid bdev until after
> the checks a few lines down that call bio_io_error.  We need those
> checks to be done later now so that all error handling goes through
> the bio end_io handler.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-20  9:34       ` Nikolay Borisov
@ 2022-06-20 11:23         ` Christoph Hellwig
  0 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20 11:23 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Mon, Jun 20, 2022 at 12:34:48PM +0300, Nikolay Borisov wrote:
> write to a RAID1 fs. So we have to do 2 writes - 1 of the orig bio, 1 of a 
> stripe bio so we make 1 clone of the orig bio, call bio_inc_remaining and 
> orig_bio->__bi_remaining is 2. So the 2 bios get submitted.

Yes.

> If the stripe bio is completed first 'if (bio != orig_bio) {' check in 
> btrfs_end_bio ensures that the function is really a noop. Subsequently when 
> orig_bio is completed it proceeds to execute:
>
> orig_bio->bi_end_io(orig_bio);

Yes.

> Consider the same scenario, but this time if orig_bio is completed first 
> then we proceed directly calling orig_bio->bi_end_io(orig_bio);

No, it doesn't.  When the low-level block driver calls bio_endio for
orig_bio, bio_endio calls bio_remaining_done which does an
atomic_dec_and_test, which returns false because __bi_remaining is still
1 after the call.  That causes bio_endio to just return and not call
->bi_end_io.  Only when the cloned bio completes and calls bio_endio on
orig_bio again (from btrfs_end_bio) does atomic_dec_and_test return true
and ->bi_end_io get called on the orig_bio.
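
Spelled out for the RAID1 write case (orig_bio plus one clone):

	/*
	 * __bi_remaining on orig_bio over time:
	 *
	 *   submit: 1 (initial) + 1 (bio_inc_remaining for the clone) = 2
	 *
	 *   orig_bio completes first:
	 *     bio_endio(orig_bio) from the driver:       2 -> 1, early out
	 *     bio_endio(orig_bio) from the clone's
	 *     btrfs_end_bio:                             1 -> 0, end_io runs
	 *
	 *   clone completes first:
	 *     bio_endio(orig_bio) from btrfs_end_bio:    2 -> 1, early out
	 *     bio_endio(orig_bio) from the driver:       1 -> 0, end_io runs
	 *
	 * Either way ->bi_end_io runs exactly once, on the last completion.
	 */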

> bypassing 
> bio_endio and the __bi_remaining checks etc. So shouldn't this
> orig_bio->bi_end_io(orig_bio);  be converted to bio_endio call ?

The orig_bio->bi_end_io(orig_bio) from btrfs_end_bio and
btrfs_end_bio_work is weird.  It used to be a bio_endio call before
this series, but that is actually wrong as the bio has already been
completed.  The bio is done and we just need to propagate it to the
submitter of the original btrfs_bio.  The right thing would be to
call this as:

	bioc->end_io(orig_bio);

but doing so would break the repair code that has way too much
magic behavior for its own sake.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: cleanup btrfs bio submission v2
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (9 preceding siblings ...)
  2022-06-17 10:04 ` [PATCH 10/10] btrfs: remove bioc->stripes_pending Christoph Hellwig
@ 2022-06-20 13:04 ` Nikolay Borisov
  2022-07-07 18:35 ` David Sterba
  11 siblings, 0 replies; 40+ messages in thread
From: Nikolay Borisov @ 2022-06-20 13:04 UTC (permalink / raw)
  To: Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo; +Cc: linux-btrfs



On 17.06.22 13:04, Christoph Hellwig wrote:
> Hi all,
> 
> this series removes a bunch of pointlessly passed arguments in the
> raid56 code and then cleans up the btrfs_bio submission to always
> return errors through the end I/O handler.
> 
> Changes since v1:
>   - split the raid submission patches up a little better
>   - fix a whitespace issue
>   - add a new patch to properly deal with memory allocation failures in
>     btrfs_wq_submit_bio
>   - add more cleanups to prepare for and go along with the above new change
> 
> Diffstat:
>   compression.c |    8 ---
>   disk-io.c     |   46 +++++++++----------
>   disk-io.h     |    6 +-
>   inode.c       |   86 +++++++++++++++---------------------
>   raid56.c      |  127 ++++++++++++++++++++++++-----------------------------
>   raid56.h      |   14 ++---
>   scrub.c       |   19 ++-----
>   volumes.c     |  138 ++++++++++++++++++++++++++++------------------------------
>   volumes.h     |    5 --
>   9 files changed, 202 insertions(+), 247 deletions(-)


Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments
  2022-06-17 10:04 ` [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments Christoph Hellwig
@ 2022-06-20 17:16   ` David Sterba
  2022-06-20 17:38     ` Christoph Hellwig
  2022-06-22  4:30     ` Qu Wenruo
  0 siblings, 2 replies; 40+ messages in thread
From: David Sterba @ 2022-06-20 17:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs, Johannes Thumshirn

On Fri, Jun 17, 2022 at 12:04:05PM +0200, Christoph Hellwig wrote:
> The raid56 code assumes a fixed stripe length.

The code does because it was part of the initial implementation, but
raid56 as a feature wants a configurable stripe size; we have a super
block member for that. So the question is whether passing the parameter
is such a burden that we'd rather remove it, or keep it there even
though it's a fixed value that is still part of the design.

I'd rather keep it there so it gets used eventually, we have ongoing
work to fix the corner cases of raid56 so removing and adding it back
causes churn, but I'll give it another thought.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments
  2022-06-20 17:16   ` David Sterba
@ 2022-06-20 17:38     ` Christoph Hellwig
  2022-06-22  4:19       ` Christoph Hellwig
  2022-06-22  4:30     ` Qu Wenruo
  1 sibling, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-20 17:38 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo,
	linux-btrfs, Johannes Thumshirn

On Mon, Jun 20, 2022 at 07:16:08PM +0200, David Sterba wrote:
> On Fri, Jun 17, 2022 at 12:04:05PM +0200, Christoph Hellwig wrote:
> > The raid56 code assumes a fixed stripe length.
> 
> The code does because it was part of the initial implementation, but
> raid56 as a feature wants a configurable stripe size; we have a super
> block member for that. So the question is whether passing the parameter
> is such a burden that we'd rather remove it, or keep it there even
> though it's a fixed value that is still part of the design.
> 
> I'd rather keep it there so it gets used eventually, we have ongoing
> work to fix the corner cases of raid56 so removing and adding it back
> causes churn, but I'll give it another thought.

Well, right now it is very much dead code and complicates a lot
of the argument passing as well as bloating the code size.

IFF the superblock member were to be actually used in the future,
it would make sense to expose it in the btrfs_fs_info and use that
instead of the constant, but still skip all the argument passing.
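
Roughly, and purely hypothetically (the field name here is made up for
illustration):

	/* in struct btrfs_fs_info */
	u32 stripe_len;		/* today always BTRFS_STRIPE_LEN */

	/* set once at mount ... */
	fs_info->stripe_len = BTRFS_STRIPE_LEN;

	/* ... and read where needed, with no argument passing */
	u32 len = fs_info->stripe_len;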

hch@brick:~/work/linux$ size btrfs.o.*
   text	   data	    bss	    dec	    hex	filename
1502453	 125590	  28776	1656819	 1947f3	btrfs.o.new
1502599	 125590	  28776	1656965	 194885	btrfs.o.old


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments
  2022-06-20 17:38     ` Christoph Hellwig
@ 2022-06-22  4:19       ` Christoph Hellwig
  2022-06-22 14:07         ` David Sterba
  0 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-22  4:19 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo,
	linux-btrfs, Johannes Thumshirn

On Mon, Jun 20, 2022 at 07:38:34PM +0200, Christoph Hellwig wrote:
> > I'd rather keep it there so it gets used eventually, we have ongoing
> > work to fix the corner cases of raid56 so removing and adding it back
> > causes churn, but I'll give it another thought.
> 
> Well, right now it is very much dead code and complicates a lot
> of the argument passing as well as bloating the code size.
> 
> IFF the superblock member were to be actually used in the future,
> it would make sense to expose it in the btrfs_fs_info and use that
> instead of the constant, but still skip all the argument passing.
> 
> hch@brick:~/work/linux$ size btrfs.o.*
>    text	   data	    bss	    dec	    hex	filename
> 1502453	 125590	  28776	1656819	 1947f3	btrfs.o.new
> 1502599	 125590	  28776	1656965	 194885	btrfs.o.old

So I guess this wasn't convincing enough. My plan B would be the fs_info
member.  The related member seems to be the sectorsize field in the super
block, which is checked to be a power of two but otherwise completely
ignored.  The fs_info has a stripe_size member that is initialized to the
sectorsize after reading the super_block but is then also mostly ignored.

So this is all a bit of a mess unfortunately :(

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments
  2022-06-20 17:16   ` David Sterba
  2022-06-20 17:38     ` Christoph Hellwig
@ 2022-06-22  4:30     ` Qu Wenruo
  1 sibling, 0 replies; 40+ messages in thread
From: Qu Wenruo @ 2022-06-22  4:30 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo,
	linux-btrfs, Johannes Thumshirn



On 2022/6/21 01:16, David Sterba wrote:
> On Fri, Jun 17, 2022 at 12:04:05PM +0200, Christoph Hellwig wrote:
>> The raid56 code assumes a fixed stripe length.
>
> The code does because it was part of the initial implementation, but
> raid56 as a feature wants a configurable stripe size; we have a super
> block member for that. So the question is whether passing the parameter
> is such a burden that we'd rather remove it, or keep it there even
> though it's a fixed value that is still part of the design.
>
> I'd rather keep it there so it gets used eventually, we have ongoing
> work to fix the corner cases of raid56 so removing and adding it back
> causes churn, but I'll give it another thought.

Nope, we have too much code relying on that fixed 64KiB stripe length.

In fact, the tree-checker has an explicit check on that, which means
there is already no way to be compatible even if we add a lot of code to
support different stripe lengths.

Older kernels will just reject chunks with a stripe length other than
64KiB.

I'd say we have had such situations before (mostly for inline dedupe):
we just introduced some dead parameters, but in the end there was no
inline dedupe at all.

So just don't waste time on something that no one is actively developing
yet.

And I'm pretty sure that, even if we're going to support different
stripe lengths, we will need a compat-RO or plain incompat flag for it
anyway.

Thanks,
Qu

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments
  2022-06-22  4:19       ` Christoph Hellwig
@ 2022-06-22 14:07         ` David Sterba
  0 siblings, 0 replies; 40+ messages in thread
From: David Sterba @ 2022-06-22 14:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: dsterba, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs,
	Johannes Thumshirn

On Wed, Jun 22, 2022 at 06:19:15AM +0200, Christoph Hellwig wrote:
> On Mon, Jun 20, 2022 at 07:38:34PM +0200, Christoph Hellwig wrote:
> > > I'd rather keep it there so it gets used eventually, we have ongoing
> > > work to fix the corner cases of raid56 so removing and adding it back
> > > causes churn, but I'll give it another thought.
> > 
> > Well, right now it is very much dead code and complicates a lot
> > of the argument passing as well as bloating the code size.
> > 
> > IFF the superblock member were to be actually used in the future,
> > it would make sense to expose it in the btrfs_fs_info and use that
> > instead of the constant, but still skip all the argument passing.
> > 
> > hch@brick:~/work/linux$ size btrfs.o.*
> >    text	   data	    bss	    dec	    hex	filename
> > 1502453	 125590	  28776	1656819	 1947f3	btrfs.o.new
> > 1502599	 125590	  28776	1656965	 194885	btrfs.o.old
> 
> So I guess this wasn't convincing enough. My plan B would be the fs_info
> member.  The related member seems to be the sectorsize field in the super
> > block, which is checked to be a power of two but otherwise completely
> > ignored.  The fs_info has a stripe_size member that is initialized to the
> > sectorsize after reading the super_block but is then also mostly ignored.
> 
> So this is all a bit of a mess unfortunately :(

Hold on, it would be better to remove the code; as Qu also noted, it's
not used consistently everywhere. My initial objection was more about
the general direction of such changes, but cleaning up the raid56 code
to the level of current functionality should work.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-17 10:04 ` [PATCH 10/10] btrfs: remove bioc->stripes_pending Christoph Hellwig
  2022-06-20  8:18   ` Nikolay Borisov
@ 2022-06-22 16:07   ` David Sterba
  2022-06-22 16:15     ` Christoph Hellwig
  1 sibling, 1 reply; 40+ messages in thread
From: David Sterba @ 2022-06-22 16:07 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Fri, Jun 17, 2022 at 12:04:14PM +0200, Christoph Hellwig wrote:
> Replace the stripes_pending field with the pending counter in the bio.
> This avoids an extra field and prepares for splitting the btrfs_bio at
> the stripe boundary.

This does not seem sufficient as a changelog; it does not explain why
stripes_pending is being removed, just that it's some prep work. Given
the questions that Nikolay had, I think this needs to be expanded. I'll
try to summarize the discussion, but in the bio code nothing is so
simple that reading the patch would answer all questions.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-22 16:07   ` David Sterba
@ 2022-06-22 16:15     ` Christoph Hellwig
  2022-07-07 18:34       ` David Sterba
  0 siblings, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2022-06-22 16:15 UTC (permalink / raw)
  To: dsterba, Christoph Hellwig, David Sterba, Josef Bacik, Qu Wenruo,
	linux-btrfs

On Wed, Jun 22, 2022 at 06:07:25PM +0200, David Sterba wrote:
> On Fri, Jun 17, 2022 at 12:04:14PM +0200, Christoph Hellwig wrote:
> > Replace the stripes_pending field with the pending counter in the bio.
> > This avoids an extra field and prepares for splitting the btrfs_bio at
> > the stripe boundary.
> 
> This does not seem sufficient as a changelog; it does not explain why
> stripes_pending is being removed, just that it's some prep work. Given
> the questions that Nikolay had, I think this needs to be expanded. I'll
> try to summarize the discussion, but in the bio code nothing is so
> simple that reading the patch would answer all questions.

Maybe just skip this patch for now then.  I also have two follow-ups
for it, so I can resend it with an updated version of this one.

But the single sentence summary is: use the block layer infrastructure
instead of reinventing it poorly.
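
Or in code terms, a hedged before/after sketch (function names are
approximate, not the literal patch):

	/* before: btrfs-private completion counting */
	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
	...
	if (atomic_dec_and_test(&bioc->stripes_pending))
		btrfs_end_bioc(bioc);	/* completes orig_bio */

	/* after: the bio's own __bi_remaining does the counting */
	bio_inc_remaining(orig_bio);	/* once per extra stripe bio */
	...
	bio_endio(orig_bio);		/* once per stripe completion */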

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully
  2022-06-17 10:04 ` [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully Christoph Hellwig
@ 2022-06-28 15:20   ` Boris Burkov
  0 siblings, 0 replies; 40+ messages in thread
From: Boris Burkov @ 2022-06-28 15:20 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Fri, Jun 17, 2022 at 12:04:12PM +0200, Christoph Hellwig wrote:
> btrfs_wq_submit_bio is used for writeback under memory pressure.  Instead
> of failing the I/O when we can't allocate the async_submit_bio, just
> punt back to the synchronous submission path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>  fs/btrfs/disk-io.c | 37 ++++++++++++++++++-------------------
>  fs/btrfs/disk-io.h |  6 +++---
>  fs/btrfs/inode.c   | 17 +++++++++--------
>  3 files changed, 30 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5df6865428a5c..eaa643f38783c 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -756,16 +756,16 @@ static void run_one_async_free(struct btrfs_work *work)
>  	kfree(async);
>  }
>  
> -blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
> -				 int mirror_num, u64 dio_file_offset,
> -				 extent_submit_bio_start_t *submit_bio_start)
> +bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
> +			 u64 dio_file_offset,
> +			 extent_submit_bio_start_t *submit_bio_start)
>  {
>  	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
>  	struct async_submit_bio *async;
>  
>  	async = kmalloc(sizeof(*async), GFP_NOFS);
>  	if (!async)
> -		return BLK_STS_RESOURCE;
> +		return false;
>  
>  	async->inode = inode;
>  	async->bio = bio;
> @@ -783,7 +783,7 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
>  		btrfs_queue_work(fs_info->hipri_workers, &async->work);
>  	else
>  		btrfs_queue_work(fs_info->workers, &async->work);
> -	return 0;
> +	return true;
>  }
>  
>  static blk_status_t btree_csum_one_bio(struct bio *bio)
> @@ -837,25 +837,24 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
>  		btrfs_submit_bio(fs_info, bio, mirror_num);
>  		return;
>  	}
> -	if (!should_async_write(fs_info, BTRFS_I(inode))) {
> -		ret = btree_csum_one_bio(bio);
> -		if (!ret) {
> -			btrfs_submit_bio(fs_info, bio, mirror_num);
> -			return;
> -		}
> -	} else {
> -		/*
> -		 * kthread helpers are used to submit writes so that
> -		 * checksumming can happen in parallel across all CPUs
> -		 */
> -		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
> -					  btree_submit_bio_start);
> -	}
>  
> +	/*
> +	 * Kthread helpers are used to submit writes so that checksumming can
> +	 * happen in parallel across all CPUs
> +	 */
> +	if (should_async_write(fs_info, BTRFS_I(inode)) &&
> +	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
> +				btree_submit_bio_start))
> +		return;
> +
> +	ret = btree_csum_one_bio(bio);
>  	if (ret) {
>  		bio->bi_status = ret;
>  		bio_endio(bio);
> +		return;
>  	}
> +
> +	btrfs_submit_bio(fs_info, bio, mirror_num);
>  }
>  
>  #ifdef CONFIG_MIGRATION
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index 05e779a41a997..8993b428e09ce 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -114,9 +114,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
>  			  int atomic);
>  int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid,
>  			     int level, struct btrfs_key *first_key);
> -blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
> -				 int mirror_num, u64 dio_file_offset,
> -				 extent_submit_bio_start_t *submit_bio_start);
> +bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num,
> +			 u64 dio_file_offset,
> +			 extent_submit_bio_start_t *submit_bio_start);
>  blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
>  			  int mirror_num);
>  int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 5a90fc129aea9..38af980d1cf1f 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2604,11 +2604,10 @@ void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirro
>  	if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
>  	    !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
>  	    !btrfs_is_data_reloc_root(bi->root)) {
> -		if (!atomic_read(&bi->sync_writers)) {
> -			ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
> -						  btrfs_submit_bio_start);
> -			goto out;
> -		}
> +		if (!atomic_read(&bi->sync_writers) &&
> +		    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
> +					btrfs_submit_bio_start))
> +			return;
>  
>  		ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
>  		if (ret)
> @@ -7953,9 +7952,11 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
>  
>  	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
>  		/* Check btrfs_submit_data_write_bio() for async submit rules */
> -		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
> -			return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
> -					btrfs_submit_bio_start_direct_io);
> +		if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers) &&
> +		    btrfs_wq_submit_bio(inode, bio, 0, file_offset,
> +					btrfs_submit_bio_start_direct_io))
> +			return BLK_STS_OK;
> +
>  		/*
>  		 * If we aren't doing async submit, calculate the csum of the
>  		 * bio now.
> -- 
> 2.30.2
> 
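
In short, the new calling convention as a sketch (mirroring the hunks
above):

	/*
	 * btrfs_wq_submit_bio now returns a bool: true if the bio was
	 * queued for async submission, false if the allocation failed
	 * and the caller should fall back to synchronous submission.
	 */
	if (should_async_write(fs_info, BTRFS_I(inode)) &&
	    btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
				btree_submit_bio_start))
		return;		/* the async path owns the bio now */

	/* otherwise checksum and submit synchronously */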

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/10] btrfs: remove bioc->stripes_pending
  2022-06-22 16:15     ` Christoph Hellwig
@ 2022-07-07 18:34       ` David Sterba
  0 siblings, 0 replies; 40+ messages in thread
From: David Sterba @ 2022-07-07 18:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: dsterba, David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Wed, Jun 22, 2022 at 06:15:46PM +0200, Christoph Hellwig wrote:
> On Wed, Jun 22, 2022 at 06:07:25PM +0200, David Sterba wrote:
> > On Fri, Jun 17, 2022 at 12:04:14PM +0200, Christoph Hellwig wrote:
> > > Replace the stripes_pending field with the pending counter in the bio.
> > > This avoids an extra field and prepares for splitting the btrfs_bio at
> > > the stripe boundary.
> > 
> > This does not seem sufficient as a changelog; it does not explain why
> > stripes_pending is being removed, just that it's some prep work. Given
> > the questions that Nikolay had, I think this needs to be expanded. I'll
> > try to summarize the discussion, but in the bio code nothing is so
> > simple that reading the patch would answer all questions.
> 
> Maybe just skip this patch for now then.  I also have two follow-ups
> for it, so I can resend it with an updated version of this one.

Ok, I'll move the series from topic branch to misc-next without the last
patch.

> But the single sentence summary is: use the block layer infrastructure
> instead of reinventing it poorly.

A summary is perhaps good as the first sentence, but not as the
explanation or reasoning for why a patch that changes logical structure,
data structures, code flow etc. is correct. IMHO it should be a sort of
proof, or steps to follow and verify during review, not just an
"abstract".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: cleanup btrfs bio submission v2
  2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
                   ` (10 preceding siblings ...)
  2022-06-20 13:04 ` cleanup btrfs bio submission v2 Nikolay Borisov
@ 2022-07-07 18:35 ` David Sterba
  11 siblings, 0 replies; 40+ messages in thread
From: David Sterba @ 2022-07-07 18:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: David Sterba, Josef Bacik, Qu Wenruo, linux-btrfs

On Fri, Jun 17, 2022 at 12:04:04PM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this series removes a bunch of pointlessly passed arguments in the
> raid56 code and then cleans up the btrfs_bio submission to always
> return errors through the end I/O handler.
> 
> Changes since v1:
>  - split the raid submission patches up a little better
>  - fix a whitespace issue
>  - add a new patch to properly deal with memory allocation failures in
>    btrfs_wq_submit_bio
>  - add more cleanups to prepare for and go along with the above new change

Patches 1-9 moved from topic branch to misc-next, thanks.

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2022-07-07 18:40 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-17 10:04 cleanup btrfs bio submission v2 Christoph Hellwig
2022-06-17 10:04 ` [PATCH 01/10] btrfs: remove a bunch of pointles stripe_len arguments Christoph Hellwig
2022-06-20 17:16   ` David Sterba
2022-06-20 17:38     ` Christoph Hellwig
2022-06-22  4:19       ` Christoph Hellwig
2022-06-22 14:07         ` David Sterba
2022-06-22  4:30     ` Qu Wenruo
2022-06-17 10:04 ` [PATCH 02/10] btrfs: return proper mapped length for RAID56 profiles in __btrfs_map_block() Christoph Hellwig
2022-06-17 10:04 ` [PATCH 03/10] btrfs: remove the btrfs_map_bio return value Christoph Hellwig
2022-06-17 10:04 ` [PATCH 04/10] btrfs: remove the raid56_parity_write " Christoph Hellwig
2022-06-17 10:38   ` Qu Wenruo
2022-06-18 11:04   ` Johannes Thumshirn
2022-06-17 10:04 ` [PATCH 05/10] btrfs: remove the raid56_parity_recover " Christoph Hellwig
2022-06-18 11:06   ` Johannes Thumshirn
2022-06-19  6:35     ` Christoph Hellwig
2022-06-19 10:35   ` Qu Wenruo
2022-06-17 10:04 ` [PATCH 06/10] btrfs: transfer the bio counter reference to the raid submission helpers Christoph Hellwig
2022-06-19 10:45   ` Qu Wenruo
2022-06-19 21:50     ` Qu Wenruo
2022-06-20  7:47       ` Christoph Hellwig
2022-06-20  8:03         ` Qu Wenruo
2022-06-20  8:09           ` Christoph Hellwig
2022-06-20  7:37     ` Christoph Hellwig
2022-06-20  7:45       ` Qu Wenruo
2022-06-20  7:49         ` Christoph Hellwig
2022-06-17 10:04 ` [PATCH 07/10] btrfs: simplify the reloc root check in btrfs_submit_data_write_bio Christoph Hellwig
2022-06-17 10:04 ` [PATCH 08/10] btrfs: handle allocation failure in btrfs_wq_submit_bio gracefully Christoph Hellwig
2022-06-28 15:20   ` Boris Burkov
2022-06-17 10:04 ` [PATCH 09/10] btrfs: remove the btrfs_submit_dio_bio return value Christoph Hellwig
2022-06-17 10:04 ` [PATCH 10/10] btrfs: remove bioc->stripes_pending Christoph Hellwig
2022-06-20  8:18   ` Nikolay Borisov
2022-06-20  8:34     ` Nikolay Borisov
2022-06-20  8:53     ` Christoph Hellwig
2022-06-20  9:34       ` Nikolay Borisov
2022-06-20 11:23         ` Christoph Hellwig
2022-06-22 16:07   ` David Sterba
2022-06-22 16:15     ` Christoph Hellwig
2022-07-07 18:34       ` David Sterba
2022-06-20 13:04 ` cleanup btrfs bio submission v2 Nikolay Borisov
2022-07-07 18:35 ` David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.