All of lore.kernel.org
 help / color / mirror / Atom feed
* [dm-devel] [PATCH] dm: fix dm io BLK_STS_DM_REQUEUE
@ 2022-06-23 13:20 Ming Lei
  2022-06-24  3:18 ` [dm-devel] " Mike Snitzer
  0 siblings, 1 reply; 2+ messages in thread
From: Ming Lei @ 2022-06-23 13:20 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Ming Lei

Commit 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting")
removes cloned bio when dm io splitting is needed. This way will make
multiple dm io instance sharing same original bio, and it works fine if
IOs are completed successfully. But regression may be caused if
BLK_STS_DM_REQUEUE is returned from either one of cloned io.

If case of BLK_STS_DM_REQUEUE from one cloned io, only the mapped part
of original bio for the current exact dm io needs to be re-submitted.
However, since the original bio is shared among all dm io instances,
actually the original bio only represents the last dm io instance, so
requeue can't work as expected. Also when more than one dm io is
requeued, the same original bio is requeued from all dm io's completion
handler, then race is caused.

Fix the issue by still allocating one bio for completing io only, then
io accounting can reply on ->orig_bio.

Based on one earlier version from Mike.

In theory, we can delay the bio clone when BLK_STS_DM_REQUEUE happens,
but that approach is a bit complicated: 1) bio clone needs to be done in
task context; 2) one block interface for unwinding bio is required.

Cc: Benjamin Marzinski <bmarzins@redhat.com>
Fixes: 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/md/dm-core.h |  2 +-
 drivers/md/dm.c      | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 54c0473a51dd..32f461c624c6 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -273,7 +273,7 @@ struct dm_io {
 	struct mapped_device *md;
 
 	/* The three fields represent mapped part of original bio */
-	struct bio *orig_bio;
+	struct bio *orig_bio, *bak_bio;
 	unsigned int sector_offset; /* offset to end of orig_bio */
 	unsigned int sectors;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9ede55278eec..85d8f2f1c9c8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -594,6 +594,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	atomic_set(&io->io_count, 2);
 	this_cpu_inc(*md->pending_io);
 	io->orig_bio = bio;
+	io->bak_bio = NULL;
 	io->md = md;
 	spin_lock_init(&io->lock);
 	io->start_time = jiffies;
@@ -887,7 +888,7 @@ static void dm_io_complete(struct dm_io *io)
 {
 	blk_status_t io_error;
 	struct mapped_device *md = io->md;
-	struct bio *bio = io->orig_bio;
+	struct bio *bio = io->bak_bio ? io->bak_bio : io->orig_bio;
 
 	if (io->status == BLK_STS_DM_REQUEUE) {
 		unsigned long flags;
@@ -1693,9 +1694,10 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	 * Remainder must be passed to submit_bio_noacct() so it gets handled
 	 * *after* bios already submitted have been completely processed.
 	 */
-	bio_trim(bio, io->sectors, ci.sector_count);
-	trace_block_split(bio, bio->bi_iter.bi_sector);
-	bio_inc_remaining(bio);
+	io->bak_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
+			GFP_NOIO, &md->queue->bio_split);
+	bio_chain(io->bak_bio, bio);
+	trace_block_split(io->bak_bio, bio->bi_iter.bi_sector);
 	submit_bio_noacct(bio);
 out:
 	/*
-- 
2.31.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [dm-devel] dm: fix dm io BLK_STS_DM_REQUEUE
  2022-06-23 13:20 [dm-devel] [PATCH] dm: fix dm io BLK_STS_DM_REQUEUE Ming Lei
@ 2022-06-24  3:18 ` Mike Snitzer
  0 siblings, 0 replies; 2+ messages in thread
From: Mike Snitzer @ 2022-06-24  3:18 UTC (permalink / raw)
  To: Ming Lei; +Cc: dm-devel

On Thu, Jun 23 2022 at  9:20P -0400,
Ming Lei <ming.lei@redhat.com> wrote:

> Commit 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting")
> removes cloned bio when dm io splitting is needed. This way will make
> multiple dm io instance sharing same original bio, and it works fine if
> IOs are completed successfully. But regression may be caused if
> BLK_STS_DM_REQUEUE is returned from either one of cloned io.
> 
> If case of BLK_STS_DM_REQUEUE from one cloned io, only the mapped part
> of original bio for the current exact dm io needs to be re-submitted.
> However, since the original bio is shared among all dm io instances,
> actually the original bio only represents the last dm io instance, so
> requeue can't work as expected. Also when more than one dm io is
> requeued, the same original bio is requeued from all dm io's completion
> handler, then race is caused.
> 
> Fix the issue by still allocating one bio for completing io only, then
> io accounting can reply on ->orig_bio.
> 
> Based on one earlier version from Mike.
> 
> In theory, we can delay the bio clone when BLK_STS_DM_REQUEUE happens,
> but that approach is a bit complicated: 1) bio clone needs to be done in
> task context; 2) one block interface for unwinding bio is required.
> 
> Cc: Benjamin Marzinski <bmarzins@redhat.com>
> Fixes: 7dd76d1feec7 ("dm: improve bio splitting and associated IO accounting")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/md/dm-core.h |  2 +-
>  drivers/md/dm.c      | 10 ++++++----
>  2 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
> index 54c0473a51dd..32f461c624c6 100644
> --- a/drivers/md/dm-core.h
> +++ b/drivers/md/dm-core.h
> @@ -273,7 +273,7 @@ struct dm_io {
>  	struct mapped_device *md;
>  
>  	/* The three fields represent mapped part of original bio */
> -	struct bio *orig_bio;
> +	struct bio *orig_bio, *bak_bio;
>  	unsigned int sector_offset; /* offset to end of orig_bio */
>  	unsigned int sectors;
>  
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 9ede55278eec..85d8f2f1c9c8 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -594,6 +594,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
>  	atomic_set(&io->io_count, 2);
>  	this_cpu_inc(*md->pending_io);
>  	io->orig_bio = bio;
> +	io->bak_bio = NULL;
>  	io->md = md;
>  	spin_lock_init(&io->lock);
>  	io->start_time = jiffies;
> @@ -887,7 +888,7 @@ static void dm_io_complete(struct dm_io *io)
>  {
>  	blk_status_t io_error;
>  	struct mapped_device *md = io->md;
> -	struct bio *bio = io->orig_bio;
> +	struct bio *bio = io->bak_bio ? io->bak_bio : io->orig_bio;
>  
>  	if (io->status == BLK_STS_DM_REQUEUE) {
>  		unsigned long flags;
> @@ -1693,9 +1694,10 @@ static void dm_split_and_process_bio(struct mapped_device *md,
>  	 * Remainder must be passed to submit_bio_noacct() so it gets handled
>  	 * *after* bios already submitted have been completely processed.
>  	 */
> -	bio_trim(bio, io->sectors, ci.sector_count);
> -	trace_block_split(bio, bio->bi_iter.bi_sector);
> -	bio_inc_remaining(bio);
> +	io->bak_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
> +			GFP_NOIO, &md->queue->bio_split);
> +	bio_chain(io->bak_bio, bio);
> +	trace_block_split(io->bak_bio, bio->bi_iter.bi_sector);
>  	submit_bio_noacct(bio);
>  out:
>  	/*
> -- 
> 2.31.1

FYI, I renamed bak_bio to split_bio.  I also made use of io->sectors
and added a WARN_ON_ONCE, please see:

https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=dm-5.19&id=61b6e2e5321da281ab3c0c04e1962b3d000f6248

It worked in Ben's latest testing.

Thanks,
Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-06-24  3:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-23 13:20 [dm-devel] [PATCH] dm: fix dm io BLK_STS_DM_REQUEUE Ming Lei
2022-06-24  3:18 ` [dm-devel] " Mike Snitzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.