All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Mike Snitzer <snitzer@redhat.com>
Cc: linux-block@vger.kernel.org, dm-devel@redhat.com,
	Damien Le Moal <damien.lemoal@opensource.wdc.com>,
	Ming Lei <ming.lei@redhat.com>
Subject: [dm-devel] [PATCH 5/8] dm: always setup ->orig_bio in alloc_io
Date: Tue, 12 Apr 2022 16:56:13 +0800	[thread overview]
Message-ID: <20220412085616.1409626-6-ming.lei@redhat.com> (raw)
In-Reply-To: <20220412085616.1409626-1-ming.lei@redhat.com>

The current DM codes setup ->orig_bio after __map_bio() returns,
and not only cause kernel panic for dm zone, but also a bit ugly
and tricky, especially the waiting until ->orig_bio is set in
dm_submit_bio_remap().

The reason is that one new bio is cloned from original FS bio to
represent the mapped part, which just serves io accounting.

Now we have switched to bdev based io accounting interface, and we
can retrieve sectors/bio_op from both the real original bio and the
added fields of .sector_offset & .sectors easily, so the new cloned
bio isn't necessary any more.

Not only fixes dm-zone's kernel panic, but also cleans up dm io
accounting & split a bit.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/md/dm-core.h |  8 ++++++-
 drivers/md/dm.c      | 51 ++++++++++++++++++++++----------------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 4277853c7535..aefb080c230d 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -247,7 +247,12 @@ struct dm_io {
 	blk_short_t flags;
 	atomic_t io_count;
 	struct mapped_device *md;
+
+	/* The three fields represent mapped part of original bio */
 	struct bio *orig_bio;
+	unsigned int sector_offset; /* offset to end of orig_bio */
+	unsigned int sectors;
+
 	blk_status_t status;
 	spinlock_t lock;
 	unsigned long start_time;
@@ -264,7 +269,8 @@ struct dm_io {
  */
 enum {
 	DM_IO_START_ACCT,
-	DM_IO_ACCOUNTED
+	DM_IO_ACCOUNTED,
+	DM_IO_SPLITTED
 };
 
 static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 31eacc0e93ed..df1d013fb793 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -509,8 +509,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
 	/* If REQ_PREFLUSH set save any payload but do not account it */
 	if (bio_is_flush_with_data(bio))
 		sectors = 0;
-	else
+	else if (likely(!(dm_io_flagged(io, DM_IO_SPLITTED))))
 		sectors = bio_sectors(bio);
+	else
+		sectors = io->sectors;
 
 	if (!end)
 		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
@@ -518,10 +520,21 @@ static void dm_io_acct(struct dm_io *io, bool end)
 	else
 		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
 
-	if (unlikely(dm_stats_used(&md->stats)))
+	if (unlikely(dm_stats_used(&md->stats))) {
+		sector_t sector;
+
+		if (likely(!dm_io_flagged(io, DM_IO_SPLITTED))) {
+			sector = bio->bi_iter.bi_sector;
+			sectors = bio_sectors(bio);
+		} else {
+			sector = bio_end_sector(bio) - io->sector_offset;
+			sectors = io->sectors;
+		}
+
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
-				    bio->bi_iter.bi_sector, bio_sectors(bio),
+				    sector, sectors,
 				    end, start_time, stats_aux);
+	}
 }
 
 static void __dm_start_io_acct(struct dm_io *io)
@@ -576,7 +589,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	io->status = 0;
 	atomic_set(&io->io_count, 1);
 	this_cpu_inc(*md->pending_io);
-	io->orig_bio = NULL;
+	io->orig_bio = bio;
 	io->md = md;
 	io->map_task = current;
 	spin_lock_init(&io->lock);
@@ -1222,13 +1235,6 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
 		/* Still in target's map function */
 		dm_io_set_flag(io, DM_IO_START_ACCT);
 	} else {
-		/*
-		 * Called by another thread, managed by DM target,
-		 * wait for dm_split_and_process_bio() to store
-		 * io->orig_bio
-		 */
-		while (unlikely(!smp_load_acquire(&io->orig_bio)))
-			msleep(1);
 		dm_start_io_acct(io, clone);
 	}
 
@@ -1557,7 +1563,6 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 				     struct dm_table *map, struct bio *bio)
 {
 	struct clone_info ci;
-	struct bio *orig_bio = NULL;
 	int error = 0;
 
 	init_clone_info(&ci, md, map, bio);
@@ -1573,22 +1578,16 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	if (error || !ci.sector_count)
 		goto out;
 
-	/*
-	 * Remainder must be passed to submit_bio_noacct() so it gets handled
-	 * *after* bios already submitted have been completely processed.
-	 * We take a clone of the original to store in ci.io->orig_bio to be
-	 * used by dm_end_io_acct() and for dm_io_complete() to use for
-	 * completion handling.
-	 */
-	orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
-			     GFP_NOIO, &md->queue->bio_split);
-	bio_chain(orig_bio, bio);
-	trace_block_split(orig_bio, bio->bi_iter.bi_sector);
+	/* setup the mapped part for accounting */
+	dm_io_set_flag(ci.io, DM_IO_SPLITTED);
+	ci.io->sectors = bio_sectors(bio) - ci.sector_count;
+	ci.io->sector_offset = bio_end_sector(bio) - bio->bi_iter.bi_sector;
+
+	bio_trim(bio, ci.io->sectors, ci.sector_count);
+	trace_block_split(bio, bio->bi_iter.bi_sector);
+	bio_inc_remaining(bio);
 	submit_bio_noacct(bio);
 out:
-	if (!orig_bio)
-		orig_bio = bio;
-	smp_store_release(&ci.io->orig_bio, orig_bio);
 	if (dm_io_flagged(ci.io, DM_IO_START_ACCT))
 		dm_start_io_acct(ci.io, NULL);
 
-- 
2.31.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


WARNING: multiple messages have this Message-ID (diff)
From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Mike Snitzer <snitzer@redhat.com>
Cc: linux-block@vger.kernel.org, dm-devel@redhat.com,
	Damien Le Moal <damien.lemoal@opensource.wdc.com>,
	Ming Lei <ming.lei@redhat.com>
Subject: [PATCH 5/8] dm: always setup ->orig_bio in alloc_io
Date: Tue, 12 Apr 2022 16:56:13 +0800	[thread overview]
Message-ID: <20220412085616.1409626-6-ming.lei@redhat.com> (raw)
In-Reply-To: <20220412085616.1409626-1-ming.lei@redhat.com>

The current DM codes setup ->orig_bio after __map_bio() returns,
and not only cause kernel panic for dm zone, but also a bit ugly
and tricky, especially the waiting until ->orig_bio is set in
dm_submit_bio_remap().

The reason is that one new bio is cloned from original FS bio to
represent the mapped part, which just serves io accounting.

Now we have switched to bdev based io accounting interface, and we
can retrieve sectors/bio_op from both the real original bio and the
added fields of .sector_offset & .sectors easily, so the new cloned
bio isn't necessary any more.

Not only fixes dm-zone's kernel panic, but also cleans up dm io
accounting & split a bit.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/md/dm-core.h |  8 ++++++-
 drivers/md/dm.c      | 51 ++++++++++++++++++++++----------------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 4277853c7535..aefb080c230d 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -247,7 +247,12 @@ struct dm_io {
 	blk_short_t flags;
 	atomic_t io_count;
 	struct mapped_device *md;
+
+	/* The three fields represent mapped part of original bio */
 	struct bio *orig_bio;
+	unsigned int sector_offset; /* offset to end of orig_bio */
+	unsigned int sectors;
+
 	blk_status_t status;
 	spinlock_t lock;
 	unsigned long start_time;
@@ -264,7 +269,8 @@ struct dm_io {
  */
 enum {
 	DM_IO_START_ACCT,
-	DM_IO_ACCOUNTED
+	DM_IO_ACCOUNTED,
+	DM_IO_SPLITTED
 };
 
 static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 31eacc0e93ed..df1d013fb793 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -509,8 +509,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
 	/* If REQ_PREFLUSH set save any payload but do not account it */
 	if (bio_is_flush_with_data(bio))
 		sectors = 0;
-	else
+	else if (likely(!(dm_io_flagged(io, DM_IO_SPLITTED))))
 		sectors = bio_sectors(bio);
+	else
+		sectors = io->sectors;
 
 	if (!end)
 		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
@@ -518,10 +520,21 @@ static void dm_io_acct(struct dm_io *io, bool end)
 	else
 		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
 
-	if (unlikely(dm_stats_used(&md->stats)))
+	if (unlikely(dm_stats_used(&md->stats))) {
+		sector_t sector;
+
+		if (likely(!dm_io_flagged(io, DM_IO_SPLITTED))) {
+			sector = bio->bi_iter.bi_sector;
+			sectors = bio_sectors(bio);
+		} else {
+			sector = bio_end_sector(bio) - io->sector_offset;
+			sectors = io->sectors;
+		}
+
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
-				    bio->bi_iter.bi_sector, bio_sectors(bio),
+				    sector, sectors,
 				    end, start_time, stats_aux);
+	}
 }
 
 static void __dm_start_io_acct(struct dm_io *io)
@@ -576,7 +589,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	io->status = 0;
 	atomic_set(&io->io_count, 1);
 	this_cpu_inc(*md->pending_io);
-	io->orig_bio = NULL;
+	io->orig_bio = bio;
 	io->md = md;
 	io->map_task = current;
 	spin_lock_init(&io->lock);
@@ -1222,13 +1235,6 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
 		/* Still in target's map function */
 		dm_io_set_flag(io, DM_IO_START_ACCT);
 	} else {
-		/*
-		 * Called by another thread, managed by DM target,
-		 * wait for dm_split_and_process_bio() to store
-		 * io->orig_bio
-		 */
-		while (unlikely(!smp_load_acquire(&io->orig_bio)))
-			msleep(1);
 		dm_start_io_acct(io, clone);
 	}
 
@@ -1557,7 +1563,6 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 				     struct dm_table *map, struct bio *bio)
 {
 	struct clone_info ci;
-	struct bio *orig_bio = NULL;
 	int error = 0;
 
 	init_clone_info(&ci, md, map, bio);
@@ -1573,22 +1578,16 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	if (error || !ci.sector_count)
 		goto out;
 
-	/*
-	 * Remainder must be passed to submit_bio_noacct() so it gets handled
-	 * *after* bios already submitted have been completely processed.
-	 * We take a clone of the original to store in ci.io->orig_bio to be
-	 * used by dm_end_io_acct() and for dm_io_complete() to use for
-	 * completion handling.
-	 */
-	orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
-			     GFP_NOIO, &md->queue->bio_split);
-	bio_chain(orig_bio, bio);
-	trace_block_split(orig_bio, bio->bi_iter.bi_sector);
+	/* setup the mapped part for accounting */
+	dm_io_set_flag(ci.io, DM_IO_SPLITTED);
+	ci.io->sectors = bio_sectors(bio) - ci.sector_count;
+	ci.io->sector_offset = bio_end_sector(bio) - bio->bi_iter.bi_sector;
+
+	bio_trim(bio, ci.io->sectors, ci.sector_count);
+	trace_block_split(bio, bio->bi_iter.bi_sector);
+	bio_inc_remaining(bio);
 	submit_bio_noacct(bio);
 out:
-	if (!orig_bio)
-		orig_bio = bio;
-	smp_store_release(&ci.io->orig_bio, orig_bio);
 	if (dm_io_flagged(ci.io, DM_IO_START_ACCT))
 		dm_start_io_acct(ci.io, NULL);
 
-- 
2.31.1


  parent reply	other threads:[~2022-04-12  8:57 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-12  8:56 [dm-devel] [PATCH 0/8] dm: io accounting & polling improvement Ming Lei
2022-04-12  8:56 ` Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 1/8] block: replace disk based account with bdev's Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-16  5:57   ` Christoph Hellwig
2022-04-16  5:57     ` [dm-devel] " Christoph Hellwig
2022-04-12  8:56 ` [dm-devel] [PATCH 2/8] dm: don't pass bio to __dm_start_io_acct and dm_end_io_acct Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 3/8] dm: pass 'dm_io' instance to dm_io_acct directly Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12 20:28   ` [dm-devel] " Mike Snitzer
2022-04-12 20:28     ` Mike Snitzer
2022-04-13  1:43     ` Ming Lei
2022-04-13  1:43       ` [dm-devel] " Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 4/8] dm: switch to bdev based io accounting interface Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12  8:56 ` Ming Lei [this message]
2022-04-12  8:56   ` [PATCH 5/8] dm: always setup ->orig_bio in alloc_io Ming Lei
2022-04-12 20:52   ` [dm-devel] " Mike Snitzer
2022-04-12 20:52     ` Mike Snitzer
2022-04-12 22:38     ` [dm-devel] " Damien Le Moal
2022-04-12 22:38       ` Damien Le Moal
2022-04-12 23:00       ` [dm-devel] " Mike Snitzer
2022-04-12 23:00         ` Mike Snitzer
2022-04-12 23:31         ` [dm-devel] " Damien Le Moal
2022-04-12 23:31           ` Damien Le Moal
2022-04-13  0:00         ` Damien Le Moal
2022-04-13  0:00           ` [dm-devel] " Damien Le Moal
2022-04-13  1:56     ` Ming Lei
2022-04-13  1:56       ` [dm-devel] " Ming Lei
2022-04-13  6:12       ` Mike Snitzer
2022-04-13  6:12         ` [dm-devel] " Mike Snitzer
2022-04-13 12:26         ` Ming Lei
2022-04-13 12:26           ` [dm-devel] " Ming Lei
2022-04-13 17:58           ` Mike Snitzer
2022-04-13 17:58             ` [dm-devel] " Mike Snitzer
2022-04-14  0:36             ` Ming Lei
2022-04-14  0:36               ` [dm-devel] " Ming Lei
2022-04-14  2:25               ` Mike Snitzer
2022-04-14  2:25                 ` [dm-devel] " Mike Snitzer
2022-04-14  3:57                 ` Ming Lei
2022-04-14  3:57                   ` [dm-devel] " Ming Lei
2022-04-14 17:45                   ` Mike Snitzer
2022-04-14 17:45                     ` [dm-devel] " Mike Snitzer
2022-04-15  0:14                     ` Ming Lei
2022-04-15  0:14                       ` [dm-devel] " Ming Lei
2022-04-15 21:06                       ` Mike Snitzer
2022-04-15 21:06                         ` [dm-devel] " Mike Snitzer
2022-04-17  2:22                         ` Ming Lei
2022-04-17  2:22                           ` [dm-devel] " Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 6/8] dm: don't grab target io reference in dm_zone_map_bio Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 7/8] dm: improve target io referencing Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12  8:56 ` [dm-devel] [PATCH 8/8] dm: put all polled io into one single list Ming Lei
2022-04-12  8:56   ` Ming Lei
2022-04-12 17:15 ` [PATCH 0/8] dm: io accounting & polling improvement Mike Snitzer
2022-04-12 17:15   ` [dm-devel] " Mike Snitzer
2022-04-16  5:58   ` Christoph Hellwig
2022-04-16  5:58     ` [dm-devel] " Christoph Hellwig
2022-04-17  1:23     ` Mike Snitzer
2022-04-17  1:23       ` [dm-devel] " Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220412085616.1409626-6-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=dm-devel@redhat.com \
    --cc=linux-block@vger.kernel.org \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.