From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: Christoph Hellwig <hch@infradead.org>,
	Hannes Reinecke <hare@suse.de>, Mike Snitzer <snitzer@redhat.com>,
	Alasdair Kergon <agk@redhat.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Jens Axboe <axboe@kernel.dk>, <dm-devel@redhat.com>,
	<linux-fsdevel@vger.kernel.org>, <linux-block@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: <sergei.shtepa@veeam.com>, <pavel.tide@veeam.com>
Subject: [PATCH v8 2/4] Adds the blk_interposers logic to __submit_bio_noacct().
Date: Fri, 9 Apr 2021 14:48:02 +0300
Message-ID: <1617968884-15149-3-git-send-email-sergei.shtepa@veeam.com>
In-Reply-To: <1617968884-15149-1-git-send-email-sergei.shtepa@veeam.com>

* The call to blk_partition_remap() has been moved
from submit_bio_checks() to submit_bio_noacct().
* The __submit_bio() and __submit_bio_noacct_mq() functions
have been removed and their functionality moved into
submit_bio_noacct().
* Added locking of bio submission to the block device
using bd_interposer_lock.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
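A condensed sketch of the redirect step that this patch adds to the
submission loop in submit_bio_noacct() (simplified from the hunk below,
not the literal patched code):

	/*
	 * A bio that has not been interposed yet, and whose target block
	 * device has an interposer attached, is redirected: point the bio
	 * at the interposer's block device, mark it BIO_INTERPOSED so it
	 * cannot be redirected a second time, and re-queue it on the
	 * on-stack bio list.  The next loop iteration then enters the
	 * interposer's queue and submits the bio there as usual.
	 */
	if (!bio_flagged(bio, BIO_INTERPOSED) && bio->bi_bdev->bd_interposer) {
		struct gendisk *disk = bio->bi_bdev->bd_disk;

		bio_set_dev(bio, bio->bi_bdev->bd_interposer);
		bio_set_flag(bio, BIO_INTERPOSED);
		bio_list_add(&bio_list_on_stack[0], bio);

		blk_queue_exit(disk->queue);	/* drop the original device's queue reference */
		continue;
	}
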
 block/bio.c      |   2 +
 block/blk-core.c | 194 ++++++++++++++++++++++++++---------------------
 2 files changed, 108 insertions(+), 88 deletions(-)
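
The locking added around that loop reduces to roughly the following
pattern (condensed from the diff below). The write side of
bd_interposer_lock is not part of this patch; it is assumed to be taken
by the interposer attach/detach code elsewhere in the series, so that an
interposer cannot appear or disappear while bios for the device are in
flight:

	locked_bdev = bio_interposer_lock(bio);	/* percpu_down_read(), trylock for REQ_NOWAIT */
	if (!locked_bdev)
		return BLK_QC_T_NONE;	/* REQ_NOWAIT bio was ended with BLK_STS_AGAIN */

	current->bio_list = bio_list_on_stack;
	do {
		/* ... redirect to bd_interposer or submit the bio ... */
	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
	current->bio_list = NULL;

	bio_interposer_unlock(locked_bdev);	/* percpu_up_read() */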

diff --git a/block/bio.c b/block/bio.c
index 50e579088aca..6fc9e8f395a6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -640,6 +640,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 		bio_set_flag(bio, BIO_THROTTLED);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
+	if (bio_flagged(bio_src, BIO_INTERPOSED))
+		bio_set_flag(bio, BIO_INTERPOSED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
diff --git a/block/blk-core.c b/block/blk-core.c
index fc60ff208497..a987daa76a79 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -735,26 +735,27 @@ static inline int bio_check_eod(struct bio *bio)
 		handle_bad_sector(bio, maxsector);
 		return -EIO;
 	}
+
+	if (unlikely(should_fail_request(bio->bi_bdev, bio->bi_iter.bi_size)))
+		return -EIO;
+
 	return 0;
 }
 
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
-static int blk_partition_remap(struct bio *bio)
+static inline void blk_partition_remap(struct bio *bio)
 {
-	struct block_device *p = bio->bi_bdev;
+	struct block_device *bdev = bio->bi_bdev;
 
-	if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
-		return -EIO;
-	if (bio_sectors(bio)) {
-		bio->bi_iter.bi_sector += p->bd_start_sect;
-		trace_block_bio_remap(bio, p->bd_dev,
+	if (bdev->bd_partno && bio_sectors(bio)) {
+		bio->bi_iter.bi_sector += bdev->bd_start_sect;
+		trace_block_bio_remap(bio, bdev->bd_dev,
 				      bio->bi_iter.bi_sector -
-				      p->bd_start_sect);
+				      bdev->bd_start_sect);
 	}
 	bio_set_flag(bio, BIO_REMAPPED);
-	return 0;
 }
 
 /*
@@ -819,8 +820,6 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (unlikely(bio_check_eod(bio)))
 			goto end_io;
-		if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
-			goto end_io;
 	}
 
 	/*
@@ -910,20 +909,6 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 	return false;
 }
 
-static blk_qc_t __submit_bio(struct bio *bio)
-{
-	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	blk_qc_t ret = BLK_QC_T_NONE;
-
-	if (blk_crypto_bio_prep(&bio)) {
-		if (!disk->fops->submit_bio)
-			return blk_mq_submit_bio(bio);
-		ret = disk->fops->submit_bio(bio);
-	}
-	blk_queue_exit(disk->queue);
-	return ret;
-}
-
 /*
  * The loop in this function may be a bit non-obvious, and so deserves some
  * explanation:
@@ -931,7 +916,7 @@ static blk_qc_t __submit_bio(struct bio *bio)
  *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
  *    that), so we have a list with a single bio.
  *  - We pretend that we have just taken it off a longer list, so we assign
- *    bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ *    bio_list to a pointer to the current->bio_list, thus initialising the
  *    bio_list of new bios to be added.  ->submit_bio() may indeed add some more
  *    bios through a recursive call to submit_bio_noacct.  If it did, we find a
  *    non-NULL value in bio_list and re-enter the loop from the top.
@@ -939,83 +924,75 @@ static blk_qc_t __submit_bio(struct bio *bio)
  *    pretending) and so remove it from bio_list, and call into ->submit_bio()
  *    again.
  *
- * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
- * bio_list_on_stack[1] contains bios that were submitted before the current
+ * current->bio_list[0] contains bios submitted by the current ->submit_bio.
+ * current->bio_list[1] contains bios that were submitted before the current
  *	->submit_bio_bio, but that haven't been processed yet.
  */
 static blk_qc_t __submit_bio_noacct(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack[2];
-	blk_qc_t ret = BLK_QC_T_NONE;
-
-	BUG_ON(bio->bi_next);
-
-	bio_list_init(&bio_list_on_stack[0]);
-	current->bio_list = bio_list_on_stack;
-
-	do {
-		struct request_queue *q = bio->bi_bdev->bd_disk->queue;
-		struct bio_list lower, same;
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	struct bio_list lower, same;
+	blk_qc_t ret;
 
-		if (unlikely(bio_queue_enter(bio) != 0))
-			continue;
+	if (!blk_crypto_bio_prep(&bio)) {
+		blk_queue_exit(disk->queue);
+		return BLK_QC_T_NONE;
+	}
 
-		/*
-		 * Create a fresh bio_list for all subordinate requests.
-		 */
-		bio_list_on_stack[1] = bio_list_on_stack[0];
-		bio_list_init(&bio_list_on_stack[0]);
+	if (queue_is_mq(disk->queue))
+		return blk_mq_submit_bio(bio);
 
-		ret = __submit_bio(bio);
+	/*
+	 * Create a fresh bio_list for all subordinate requests.
+	 */
+	current->bio_list[1] = current->bio_list[0];
+	bio_list_init(&current->bio_list[0]);
 
-		/*
-		 * Sort new bios into those for a lower level and those for the
-		 * same level.
-		 */
-		bio_list_init(&lower);
-		bio_list_init(&same);
-		while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
-			if (q == bio->bi_bdev->bd_disk->queue)
-				bio_list_add(&same, bio);
-			else
-				bio_list_add(&lower, bio);
+	WARN_ON_ONCE(!disk->fops->submit_bio);
+	ret = disk->fops->submit_bio(bio);
+	blk_queue_exit(disk->queue);
+	/*
+	 * Sort new bios into those for a lower level and those
+	 * for the same level.
+	 */
+	bio_list_init(&lower);
+	bio_list_init(&same);
+	while ((bio = bio_list_pop(&current->bio_list[0])) != NULL)
+		if (disk->queue == bio->bi_bdev->bd_disk->queue)
+			bio_list_add(&same, bio);
+		else
+			bio_list_add(&lower, bio);
 
-		/*
-		 * Now assemble so we handle the lowest level first.
-		 */
-		bio_list_merge(&bio_list_on_stack[0], &lower);
-		bio_list_merge(&bio_list_on_stack[0], &same);
-		bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
-	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+	/*
+	 * Now assemble so we handle the lowest level first.
+	 */
+	bio_list_merge(&current->bio_list[0], &lower);
+	bio_list_merge(&current->bio_list[0], &same);
+	bio_list_merge(&current->bio_list[0], &current->bio_list[1]);
 
-	current->bio_list = NULL;
 	return ret;
 }
 
-static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+static inline struct block_device *bio_interposer_lock(struct bio *bio)
 {
-	struct bio_list bio_list[2] = { };
-	blk_qc_t ret = BLK_QC_T_NONE;
-
-	current->bio_list = bio_list;
-
-	do {
-		struct gendisk *disk = bio->bi_bdev->bd_disk;
-
-		if (unlikely(bio_queue_enter(bio) != 0))
-			continue;
+	bool locked;
+	struct block_device *bdev = bio->bi_bdev;
 
-		if (!blk_crypto_bio_prep(&bio)) {
-			blk_queue_exit(disk->queue);
-			ret = BLK_QC_T_NONE;
-			continue;
+	if (bio->bi_opf & REQ_NOWAIT) {
+		locked = percpu_down_read_trylock(&bdev->bd_interposer_lock);
+		if (unlikely(!locked)) {
+			bio_wouldblock_error(bio);
+			return NULL;
 		}
+	} else
+		percpu_down_read(&bdev->bd_interposer_lock);
 
-		ret = blk_mq_submit_bio(bio);
-	} while ((bio = bio_list_pop(&bio_list[0])));
+	return bdev;
+}
 
-	current->bio_list = NULL;
-	return ret;
+static inline void bio_interposer_unlock(struct block_device *locked_bdev)
+{
+	percpu_up_read(&locked_bdev->bd_interposer_lock);
 }
 
 /**
@@ -1029,6 +1006,10 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
  */
 blk_qc_t submit_bio_noacct(struct bio *bio)
 {
+	struct block_device *locked_bdev;
+	struct bio_list bio_list_on_stack[2] = { };
+	blk_qc_t ret = BLK_QC_T_NONE;
+
 	if (!submit_bio_checks(bio))
 		return BLK_QC_T_NONE;
 
@@ -1043,9 +1024,46 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (!bio->bi_bdev->bd_disk->fops->submit_bio)
-		return __submit_bio_noacct_mq(bio);
-	return __submit_bio_noacct(bio);
+	BUG_ON(bio->bi_next);
+
+	locked_bdev = bio_interposer_lock(bio);
+	if (!locked_bdev)
+		return BLK_QC_T_NONE;
+
+	current->bio_list = bio_list_on_stack;
+
+	do {
+		if (unlikely(bio_queue_enter(bio) != 0)) {
+			ret = BLK_QC_T_NONE;
+			continue;
+		}
+
+		if (!bio_flagged(bio, BIO_INTERPOSED) &&
+		    bio->bi_bdev->bd_interposer) {
+			struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+			bio_set_dev(bio, bio->bi_bdev->bd_interposer);
+			bio_set_flag(bio, BIO_INTERPOSED);
+
+			bio_list_add(&bio_list_on_stack[0], bio);
+
+			blk_queue_exit(disk->queue);
+			ret = BLK_QC_T_NONE;
+			continue;
+		}
+
+		if (!bio_flagged(bio, BIO_REMAPPED))
+			blk_partition_remap(bio);
+
+		ret = __submit_bio_noacct(bio);
+
+	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+	current->bio_list = NULL;
+
+	bio_interposer_unlock(locked_bdev);
+
+	return ret;
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
-- 
2.20.1


Thread overview: 11+ messages
2021-04-09 11:48 [PATCH v8 0/4] block device interposer Sergei Shtepa
2021-04-09 11:48 ` [PATCH v8 1/4] Adds blk_interposer. It allows to redirect bio requests to another block device Sergei Shtepa
2021-04-09 11:48 ` Sergei Shtepa [this message]
2021-04-09 11:48 ` [PATCH v8 3/4] Adds blk_interposer to md Sergei Shtepa
2021-04-09 14:12   ` kernel test robot
2021-04-09 14:39   ` kernel test robot
2021-04-09 17:03   ` kernel test robot
2021-04-09 17:03   ` [RFC PATCH] __dm_attach_interposer() can be static kernel test robot
2021-04-09 11:48 ` [PATCH v8 4/4] fix origin_map - don't split a bio for the origin device if it does not have registered snapshots Sergei Shtepa
2021-04-09 15:23 ` [PATCH v8 0/4] block device interposer Mike Snitzer
2021-04-13 10:12   ` Sergei Shtepa
