All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: Christoph Hellwig <hch@infradead.org>,
	Mike Snitzer <snitzer@redhat.com>,
	Alasdair Kergon <agk@redhat.com>, Hannes Reinecke <hare@suse.de>,
	Jens Axboe <axboe@kernel.dk>, <dm-devel@redhat.com>,
	<linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-api@vger.kernel.org>
Cc: <sergei.shtepa@veeam.com>, <pavel.tide@veeam.com>
Subject: [PATCH v7 2/3] block: add bdev_interposer
Date: Fri, 12 Mar 2021 18:44:54 +0300	[thread overview]
Message-ID: <1615563895-28565-3-git-send-email-sergei.shtepa@veeam.com> (raw)
In-Reply-To: <1615563895-28565-1-git-send-email-sergei.shtepa@veeam.com>

bdev_interposer allows redirecting bio requests to another device.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 block/bio.c               |  2 ++
 block/blk-core.c          | 57 +++++++++++++++++++++++++++++++++++++++
 block/genhd.c             | 54 +++++++++++++++++++++++++++++++++++++
 include/linux/blk_types.h |  3 +++
 include/linux/blkdev.h    |  9 +++++++
 5 files changed, 125 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index a1c4d2900c7a..0bfbf06475ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -640,6 +640,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 		bio_set_flag(bio, BIO_THROTTLED);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
+	if (bio_flagged(bio_src, BIO_INTERPOSED))
+		bio_set_flag(bio, BIO_INTERPOSED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
diff --git a/block/blk-core.c b/block/blk-core.c
index fc60ff208497..da1abc4c27a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1018,6 +1018,55 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
 	return ret;
 }
 
+static noinline blk_qc_t submit_bio_interposed(struct bio *bio)
+{
+	blk_qc_t ret = BLK_QC_T_NONE;
+	struct bio_list bio_list[2] = { };
+	struct gendisk *orig_disk;
+
+	if (current->bio_list) {
+		bio_list_add(&current->bio_list[0], bio);
+		return BLK_QC_T_NONE;
+	}
+
+	orig_disk = bio->bi_bdev->bd_disk;
+	if (unlikely(bio_queue_enter(bio)))
+		return BLK_QC_T_NONE;
+
+	current->bio_list = bio_list;
+
+	do {
+		struct block_device *interposer = bio->bi_bdev->bd_interposer;
+
+		if (unlikely(!interposer)) {
+			/* interposer is gone: resubmit the bio below */
+			bio_list_add(&current->bio_list[0], bio);
+			break;
+		}
+		/* redirect the bio to the interposer device and mark it */
+		bio_set_dev(bio, interposer);
+		bio_set_flag(bio, BIO_INTERPOSED);
+
+		if (!submit_bio_checks(bio))
+			break;
+		/*
+		 * current->bio_list is set, so the submit_bio callback is
+		 * expected to return BLK_QC_T_NONE; its result is ignored.
+		 */
+		interposer->bd_disk->fops->submit_bio(bio);
+	} while (false);
+
+	current->bio_list = NULL;
+
+	blk_queue_exit(orig_disk->queue);
+
+	/* Resubmit bios that were queued on bio_list[0] above */
+	while ((bio = bio_list_pop(&bio_list[0])))
+		ret = submit_bio_noacct(bio);
+
+	return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1029,6 +1078,14 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
  */
 blk_qc_t submit_bio_noacct(struct bio *bio)
 {
+	/*
+	 * The BIO_INTERPOSED flag is checked so that bios created by the
+	 * bdev_interposer are not passed back to it for processing.
+	 */
+	if (bdev_has_interposer(bio->bi_bdev) &&
+	    !bio_flagged(bio, BIO_INTERPOSED))
+		return submit_bio_interposed(bio);
+
 	if (!submit_bio_checks(bio))
 		return BLK_QC_T_NONE;
 
diff --git a/block/genhd.c b/block/genhd.c
index c55e8f0fced1..c840ecffea68 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -30,6 +30,11 @@
 static struct kobject *block_depr;
 
 DECLARE_RWSEM(bdev_lookup_sem);
+/*
+ * Prevents different block-layer interposers from attaching to or detaching
+ * from a block device at the same time.
+ */
+static DEFINE_MUTEX(bdev_interposer_attach_lock);
 
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
@@ -1940,3 +1945,52 @@ static void disk_release_events(struct gendisk *disk)
 	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
 	kfree(disk->ev);
 }
+
+int bdev_interposer_attach(struct block_device *original,
+			   struct block_device *interposer)
+{
+	int ret = 0;
+
+	if (WARN_ON(((!original) || (!interposer))))
+		return -EINVAL;
+	/*
+	 * The interposer must implement ->submit_bio (not a multi-queue device)
+	 */
+	if (!interposer->bd_disk->fops->submit_bio)
+		return -EINVAL;
+
+	if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue)))
+		return -EPERM;	/* queue must be frozen by the caller */
+
+	mutex_lock(&bdev_interposer_attach_lock);
+
+	if (bdev_has_interposer(original))
+		ret = -EBUSY;	/* an interposer is already attached */
+	else {
+		original->bd_interposer = bdgrab(interposer);
+		if (!original->bd_interposer)
+			ret = -ENODEV;	/* bdgrab() returned NULL */
+	}
+
+	mutex_unlock(&bdev_interposer_attach_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bdev_interposer_attach);
+
+void bdev_interposer_detach(struct block_device *original)
+{
+	if (WARN_ON(!original))
+		return;
+
+	if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue)))
+		return;	/* refuse to detach while the queue is not frozen */
+
+	mutex_lock(&bdev_interposer_attach_lock);
+	if (bdev_has_interposer(original)) {
+		bdput(original->bd_interposer);	/* ref from bdgrab() in attach */
+		original->bd_interposer = NULL;
+	}
+	mutex_unlock(&bdev_interposer_attach_lock);
+}
+EXPORT_SYMBOL_GPL(bdev_interposer_detach);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..13bda4732cf5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -19,6 +19,7 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 struct bio_crypt_ctx;
+struct bdev_interposer;
 
 struct block_device {
 	sector_t		bd_start_sect;
@@ -46,6 +47,7 @@ struct block_device {
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct gendisk *	bd_disk;
 	struct backing_dev_info *bd_bdi;
+	struct block_device     *bd_interposer;
 
 	/* The counter of freeze processes */
 	int			bd_fsfreeze_count;
@@ -304,6 +306,7 @@ enum {
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
 	BIO_REMAPPED,
+	BIO_INTERPOSED,		/* bio was reassigned to another block device */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bc6bc8383b43..90f62b4197da 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -2031,4 +2031,13 @@ int fsync_bdev(struct block_device *bdev);
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 
+static inline bool bdev_has_interposer(struct block_device *bdev)
+{
+	return bdev->bd_interposer != NULL;	/* set while attached */
+}
+
+int bdev_interposer_attach(struct block_device *original,
+			   struct block_device *interposer);
+void bdev_interposer_detach(struct block_device *original);
+
 #endif /* _LINUX_BLKDEV_H */
-- 
2.20.1


WARNING: multiple messages have this Message-ID (diff)
From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: Christoph Hellwig <hch@infradead.org>,
	Mike Snitzer <snitzer@redhat.com>,
	 Alasdair Kergon <agk@redhat.com>, Hannes Reinecke <hare@suse.de>,
	Jens Axboe <axboe@kernel.dk>, <dm-devel@redhat.com>,
	<linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-api@vger.kernel.org>
Cc: pavel.tide@veeam.com, sergei.shtepa@veeam.com
Subject: [dm-devel] [PATCH v7 2/3] block: add bdev_interposer
Date: Fri, 12 Mar 2021 18:44:54 +0300	[thread overview]
Message-ID: <1615563895-28565-3-git-send-email-sergei.shtepa@veeam.com> (raw)
In-Reply-To: <1615563895-28565-1-git-send-email-sergei.shtepa@veeam.com>

bdev_interposer allows redirecting bio requests to another device.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 block/bio.c               |  2 ++
 block/blk-core.c          | 57 +++++++++++++++++++++++++++++++++++++++
 block/genhd.c             | 54 +++++++++++++++++++++++++++++++++++++
 include/linux/blk_types.h |  3 +++
 include/linux/blkdev.h    |  9 +++++++
 5 files changed, 125 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index a1c4d2900c7a..0bfbf06475ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -640,6 +640,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 		bio_set_flag(bio, BIO_THROTTLED);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
+	if (bio_flagged(bio_src, BIO_INTERPOSED))
+		bio_set_flag(bio, BIO_INTERPOSED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
diff --git a/block/blk-core.c b/block/blk-core.c
index fc60ff208497..da1abc4c27a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1018,6 +1018,55 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
 	return ret;
 }
 
+static noinline blk_qc_t submit_bio_interposed(struct bio *bio)
+{
+	blk_qc_t ret = BLK_QC_T_NONE;
+	struct bio_list bio_list[2] = { };
+	struct gendisk *orig_disk;
+
+	if (current->bio_list) {
+		bio_list_add(&current->bio_list[0], bio);
+		return BLK_QC_T_NONE;
+	}
+
+	orig_disk = bio->bi_bdev->bd_disk;
+	if (unlikely(bio_queue_enter(bio)))
+		return BLK_QC_T_NONE;
+
+	current->bio_list = bio_list;
+
+	do {
+		struct block_device *interposer = bio->bi_bdev->bd_interposer;
+
+		if (unlikely(!interposer)) {
+			/* interposer is gone: resubmit the bio below */
+			bio_list_add(&current->bio_list[0], bio);
+			break;
+		}
+		/* redirect the bio to the interposer device and mark it */
+		bio_set_dev(bio, interposer);
+		bio_set_flag(bio, BIO_INTERPOSED);
+
+		if (!submit_bio_checks(bio))
+			break;
+		/*
+		 * current->bio_list is set, so the submit_bio callback is
+		 * expected to return BLK_QC_T_NONE; its result is ignored.
+		 */
+		interposer->bd_disk->fops->submit_bio(bio);
+	} while (false);
+
+	current->bio_list = NULL;
+
+	blk_queue_exit(orig_disk->queue);
+
+	/* Resubmit bios that were queued on bio_list[0] above */
+	while ((bio = bio_list_pop(&bio_list[0])))
+		ret = submit_bio_noacct(bio);
+
+	return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1029,6 +1078,14 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
  */
 blk_qc_t submit_bio_noacct(struct bio *bio)
 {
+	/*
+	 * The BIO_INTERPOSED flag is checked so that bios created by the
+	 * bdev_interposer are not passed back to it for processing.
+	 */
+	if (bdev_has_interposer(bio->bi_bdev) &&
+	    !bio_flagged(bio, BIO_INTERPOSED))
+		return submit_bio_interposed(bio);
+
 	if (!submit_bio_checks(bio))
 		return BLK_QC_T_NONE;
 
diff --git a/block/genhd.c b/block/genhd.c
index c55e8f0fced1..c840ecffea68 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -30,6 +30,11 @@
 static struct kobject *block_depr;
 
 DECLARE_RWSEM(bdev_lookup_sem);
+/*
+ * Prevents different block-layer interposers from attaching to or detaching
+ * from a block device at the same time.
+ */
+static DEFINE_MUTEX(bdev_interposer_attach_lock);
 
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
@@ -1940,3 +1945,52 @@ static void disk_release_events(struct gendisk *disk)
 	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
 	kfree(disk->ev);
 }
+
+int bdev_interposer_attach(struct block_device *original,
+			   struct block_device *interposer)
+{
+	int ret = 0;
+
+	if (WARN_ON(((!original) || (!interposer))))
+		return -EINVAL;
+	/*
+	 * The interposer must implement ->submit_bio (not a multi-queue device)
+	 */
+	if (!interposer->bd_disk->fops->submit_bio)
+		return -EINVAL;
+
+	if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue)))
+		return -EPERM;	/* queue must be frozen by the caller */
+
+	mutex_lock(&bdev_interposer_attach_lock);
+
+	if (bdev_has_interposer(original))
+		ret = -EBUSY;	/* an interposer is already attached */
+	else {
+		original->bd_interposer = bdgrab(interposer);
+		if (!original->bd_interposer)
+			ret = -ENODEV;	/* bdgrab() returned NULL */
+	}
+
+	mutex_unlock(&bdev_interposer_attach_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bdev_interposer_attach);
+
+void bdev_interposer_detach(struct block_device *original)
+{
+	if (WARN_ON(!original))
+		return;
+
+	if (WARN_ON(!blk_mq_is_queue_frozen(original->bd_disk->queue)))
+		return;	/* refuse to detach while the queue is not frozen */
+
+	mutex_lock(&bdev_interposer_attach_lock);
+	if (bdev_has_interposer(original)) {
+		bdput(original->bd_interposer);	/* ref from bdgrab() in attach */
+		original->bd_interposer = NULL;
+	}
+	mutex_unlock(&bdev_interposer_attach_lock);
+}
+EXPORT_SYMBOL_GPL(bdev_interposer_detach);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..13bda4732cf5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -19,6 +19,7 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 struct bio_crypt_ctx;
+struct bdev_interposer;
 
 struct block_device {
 	sector_t		bd_start_sect;
@@ -46,6 +47,7 @@ struct block_device {
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct gendisk *	bd_disk;
 	struct backing_dev_info *bd_bdi;
+	struct block_device     *bd_interposer;
 
 	/* The counter of freeze processes */
 	int			bd_fsfreeze_count;
@@ -304,6 +306,7 @@ enum {
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
 	BIO_REMAPPED,
+	BIO_INTERPOSED,		/* bio was reassigned to another block device */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bc6bc8383b43..90f62b4197da 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -2031,4 +2031,13 @@ int fsync_bdev(struct block_device *bdev);
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 
+static inline bool bdev_has_interposer(struct block_device *bdev)
+{
+	return bdev->bd_interposer != NULL;	/* set while attached */
+}
+
+int bdev_interposer_attach(struct block_device *original,
+			   struct block_device *interposer);
+void bdev_interposer_detach(struct block_device *original);
+
 #endif /* _LINUX_BLKDEV_H */
-- 
2.20.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


  parent reply	other threads:[~2021-03-12 15:46 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-12 15:44 [PATCH v7 0/3] block device interposer Sergei Shtepa
2021-03-12 15:44 ` [dm-devel] " Sergei Shtepa
2021-03-12 15:44 ` [PATCH v7 1/3] block: add blk_mq_is_queue_frozen() Sergei Shtepa
2021-03-12 15:44   ` [dm-devel] " Sergei Shtepa
2021-03-12 19:06   ` Mike Snitzer
2021-03-12 19:06     ` [dm-devel] " Mike Snitzer
2021-03-14  9:14     ` Christoph Hellwig
2021-03-14  9:14       ` [dm-devel] " Christoph Hellwig
2021-03-15 12:06       ` Sergei Shtepa
2021-03-15 12:06         ` [dm-devel] " Sergei Shtepa
2021-03-12 15:44 ` Sergei Shtepa [this message]
2021-03-12 15:44   ` [dm-devel] [PATCH v7 2/3] block: add bdev_interposer Sergei Shtepa
2021-03-14  9:28   ` Christoph Hellwig
2021-03-14  9:28     ` [dm-devel] " Christoph Hellwig
2021-03-15 13:06     ` Sergei Shtepa
2021-03-15 13:06       ` [dm-devel] " Sergei Shtepa
2021-03-16  8:09   ` Ming Lei
2021-03-16  8:09     ` [dm-devel] " Ming Lei
2021-03-16 16:35     ` Sergei Shtepa
2021-03-16 16:35       ` [dm-devel] " Sergei Shtepa
2021-03-17  3:03       ` Ming Lei
2021-03-17  3:03         ` [dm-devel] " Ming Lei
2021-03-17 12:22         ` Sergei Shtepa
2021-03-17 12:22           ` [dm-devel] " Sergei Shtepa
2021-03-17 15:04           ` Mike Snitzer
2021-03-17 15:04             ` [dm-devel] " Mike Snitzer
2021-03-17 18:14             ` Sergei Shtepa
2021-03-17 18:14               ` [dm-devel] " Sergei Shtepa
2021-03-17 19:13               ` Mike Snitzer
2021-03-17 19:13                 ` [dm-devel] " Mike Snitzer
2021-03-18 14:56                 ` Sergei Shtepa
2021-03-18 14:56                   ` [dm-devel] " Sergei Shtepa
2021-03-17 14:58         ` Mike Snitzer
2021-03-17 14:58           ` [dm-devel] " Mike Snitzer
2021-03-12 15:44 ` [PATCH v7 3/3] dm: add DM_INTERPOSED_FLAG Sergei Shtepa
2021-03-12 15:44   ` [dm-devel] " Sergei Shtepa
2021-03-12 19:00   ` Mike Snitzer
2021-03-12 19:00     ` [dm-devel] " Mike Snitzer
2021-03-15 12:29     ` Sergei Shtepa
2021-03-15 12:29       ` [dm-devel] " Sergei Shtepa
2021-03-14  9:30   ` Christoph Hellwig
2021-03-14  9:30     ` [dm-devel] " Christoph Hellwig
2021-03-15 13:25     ` Sergei Shtepa
2021-03-15 13:25       ` [dm-devel] " Sergei Shtepa
2021-03-16 15:23       ` Christoph Hellwig
2021-03-16 15:23         ` [dm-devel] " Christoph Hellwig
2021-03-16 15:25         ` Christoph Hellwig
2021-03-16 15:25           ` [dm-devel] " Christoph Hellwig
2021-03-16 16:20           ` Sergei Shtepa
2021-03-16 16:20             ` [dm-devel] " Sergei Shtepa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1615563895-28565-3-git-send-email-sergei.shtepa@veeam.com \
    --to=sergei.shtepa@veeam.com \
    --cc=agk@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=dm-devel@redhat.com \
    --cc=hare@suse.de \
    --cc=hch@infradead.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pavel.tide@veeam.com \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.