linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: <axboe@kernel.dk>, <linux-block@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: Sergei Shtepa <sergei.shtepa@veeam.com>
Subject: [PATCH 01/20] block, blk_filter: enable block device filters
Date: Mon, 13 Jun 2022 18:52:54 +0300	[thread overview]
Message-ID: <1655135593-1900-2-git-send-email-sergei.shtepa@veeam.com> (raw)
In-Reply-To: <1655135593-1900-1-git-send-email-sergei.shtepa@veeam.com>

Allow block device filters to be attached to block devices.
Kernel modules can use this functionality to extend the
capabilities of the block layer.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 block/Kconfig             |   8 +++
 block/bdev.c              | 129 ++++++++++++++++++++++++++++++++++++++
 block/blk-core.c          |  88 ++++++++++++++++++++++++++
 include/linux/blk_types.h |  22 +++++++
 include/linux/blkdev.h    |  81 ++++++++++++++++++++++++
 5 files changed, 328 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 50b17e260fa2..256483e00224 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -225,6 +225,14 @@ config BLK_MQ_RDMA
 config BLK_PM
 	def_bool PM
 
+config BLK_FILTER
+	bool "Enable block device filters"
+	default n
+	help
+	  Enabling this lets the block layer filters handle bio requests.
+	  Kernel modules can use this feature to extend the functionality
+	  of the block layer.
+
 # do not use in new code
 config BLOCK_HOLDER_DEPRECATED
 	bool
diff --git a/block/bdev.c b/block/bdev.c
index 5fe06c1f2def..4bcd9f4378e3 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -426,8 +426,15 @@ static void init_once(void *data)
 	inode_init_once(&ei->vfs_inode);
 }
 
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev);
+#endif
+
 static void bdev_evict_inode(struct inode *inode)
 {
+#ifdef CONFIG_BLK_FILTER
+	bdev_filter_cleanup(I_BDEV(inode));
+#endif
 	truncate_inode_pages_final(&inode->i_data);
 	invalidate_inode_buffers(inode); /* is it needed here? */
 	clear_inode(inode);
@@ -503,6 +510,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 		return NULL;
 	}
 	bdev->bd_disk = disk;
+
+#ifdef CONFIG_BLK_FILTER
+	memset(bdev->bd_filters, 0, sizeof(bdev->bd_filters));
+	spin_lock_init(&bdev->bd_filters_lock);
+#endif
 	return bdev;
 }
 
@@ -1071,3 +1083,120 @@ void sync_bdevs(bool wait)
 	spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
+
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev)
+{
+	int altitude;
+	struct bdev_filter *flt;
+
+	for (altitude = 0; altitude < bdev_filter_alt_end; altitude++) {
+		spin_lock(&bdev->bd_filters_lock);
+		flt = bdev->bd_filters[altitude];
+		bdev->bd_filters[altitude] = NULL;
+		spin_unlock(&bdev->bd_filters_lock);
+		/* Release outside the lock; bdev_filter_put() ignores NULL. */
+		bdev_filter_put(flt);
+	}
+}
+
+/**
+ * bdev_filter_attach - Attach a filter to the original block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter; used only in the log message.
+ * @altitude:
+ *	Altitude of the filter; selects the slot in the device's filter array.
+ * @flt:
+ *	Pointer to the filter structure.
+ *
+ * Before attaching, @flt must be initialized with bdev_filter_init().
+ *
+ * This function does not take a reference on @flt; bdev_filter_detach()
+ * drops one reference when it removes the filter.
+ *
+ * Return:
+ * 0 - OK
+ * -EALREADY - a filter is already attached at this altitude
+ */
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt)
+{
+	int ret = 0;
+
+	spin_lock(&bdev->bd_filters_lock);
+	if (bdev->bd_filters[altitude])
+		ret = -EALREADY;
+	else
+		bdev->bd_filters[altitude] = flt;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!ret)
+		pr_info("block device filter '%s' has been attached to %d:%d\n",
+			name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_attach);
+
+/**
+ * bdev_filter_detach - Detach a filter from the block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter; used only in the log message.
+ * @altitude:
+ *	Altitude of the filter; selects the slot in the device's filter array.
+ *
+ * Drops one reference on the filter that bdev_filter_attach() stored there.
+ *
+ * Return:
+ * 0 - OK
+ * -ENOENT - no filter is attached at this altitude
+ */
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	bdev->bd_filters[altitude] = NULL;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!flt)
+		return -ENOENT;
+
+	bdev_filter_put(flt);
+	pr_info("block device filter '%s' has been detached from %d:%d\n",
+		name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_detach);
+
+/**
+ * bdev_filter_get_by_altitude - Get filter by altitude.
+ * @bdev: Pointer to the block device structure.
+ * @altitude: Altitude of the filter slot to look up.
+ *
+ * Return:
+ * pointer - &struct bdev_filter at @altitude, with an extra reference taken
+ * NULL - no filter has been set
+ */
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+				const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	if (flt)
+		bdev_filter_get(flt);
+	spin_unlock(&bdev->bd_filters_lock);
+
+	return flt;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_get_by_altitude);
+#endif
diff --git a/block/blk-core.c b/block/blk-core.c
index 06ff5bbfe8f6..a44906fb08aa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -757,6 +757,86 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 		__submit_bio_noacct(bio);
 }
 
+#ifdef CONFIG_BLK_FILTER
+
+/**
+ * __filter_bio() - Process bio by the block device filter.
+ * @flt: Block device filter.
+ * @bio: Original I/O unit.
+ *
+ * The callback is invoked again for as long as it returns bdev_filter_repeat.
+ *
+ * Return:
+ * bdev_filter_pass - original bio should be submitted
+ * bdev_filter_skip - do not submit original bio
+ * bdev_filter_redirect - repeat bio processing for another block device
+ */
+static inline enum bdev_filter_result __filter_bio(struct bdev_filter *flt,
+						   struct bio *bio)
+{
+	enum bdev_filter_result result;
+	struct bio *new_bio;
+	struct bio_list bio_list[2] = { };
+
+	do {
+		bio_list_init(&bio_list[0]);
+		current->bio_list = bio_list;
+
+		result = flt->fops->submit_bio_cb(bio, flt);
+
+		current->bio_list = NULL;
+
+		while ((new_bio = bio_list_pop(&bio_list[0]))) {
+			bio_set_flag(new_bio, BIO_FILTERED);
+			submit_bio_noacct(new_bio);
+		}
+	} while (result == bdev_filter_repeat);
+
+	return result;
+}
+
+/**
+ * filter_bio() - Run a bio through its device's filters in altitude order.
+ * @bio:
+ *	Original I/O unit.
+ *
+ * Return:
+ * true - submit the original bio (already filtered, or every filter passed it)
+ * false - do not submit the original bio (a filter did not pass it)
+ */
+static bool filter_bio(struct bio *bio)
+{
+	enum bdev_filter_result result = bdev_filter_pass;
+
+	if (bio_flagged(bio, BIO_FILTERED))
+		return true;
+	do {
+		struct block_device *bdev = bio->bi_bdev;
+		unsigned int altitude = 0;
+
+		while (altitude < bdev_filter_alt_end) {
+			struct bdev_filter *flt;
+
+			spin_lock(&bdev->bd_filters_lock);
+			flt = bdev->bd_filters[altitude];
+			if (flt)
+				bdev_filter_get(flt);
+			spin_unlock(&bdev->bd_filters_lock);
+
+			if (flt) {
+				result = __filter_bio(flt, bio);
+				bdev_filter_put(flt);
+				if (result != bdev_filter_pass)
+					break;
+			}
+			altitude++;
+		}
+	} while (result == bdev_filter_redirect);
+
+	return (result == bdev_filter_pass);
+}
+#endif
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -790,6 +870,14 @@ void submit_bio_noacct(struct bio *bio)
 		goto end_io;
 	if (unlikely(bio_check_ro(bio)))
 		goto end_io;
+#ifdef CONFIG_BLK_FILTER
+	/*
+	 * It looks like should_fail_bio() and bio_check_ro() can be placed
+	 * in a separate block device filter for debugging.
+	 */
+	if (!filter_bio(bio))
+		goto end_io;
+#endif
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (unlikely(bio_check_eod(bio)))
 			goto end_io;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a24d4078fb21..b88f506ea59e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,23 @@ struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_altitudes - Set of reserved altitudes for block device
+ *	filters.
+ *
+ * @bdev_filter_alt_blksnap:
+ *	An altitude for the blksnap module.
+ * @bdev_filter_alt_end:
+ *	Indicates the end of the altitude set.
+ */
+enum bdev_filter_altitudes {
+	bdev_filter_alt_blksnap = 0,
+	bdev_filter_alt_end
+};
+struct bdev_filter;
+#endif
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -68,6 +85,10 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+#ifdef CONFIG_BLK_FILTER
+	struct bdev_filter	*bd_filters[bdev_filter_alt_end];
+	spinlock_t		bd_filters_lock;
+#endif
 } __randomize_layout;
 
 #define bdev_whole(_bdev) \
@@ -332,6 +353,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_FILTERED,		/* bio has already been filtered */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 608d577734c2..24cb5293897f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1573,4 +1573,85 @@ struct io_comp_batch {
 
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_result - The result of bio processing by
+ *	the block device filter.
+ *
+ * @bdev_filter_skip:
+ *	Original bio does not need to be submitted.
+ * @bdev_filter_pass:
+ *	It is necessary to submit the original request.
+ * @bdev_filter_repeat:
+ *	Bio processing has not been completed, a second call is required.
+ * @bdev_filter_redirect:
+ *	Original bio was redirected to another block device. The set
+ *	of filters on it is different, so processing must be repeated.
+ */
+enum bdev_filter_result {
+	bdev_filter_skip = 0,
+	bdev_filter_pass,
+	bdev_filter_repeat,
+	bdev_filter_redirect
+};
+struct bdev_filter;
+/**
+ * struct bdev_filter_operations - List of callback functions for the filter.
+ *
+ * @submit_bio_cb:
+ *	A callback function for bio processing.
+ * @detach_cb:
+ *	Release callback that bdev_filter_put() hands to kref_put(); it runs
+ *	when the last reference to the filter is dropped.
+ */
+struct bdev_filter_operations {
+	enum bdev_filter_result (*submit_bio_cb)(struct bio *bio,
+						 struct bdev_filter *flt);
+	void (*detach_cb)(struct kref *kref);
+};
+/**
+ * struct bdev_filter - Block device filter.
+ *
+ * @kref:
+ *	Kernel reference counter.
+ * @fops:
+ *	The pointer to &struct bdev_filter_operations with callback
+ *	functions for the filter.
+ */
+struct bdev_filter {
+	struct kref kref;
+	const struct bdev_filter_operations *fops;
+};
+/**
+ * bdev_filter_init - Initialization of the filter structure.
+ * @flt:
+ *	Filter to initialize; its reference counter is set up by kref_init().
+ * @fops:
+ *	The callback functions for the filter.
+ */
+static inline void bdev_filter_init(struct bdev_filter *flt,
+		const struct bdev_filter_operations *fops)
+{
+	kref_init(&flt->kref);
+	flt->fops = fops;
+}
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt);
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude);
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+		       const enum bdev_filter_altitudes altitude);
+static inline void bdev_filter_get(struct bdev_filter *flt)
+{
+	kref_get(&flt->kref);
+}
+static inline void bdev_filter_put(struct bdev_filter *flt)
+{
+	if (flt)	/* NULL is accepted so callers need not check */
+		kref_put(&flt->kref, flt->fops->detach_cb);
+}
+
+#endif
+
 #endif /* _LINUX_BLKDEV_H */
-- 
2.20.1


  reply	other threads:[~2022-06-13 18:55 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-13 15:52 [PATCH 00/20] blksnap - creating non-persistent snapshots for backup Sergei Shtepa
2022-06-13 15:52 ` Sergei Shtepa [this message]
2022-06-13 21:50   ` [PATCH 01/20] block, blk_filter: enable block device filters Randy Dunlap
2022-06-14  9:19     ` Sergei Shtepa
2022-06-14  9:21     ` Sergei Shtepa
2022-06-14  9:01   ` kernel test robot
2022-07-06 12:59   ` Christoph Hellwig
2022-07-07  8:26     ` Sergei Shtepa
2022-07-07 17:26       ` Christoph Hellwig
2022-07-08 10:45         ` Sergei Shtepa
2022-07-13 11:56           ` Christoph Hellwig
2022-07-13 13:47             ` Sergei Shtepa
2022-07-14  5:12               ` Christoph Hellwig
2022-07-14  9:22                 ` Sergei Shtepa
2022-06-13 15:52 ` [PATCH 02/20] block, blksnap: header file of the module interface Sergei Shtepa
2022-07-06 13:03   ` Christoph Hellwig
2022-06-13 15:52 ` [PATCH 03/20] block, blksnap: module management interface functions Sergei Shtepa
2022-06-13 22:44   ` Chaitanya Kulkarni
2022-06-13 15:52 ` [PATCH 04/20] block, blksnap: init() and exit() functions Sergei Shtepa
2022-06-13 15:52 ` [PATCH 05/20] block, blksnap: interaction with sysfs Sergei Shtepa
2022-06-13 15:52 ` [PATCH 06/20] block, blksnap: attaching and detaching the filter and handling a bios Sergei Shtepa
2022-06-13 15:53 ` [PATCH 07/20] block, blksnap: map of change block tracking Sergei Shtepa
2022-06-13 15:53 ` [PATCH 08/20] block, blksnap: big buffer in the form of an array of pages Sergei Shtepa
2022-07-06 13:09   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 09/20] block, blksnap: minimum data storage unit of the original block device Sergei Shtepa
2022-06-13 15:53 ` [PATCH 10/20] block, blksnap: buffer in memory for the minimum data storage unit Sergei Shtepa
2022-06-13 15:53 ` [PATCH 11/20] block, blksnap: functions and structures for performing block I/O operations Sergei Shtepa
2022-07-07 17:33   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 12/20] block, blksnap: storage for storing difference blocks Sergei Shtepa
2022-06-13 15:53 ` [PATCH 13/20] block, blksnap: event queue from the difference storage Sergei Shtepa
2022-06-13 15:53 ` [PATCH 14/20] block, blksnap: owner of information about overwritten blocks of the original block device Sergei Shtepa
2022-06-13 15:53 ` [PATCH 15/20] block, blksnap: snapshot image " Sergei Shtepa
2022-07-06 13:13   ` Christoph Hellwig
2022-07-07  9:16     ` Sergei Shtepa
2022-07-07 17:24       ` Christoph Hellwig
2022-07-08  7:58         ` Sergei Shtepa
2022-07-08  8:04           ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 16/20] block, blksnap: snapshot Sergei Shtepa
2022-06-13 15:53 ` [PATCH 17/20] block, blksnap: debugging mechanism for monitoring memory consumption Sergei Shtepa
2022-06-13 15:53 ` [PATCH 18/20] block, blksnap: Kconfig Sergei Shtepa
2022-06-13 21:38   ` Randy Dunlap
2022-07-06 13:06   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 19/20] block, blksnap: Makefile Sergei Shtepa
2022-07-06 13:06   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 20/20] block, blksnap: adds a blksnap to the kernel tree Sergei Shtepa
2022-06-18  2:11   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1655135593-1900-2-git-send-email-sergei.shtepa@veeam.com \
    --to=sergei.shtepa@veeam.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).