All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: <axboe@kernel.dk>, <linux-block@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Cc: Sergei Shtepa <sergei.shtepa@veeam.com>
Subject: [PATCH 01/20] block, blk_filter: enable block device filters
Date: Mon, 13 Jun 2022 18:52:54 +0300	[thread overview]
Message-ID: <1655135593-1900-2-git-send-email-sergei.shtepa@veeam.com> (raw)
In-Reply-To: <1655135593-1900-1-git-send-email-sergei.shtepa@veeam.com>

Allow block device filters to be attached to block devices.
Kernel modules can use this functionality to extend the
capabilities of the block layer.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 block/Kconfig             |   8 +++
 block/bdev.c              | 129 ++++++++++++++++++++++++++++++++++++++
 block/blk-core.c          |  88 ++++++++++++++++++++++++++
 include/linux/blk_types.h |  22 +++++++
 include/linux/blkdev.h    |  81 ++++++++++++++++++++++++
 5 files changed, 328 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 50b17e260fa2..256483e00224 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -225,6 +225,14 @@ config BLK_MQ_RDMA
 config BLK_PM
 	def_bool PM
 
+config BLK_FILTER
+	bool "Enable block device filters"
+	default n
+	help
+	  Enabling this lets block device filters handle bio requests.
+	  Kernel modules can use this feature to extend the functionality
+	  of the block layer.
+
 # do not use in new code
 config BLOCK_HOLDER_DEPRECATED
 	bool
diff --git a/block/bdev.c b/block/bdev.c
index 5fe06c1f2def..4bcd9f4378e3 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -426,8 +426,15 @@ static void init_once(void *data)
 	inode_init_once(&ei->vfs_inode);
 }
 
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev);
+#endif
+
 static void bdev_evict_inode(struct inode *inode)
 {
+#ifdef CONFIG_BLK_FILTER
+	bdev_filter_cleanup(I_BDEV(inode));
+#endif
 	truncate_inode_pages_final(&inode->i_data);
 	invalidate_inode_buffers(inode); /* is it needed here? */
 	clear_inode(inode);
@@ -503,6 +510,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 		return NULL;
 	}
 	bdev->bd_disk = disk;
+
+#ifdef CONFIG_BLK_FILTER
+	memset(bdev->bd_filters, 0, sizeof(bdev->bd_filters));
+	spin_lock_init(&bdev->bd_filters_lock);
+#endif
 	return bdev;
 }
 
@@ -1071,3 +1083,120 @@ void sync_bdevs(bool wait)
 	spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
+
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev)
+{
+	int altitude;
+	struct bdev_filter *flt;
+
+	for (altitude = 0; altitude < bdev_filter_alt_end; altitude++) {
+		spin_lock(&bdev->bd_filters_lock);
+		flt = bdev->bd_filters[altitude];
+		bdev->bd_filters[altitude] = NULL;
+		spin_unlock(&bdev->bd_filters_lock);
+		/* Slot emptied; release its filter (NULL is ignored) unlocked. */
+		bdev_filter_put(flt);
+	}
+}
+
+/**
+ * bdev_filter_attach - Attach a filter to the original block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter. Used only in the log message.
+ * @altitude:
+ *	Altitude of the filter: the index of its slot in @bdev->bd_filters.
+ * @flt:
+ *	Pointer to the filter structure.
+ *
+ * Before adding a filter, it is necessary to initialize &struct bdev_filter.
+ *
+ * bdev_filter_detach() detaches the filter again and drops one reference on
+ * it; no additional reference is taken on @flt here.
+ *
+ * Return:
+ * 0 - OK
+ * -EALREADY - a filter is already attached at this altitude
+ */
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt)
+{
+	int ret = 0;
+
+	spin_lock(&bdev->bd_filters_lock);
+	if (bdev->bd_filters[altitude])
+		ret = -EALREADY;
+	else
+		bdev->bd_filters[altitude] = flt;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!ret)
+		pr_info("block device filter '%s' has been attached to %d:%d\n",
+			name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_attach);
+
+/**
+ * bdev_filter_detach - Detach a filter from the block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter. Used only in the log message.
+ * @altitude:
+ *	Altitude of the filter: the index of its slot in @bdev->bd_filters.
+ *
+ * The filter should be added using the bdev_filter_attach() function.
+ *
+ * Return:
+ * 0 - OK
+ * -ENOENT - no filter is attached at this altitude
+ */
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	bdev->bd_filters[altitude] = NULL;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!flt)
+		return -ENOENT;
+
+	bdev_filter_put(flt);	/* drop the reference held by the slot */
+	pr_info("block device filter '%s' has been detached from %d:%d\n",
+		name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_detach);
+
+/**
+ * bdev_filter_get_by_altitude - Get filter by altitude.
+ * @bdev: Pointer to the block device structure.
+ * @altitude: Altitude of the filter, i.e. its slot in @bdev->bd_filters.
+ *
+ * Return:
+ * pointer - the attached &struct bdev_filter, with a reference taken on it
+ * NULL - no filter has been set
+ */
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+				const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	if (flt)
+		bdev_filter_get(flt);
+	spin_unlock(&bdev->bd_filters_lock);
+
+	return flt;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_get_by_altitude);
+#endif
diff --git a/block/blk-core.c b/block/blk-core.c
index 06ff5bbfe8f6..a44906fb08aa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -757,6 +757,86 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 		__submit_bio_noacct(bio);
 }
 
+#ifdef CONFIG_BLK_FILTER
+
+/**
+ * __filter_bio() - Process bio by the block device filter.
+ * @flt: Block device filter.
+ * @bio: Original I/O unit.
+ *
+ * Bios submitted by the callback are collected on a local list and then
+ * submitted with BIO_FILTERED set. The caller's current->bio_list is restored.
+ *
+ * Return:
+ * bdev_filter_pass - original bio should be submitted
+ * bdev_filter_skip - do not submit original bio
+ * bdev_filter_redirect - repeat bio processing for another block device
+ */
+static inline enum bdev_filter_result __filter_bio(struct bdev_filter *flt,
+						   struct bio *bio)
+{
+	enum bdev_filter_result result;
+	struct bio *new_bio;
+	struct bio_list bio_list[2] = { };
+	struct bio_list *outer = current->bio_list;
+
+	do {
+		bio_list_init(&bio_list[0]);
+		current->bio_list = bio_list;
+		result = flt->fops->submit_bio_cb(bio, flt);
+		current->bio_list = outer;	/* not NULL: may be nested */
+
+		while ((new_bio = bio_list_pop(&bio_list[0]))) {
+			bio_set_flag(new_bio, BIO_FILTERED);
+			submit_bio_noacct(new_bio);
+		}
+	} while (result == bdev_filter_repeat);
+
+	return result;
+}
+
+/**
+ * filter_bio() - Pass bio to the block device filters.
+ * @bio: Original I/O unit.
+ *
+ * Calls the filters attached to bio->bi_bdev in ascending altitude order,
+ * stopping at the first one that does not return bdev_filter_pass.
+ *
+ * Return: true if the original bio should be submitted, false otherwise.
+ */
+static bool filter_bio(struct bio *bio)
+{
+	enum bdev_filter_result result = bdev_filter_pass;
+
+	if (bio_flagged(bio, BIO_FILTERED))
+		return true;	/* bio has already been filtered */
+	do {
+		struct block_device *bdev = bio->bi_bdev;
+		unsigned int altitude = 0;
+
+		while (altitude < bdev_filter_alt_end) {
+			struct bdev_filter *flt;
+
+			spin_lock(&bdev->bd_filters_lock);
+			flt = bdev->bd_filters[altitude];
+			if (flt)
+				bdev_filter_get(flt);
+			spin_unlock(&bdev->bd_filters_lock);
+			/* flt, if any, is referenced; the lock is not held below. */
+			if (flt) {
+				result = __filter_bio(flt, bio);
+				bdev_filter_put(flt);
+				if (result != bdev_filter_pass)
+					break;
+			}
+			altitude++;
+		}
+	} while (result == bdev_filter_redirect);	/* bi_bdev is re-read */
+
+	return (result == bdev_filter_pass);
+}
+#endif
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -790,6 +870,14 @@ void submit_bio_noacct(struct bio *bio)
 		goto end_io;
 	if (unlikely(bio_check_ro(bio)))
 		goto end_io;
+#ifdef CONFIG_BLK_FILTER
+	/*
+	 * It looks like should_fail_bio() and bio_check_ro() can be placed
+	 * in a separate block device filter for debugging.
+	 */
+	if (!filter_bio(bio))
+		goto end_io;
+#endif
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (unlikely(bio_check_eod(bio)))
 			goto end_io;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a24d4078fb21..b88f506ea59e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,23 @@ struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_altitudes - Set of reserved altitudes for block device
+ *	filters. Each altitude indexes one slot of block_device.bd_filters.
+ *
+ * @bdev_filter_alt_blksnap:
+ *	An altitude for the blksnap module.
+ * @bdev_filter_alt_end:
+ *	Indicates the end of the altitude set; it is the size of bd_filters.
+ */
+enum bdev_filter_altitudes {
+	bdev_filter_alt_blksnap = 0,
+	bdev_filter_alt_end
+};
+struct bdev_filter;
+#endif
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -68,6 +85,10 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+#ifdef CONFIG_BLK_FILTER
+	struct bdev_filter	*bd_filters[bdev_filter_alt_end];
+	spinlock_t		bd_filters_lock;
+#endif
 } __randomize_layout;
 
 #define bdev_whole(_bdev) \
@@ -332,6 +353,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_FILTERED,		/* bio has already been filtered */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 608d577734c2..24cb5293897f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1573,4 +1573,85 @@ struct io_comp_batch {
 
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_result - The result of bio processing by
+ *	the block device filter.
+ *
+ * @bdev_filter_skip:
+ *	The original bio must not be submitted; later filters are not called.
+ * @bdev_filter_pass:
+ *	Go on to the next filter; the bio is submitted if every filter passes.
+ * @bdev_filter_repeat:
+ *	Bio processing has not been completed; the callback is called again.
+ * @bdev_filter_redirect:
+ *	Original bio was redirected to another block device. The set
+ *	of filters on it is different, so processing must be repeated.
+ */
+enum bdev_filter_result {
+	bdev_filter_skip = 0,
+	bdev_filter_pass,
+	bdev_filter_repeat,
+	bdev_filter_redirect
+};
+struct bdev_filter;
+/**
+ * struct bdev_filter_operations - List of callback functions for the filter.
+ *
+ * @submit_bio_cb:
+ *	A callback function for bio processing.
+ * @detach_cb:
+ *	Release function for &bdev_filter.kref; bdev_filter_put() passes it
+ *	to kref_put().
+ */
+struct bdev_filter_operations {
+	enum bdev_filter_result (*submit_bio_cb)(struct bio *bio,
+						 struct bdev_filter *flt);
+	void (*detach_cb)(struct kref *kref);
+};
+/**
+ * struct bdev_filter - Block device filter.
+ *
+ * @kref:
+ *	Reference counter; see bdev_filter_get() and bdev_filter_put().
+ * @fops:
+ *	The pointer to &struct bdev_filter_operations with callback
+ *	functions for the filter.
+ */
+struct bdev_filter {
+	struct kref kref;
+	const struct bdev_filter_operations *fops;
+};
+/**
+ * bdev_filter_init - Initialization of the filter structure.
+ * @flt:
+ *	Pointer to the &struct bdev_filter to be initialized.
+ * @fops:
+ *	The callback functions for the filter; the pointer itself is stored.
+ */
+static inline void bdev_filter_init(struct bdev_filter *flt,
+		const struct bdev_filter_operations *fops)
+{
+	kref_init(&flt->kref);
+	flt->fops = fops;
+}
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt);
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude);
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+		       const enum bdev_filter_altitudes altitude);
+static inline void bdev_filter_get(struct bdev_filter *flt)
+{
+	kref_get(&flt->kref);	/* flt is dereferenced: it must not be NULL */
+}
+static inline void bdev_filter_put(struct bdev_filter *flt)
+{
+	if (flt)	/* a NULL filter is silently ignored */
+		kref_put(&flt->kref, flt->fops->detach_cb);
+}
+
+#endif
+
 #endif /* _LINUX_BLKDEV_H */
-- 
2.20.1


  reply	other threads:[~2022-06-13 18:55 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-13 15:52 [PATCH 00/20] blksnap - creating non-persistent snapshots for backup Sergei Shtepa
2022-06-13 15:52 ` Sergei Shtepa [this message]
2022-06-13 21:50   ` [PATCH 01/20] block, blk_filter: enable block device filters Randy Dunlap
2022-06-14  9:19     ` Sergei Shtepa
2022-06-14  9:21     ` Sergei Shtepa
2022-06-14  9:01   ` kernel test robot
2022-07-06 12:59   ` Christoph Hellwig
2022-07-07  8:26     ` Sergei Shtepa
2022-07-07 17:26       ` Christoph Hellwig
2022-07-08 10:45         ` Sergei Shtepa
2022-07-13 11:56           ` Christoph Hellwig
2022-07-13 13:47             ` Sergei Shtepa
2022-07-14  5:12               ` Christoph Hellwig
2022-07-14  9:22                 ` Sergei Shtepa
2022-06-13 15:52 ` [PATCH 02/20] block, blksnap: header file of the module interface Sergei Shtepa
2022-07-06 13:03   ` Christoph Hellwig
2022-06-13 15:52 ` [PATCH 03/20] block, blksnap: module management interface functions Sergei Shtepa
2022-06-13 22:44   ` Chaitanya Kulkarni
2022-06-13 15:52 ` [PATCH 04/20] block, blksnap: init() and exit() functions Sergei Shtepa
2022-06-13 15:52 ` [PATCH 05/20] block, blksnap: interaction with sysfs Sergei Shtepa
2022-06-13 15:52 ` [PATCH 06/20] block, blksnap: attaching and detaching the filter and handling a bios Sergei Shtepa
2022-06-13 15:53 ` [PATCH 07/20] block, blksnap: map of change block tracking Sergei Shtepa
2022-06-13 15:53 ` [PATCH 08/20] block, blksnap: big buffer in the form of an array of pages Sergei Shtepa
2022-07-06 13:09   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 09/20] block, blksnap: minimum data storage unit of the original block device Sergei Shtepa
2022-06-13 15:53 ` [PATCH 10/20] block, blksnap: buffer in memory for the minimum data storage unit Sergei Shtepa
2022-06-13 15:53 ` [PATCH 11/20] block, blksnap: functions and structures for performing block I/O operations Sergei Shtepa
2022-07-07 17:33   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 12/20] block, blksnap: storage for storing difference blocks Sergei Shtepa
2022-06-13 15:53 ` [PATCH 13/20] block, blksnap: event queue from the difference storage Sergei Shtepa
2022-06-13 15:53 ` [PATCH 14/20] block, blksnap: owner of information about overwritten blocks of the original block device Sergei Shtepa
2022-06-13 15:53 ` [PATCH 15/20] block, blksnap: snapshot image " Sergei Shtepa
2022-07-06 13:13   ` Christoph Hellwig
2022-07-07  9:16     ` Sergei Shtepa
2022-07-07 17:24       ` Christoph Hellwig
2022-07-08  7:58         ` Sergei Shtepa
2022-07-08  8:04           ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 16/20] block, blksnap: snapshot Sergei Shtepa
2022-06-13 15:53 ` [PATCH 17/20] block, blksnap: debugging mechanism for monitoring memory consumption Sergei Shtepa
2022-06-13 15:53 ` [PATCH 18/20] block, blksnap: Kconfig Sergei Shtepa
2022-06-13 21:38   ` Randy Dunlap
2022-07-06 13:06   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 19/20] block, blksnap: Makefile Sergei Shtepa
2022-07-06 13:06   ` Christoph Hellwig
2022-06-13 15:53 ` [PATCH 20/20] block, blksnap: adds a blksnap to the kernel tree Sergei Shtepa
2022-06-18  2:11   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1655135593-1900-2-git-send-email-sergei.shtepa@veeam.com \
    --to=sergei.shtepa@veeam.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.