linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sergei Shtepa <sergei.shtepa@veeam.com>
To: <axboe@kernel.dk>, <corbet@lwn.net>
Cc: <linux-block@vger.kernel.org>, <linux-doc@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>,
	Sergei Shtepa <sergei.shtepa@veeam.com>
Subject: [PATCH v2 14/21] block, blksnap: storage for storing difference blocks
Date: Fri, 9 Dec 2022 15:23:24 +0100	[thread overview]
Message-ID: <20221209142331.26395-15-sergei.shtepa@veeam.com> (raw)
In-Reply-To: <20221209142331.26395-1-sergei.shtepa@veeam.com>

Provides management of regions of block devices available for storing
difference blocks of a snapshot. Contains lists of free and already
occupied regions.

Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 drivers/block/blksnap/diff_storage.c | 317 +++++++++++++++++++++++++++
 drivers/block/blksnap/diff_storage.h |  93 ++++++++
 2 files changed, 410 insertions(+)
 create mode 100644 drivers/block/blksnap/diff_storage.c
 create mode 100644 drivers/block/blksnap/diff_storage.h

diff --git a/drivers/block/blksnap/diff_storage.c b/drivers/block/blksnap/diff_storage.c
new file mode 100644
index 000000000000..20e61963237d
--- /dev/null
+++ b/drivers/block/blksnap/diff_storage.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME "-diff-storage: " fmt
+
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/blksnap.h>
+#include "params.h"
+#include "chunk.h"
+#include "diff_io.h"
+#include "diff_buffer.h"
+#include "diff_storage.h"
+
+/**
+ * struct storage_bdev - Information about the opened block device.
+ *
+ * @link:
+ *	Allows to combine structures into a linked list.
+ * @dev_id:
+ *	ID of the block device.
+ * @bdev:
+ *	A pointer to an open block device.
+ */
+struct storage_bdev {
+	struct list_head link;
+	dev_t dev_id;
+	struct block_device *bdev;
+};
+
+/**
+ * struct storage_block - A storage unit reserved for storing differences.
+ *
+ * @link:
+ *	Allows to combine structures into a linked list.
+ * @bdev:
+ *	A pointer to a block device.
+ * @sector:
+ *	The number of the first sector of the range of allocated space for
+ *	storing the difference.
+ * @count:
+ *	The count of sectors in the range of allocated space for storing the
+ *	difference.
+ * @used:
+ *	The count of used sectors in the range of allocated space for storing
+ *	the difference.
+ */
+struct storage_block {
+	struct list_head link;
+	struct block_device *bdev;
+	sector_t sector;
+	sector_t count;
+	sector_t used;
+};
+
+static inline void diff_storage_event_low(struct diff_storage *diff_storage)
+{
+	struct blk_snap_event_low_free_space data = {
+		.requested_nr_sect = diff_storage_minimum,
+	};
+
+	diff_storage->requested += data.requested_nr_sect;
+	pr_debug("Diff storage low free space. Portion: %llu sectors, requested: %llu\n",
+		data.requested_nr_sect, diff_storage->requested);
+	event_gen(&diff_storage->event_queue, GFP_NOIO,
+		  blk_snap_event_code_low_free_space, &data, sizeof(data));
+}
+
+struct diff_storage *diff_storage_new(void)
+{
+	struct diff_storage *diff_storage;
+
+	diff_storage = kzalloc(sizeof(struct diff_storage), GFP_KERNEL);
+	if (!diff_storage)
+		return NULL;
+
+	kref_init(&diff_storage->kref);
+	spin_lock_init(&diff_storage->lock);
+	INIT_LIST_HEAD(&diff_storage->storage_bdevs);
+	INIT_LIST_HEAD(&diff_storage->empty_blocks);
+	INIT_LIST_HEAD(&diff_storage->filled_blocks);
+
+	event_queue_init(&diff_storage->event_queue);
+	diff_storage_event_low(diff_storage);
+
+	return diff_storage;
+}
+
+static inline struct storage_block *
+first_empty_storage_block(struct diff_storage *diff_storage)
+{
+	return list_first_entry_or_null(&diff_storage->empty_blocks,
+					struct storage_block, link);
+};
+
+static inline struct storage_block *
+first_filled_storage_block(struct diff_storage *diff_storage)
+{
+	return list_first_entry_or_null(&diff_storage->filled_blocks,
+					struct storage_block, link);
+};
+
+static inline struct storage_bdev *
+first_storage_bdev(struct diff_storage *diff_storage)
+{
+	return list_first_entry_or_null(&diff_storage->storage_bdevs,
+					struct storage_bdev, link);
+};
+
+void diff_storage_free(struct kref *kref)
+{
+	struct diff_storage *diff_storage =
+		container_of(kref, struct diff_storage, kref);
+	struct storage_block *blk;
+	struct storage_bdev *storage_bdev;
+
+	while ((blk = first_empty_storage_block(diff_storage))) {
+		list_del(&blk->link);
+		kfree(blk);
+	}
+
+	while ((blk = first_filled_storage_block(diff_storage))) {
+		list_del(&blk->link);
+		kfree(blk);
+	}
+
+	while ((storage_bdev = first_storage_bdev(diff_storage))) {
+		blkdev_put(storage_bdev->bdev, FMODE_READ | FMODE_WRITE);
+		list_del(&storage_bdev->link);
+		kfree(storage_bdev);
+	}
+	event_queue_done(&diff_storage->event_queue);
+
+	kfree(diff_storage);
+}
+
+static struct block_device *
+diff_storage_bdev_by_id(struct diff_storage *diff_storage, dev_t dev_id)
+{
+	struct block_device *bdev = NULL;
+	struct storage_bdev *storage_bdev;
+
+	spin_lock(&diff_storage->lock);
+	list_for_each_entry(storage_bdev, &diff_storage->storage_bdevs, link) {
+		if (storage_bdev->dev_id == dev_id) {
+			bdev = storage_bdev->bdev;
+			break;
+		}
+	}
+	spin_unlock(&diff_storage->lock);
+
+	return bdev;
+}
+
+static inline struct block_device *
+diff_storage_add_storage_bdev(struct diff_storage *diff_storage, dev_t dev_id)
+{
+	struct block_device *bdev;
+	struct storage_bdev *storage_bdev;
+
+	bdev = blkdev_get_by_dev(dev_id, FMODE_READ | FMODE_WRITE, NULL);
+	if (IS_ERR(bdev)) {
+		pr_err("Failed to open device. errno=%d\n",
+		       abs((int)PTR_ERR(bdev)));
+		return bdev;
+	}
+
+	storage_bdev = kzalloc(sizeof(struct storage_bdev), GFP_KERNEL);
+	if (!storage_bdev) {
+		blkdev_put(bdev, FMODE_READ | FMODE_WRITE);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	storage_bdev->bdev = bdev;
+	storage_bdev->dev_id = dev_id;
+	INIT_LIST_HEAD(&storage_bdev->link);
+
+	spin_lock(&diff_storage->lock);
+	list_add_tail(&storage_bdev->link, &diff_storage->storage_bdevs);
+	spin_unlock(&diff_storage->lock);
+
+	return bdev;
+}
+
+static inline int diff_storage_add_range(struct diff_storage *diff_storage,
+					 struct block_device *bdev,
+					 sector_t sector, sector_t count)
+{
+	struct storage_block *storage_block;
+
+	pr_debug("Add range to diff storage: [%u:%u] %llu:%llu\n",
+		 MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev), sector, count);
+
+	storage_block = kzalloc(sizeof(struct storage_block), GFP_KERNEL);
+	if (!storage_block)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&storage_block->link);
+	storage_block->bdev = bdev;
+	storage_block->sector = sector;
+	storage_block->count = count;
+
+	spin_lock(&diff_storage->lock);
+	list_add_tail(&storage_block->link, &diff_storage->empty_blocks);
+	diff_storage->capacity += count;
+	spin_unlock(&diff_storage->lock);
+
+	return 0;
+}
+
+int diff_storage_append_block(struct diff_storage *diff_storage, dev_t dev_id,
+			      struct blk_snap_block_range __user *ranges,
+			      unsigned int range_count)
+{
+	int ret;
+	int inx;
+	struct block_device *bdev;
+	struct blk_snap_block_range range;
+	const unsigned long range_size = sizeof(struct blk_snap_block_range);
+
+	pr_debug("Append %u blocks\n", range_count);
+
+	bdev = diff_storage_bdev_by_id(diff_storage, dev_id);
+	if (!bdev) {
+		bdev = diff_storage_add_storage_bdev(diff_storage, dev_id);
+		if (IS_ERR(bdev))
+			return PTR_ERR(bdev);
+	}
+
+	for (inx = 0; inx < range_count; inx++) {
+		if (unlikely(copy_from_user(&range, ranges+inx, range_size)))
+			return -EINVAL;
+
+		ret = diff_storage_add_range(diff_storage, bdev,
+					     range.sector_offset,
+					     range.sector_count);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (atomic_read(&diff_storage->low_space_flag) &&
+	    (diff_storage->capacity >= diff_storage->requested))
+		atomic_set(&diff_storage->low_space_flag, 0);
+
+	return 0;
+}
+
+static inline bool is_halffull(const sector_t sectors_left)
+{
+	return sectors_left <= ((diff_storage_minimum >> 1) & ~(PAGE_SECTORS - 1));
+}
+
+struct diff_region *diff_storage_new_region(struct diff_storage *diff_storage,
+					   sector_t count)
+{
+	int ret = 0;
+	struct diff_region *diff_region;
+	sector_t sectors_left;
+
+	if (atomic_read(&diff_storage->overflow_flag))
+		return ERR_PTR(-ENOSPC);
+
+	diff_region = kzalloc(sizeof(struct diff_region), GFP_NOIO);
+	if (!diff_region)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&diff_storage->lock);
+	do {
+		struct storage_block *storage_block;
+		sector_t available;
+
+		storage_block = first_empty_storage_block(diff_storage);
+		if (unlikely(!storage_block)) {
+			atomic_inc(&diff_storage->overflow_flag);
+			ret = -ENOSPC;
+			break;
+		}
+
+		available = storage_block->count - storage_block->used;
+		if (likely(available >= count)) {
+			diff_region->bdev = storage_block->bdev;
+			diff_region->sector =
+				storage_block->sector + storage_block->used;
+			diff_region->count = count;
+
+			storage_block->used += count;
+			diff_storage->filled += count;
+			break;
+		}
+
+		list_del(&storage_block->link);
+		list_add_tail(&storage_block->link,
+			      &diff_storage->filled_blocks);
+		/*
+		 * If there is still free space in the storage block, but
+		 * it is not enough to store a piece, then such a block is
+		 * considered used.
+		 * We believe that the storage blocks are large enough
+		 * to accommodate several pieces entirely.
+		 */
+		diff_storage->filled += available;
+	} while (1);
+	sectors_left = diff_storage->requested - diff_storage->filled;
+	spin_unlock(&diff_storage->lock);
+
+	if (ret) {
+		pr_err("Cannot get empty storage block\n");
+		diff_storage_free_region(diff_region);
+		return ERR_PTR(ret);
+	}
+
+	if (is_halffull(sectors_left) &&
+	    (atomic_inc_return(&diff_storage->low_space_flag) == 1))
+		diff_storage_event_low(diff_storage);
+
+	return diff_region;
+}
diff --git a/drivers/block/blksnap/diff_storage.h b/drivers/block/blksnap/diff_storage.h
new file mode 100644
index 000000000000..efd0525afd01
--- /dev/null
+++ b/drivers/block/blksnap/diff_storage.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BLK_SNAP_DIFF_STORAGE_H
+#define __BLK_SNAP_DIFF_STORAGE_H
+
+#include "event_queue.h"
+
+struct blk_snap_block_range;
+struct diff_region;
+
+/**
+ * struct diff_storage - Difference storage.
+ *
+ * @kref:
+ *	The reference counter.
+ * @lock:
+ *	Spinlock allows to guarantee the safety of linked lists.
+ * @storage_bdevs:
+ *	List of opened block devices. Blocks for storing snapshot data can be
+ *	located on different block devices. So, all opened block devices are
+ *	located in this list. Blocks on opened block devices are allocated for
+ *	storing the chunks data.
+ * @empty_blocks:
+ *	List of empty blocks on storage. This list can be updated while
+ *	holding a snapshot. This allows us to dynamically increase the
+ *	storage size for these snapshots.
+ * @filled_blocks:
+ *	List of filled blocks. When the blocks from the list of empty blocks are filled,
+ *	we move them to the list of filled blocks.
+ * @capacity:
+ *	Total amount of available storage space.
+ * @filled:
+ *	The number of sectors already filled in.
+ * @requested:
+ *	The number of sectors already requested from user space.
+ * @low_space_flag:
+ *	The flag is set if the number of free regions available in the
+ *	difference storage is less than the allowed minimum.
+ * @overflow_flag:
+ *	The request for a free region failed due to the absence of free
+ *	regions in the difference storage.
+ * @event_queue:
+ *	A queue of events to pass events to user space. Diff storage and its
+ *	owner can notify its snapshot about events like snapshot overflow,
+ *	low free space and snapshot terminated.
+ *
+ * The difference storage manages the regions of block devices that are used
+ * to store the data of the original block devices in the snapshot.
+ * The difference storage is created one per snapshot and is used to store
+ * data from all the original snapshot block devices. At the same time, the
+ * difference storage itself can contain regions on various block devices.
+ */
+struct diff_storage {
+	struct kref kref;
+	spinlock_t lock;
+
+	struct list_head storage_bdevs;
+	struct list_head empty_blocks;
+	struct list_head filled_blocks;
+
+	sector_t capacity;
+	sector_t filled;
+	sector_t requested;
+
+	atomic_t low_space_flag;
+	atomic_t overflow_flag;
+
+	struct event_queue event_queue;
+};
+
+struct diff_storage *diff_storage_new(void);
+void diff_storage_free(struct kref *kref);
+
+static inline void diff_storage_get(struct diff_storage *diff_storage)
+{
+	kref_get(&diff_storage->kref);
+};
+static inline void diff_storage_put(struct diff_storage *diff_storage)
+{
+	if (likely(diff_storage))
+		kref_put(&diff_storage->kref, diff_storage_free);
+};
+
+int diff_storage_append_block(struct diff_storage *diff_storage, dev_t dev_id,
+			      struct blk_snap_block_range __user *ranges,
+			      unsigned int range_count);
+struct diff_region *diff_storage_new_region(struct diff_storage *diff_storage,
+					    sector_t count);
+
+static inline void diff_storage_free_region(struct diff_region *region)
+{
+	kfree(region);
+}
+#endif /* __BLK_SNAP_DIFF_STORAGE_H */
-- 
2.20.1


  parent reply	other threads:[~2022-12-09 15:04 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-09 14:23 [PATCH v2 00/21] blksnap - block devices snapshots module Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 01/21] documentation, blkfilter: Block Device Filtering Mechanism Sergei Shtepa
2022-12-10  4:15   ` Bagas Sanjaya
2022-12-09 14:23 ` [PATCH v2 02/21] block, " Sergei Shtepa
2022-12-15  9:26   ` Christoph Hellwig
2022-12-15 10:46     ` Sergei Shtepa
2022-12-16  7:04       ` Christoph Hellwig
2023-01-31 23:58   ` Mike Snitzer
2023-02-01 11:09     ` Fabio Fantoni
2023-02-01 13:16     ` Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 03/21] documentation, capability: fix Generic Block Device Capability Sergei Shtepa
2022-12-13 12:13   ` Fabio Fantoni
2022-12-30 15:35     ` Fabio Fantoni
2022-12-09 14:23 ` [PATCH v2 04/21] documentation, blksnap: Block Devices Snapshots Module Sergei Shtepa
2022-12-10  3:50   ` Bagas Sanjaya
2022-12-09 14:23 ` [PATCH v2 05/21] block, blksnap: header file of the module interface Sergei Shtepa
2022-12-09 22:13   ` kernel test robot
2022-12-09 23:14   ` kernel test robot
2022-12-09 14:23 ` [PATCH v2 06/21] block, blksnap: module management interface functions Sergei Shtepa
2022-12-15  9:28   ` Christoph Hellwig
     [not found]   ` <CGME20230103153406eucas1p205c48bd767e6a86f6f1121db7eb5fc19@eucas1p2.samsung.com>
2023-01-03 15:26     ` Pankaj Raghav
2022-12-09 14:23 ` [PATCH v2 07/21] block, blksnap: init() and exit() functions Sergei Shtepa
2022-12-15  9:30   ` Christoph Hellwig
2022-12-09 14:23 ` [PATCH v2 08/21] block, blksnap: interaction with sysfs Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 09/21] block, blksnap: attaching and detaching the filter and handling I/O units Sergei Shtepa
2022-12-15 10:01   ` Christoph Hellwig
2022-12-09 14:23 ` [PATCH v2 10/21] block, blksnap: map of change block tracking Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 11/21] block, blksnap: minimum data storage unit of the original block device Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 12/21] block, blksnap: buffer in memory for the minimum data storage unit Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 13/21] block, blksnap: functions and structures for performing block I/O operations Sergei Shtepa
2022-12-15 10:06   ` Christoph Hellwig
2022-12-09 14:23 ` Sergei Shtepa [this message]
2022-12-09 14:23 ` [PATCH v2 15/21] block, blksnap: event queue from the difference storage Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 16/21] block, blksnap: owner of information about overwritten blocks of the original block device Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 17/21] block, blksnap: snapshot image " Sergei Shtepa
2022-12-15  9:45   ` Christoph Hellwig
2022-12-09 14:23 ` [PATCH v2 18/21] block, blksnap: snapshot Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 19/21] block, blksnap: Kconfig and Makefile Sergei Shtepa
2022-12-09 14:23 ` [PATCH v2 20/21] block, blksnap: adds a blksnap to the kernel tree Sergei Shtepa
2022-12-09 21:53   ` kernel test robot
2022-12-09 14:23 ` [PATCH v2 21/21] block, blksnap: adds a maintainer for new files Sergei Shtepa
2022-12-10  3:23 ` [PATCH v2 00/21] blksnap - block devices snapshots module Bagas Sanjaya
2022-12-10 22:57   ` Sergei Shtepa
     [not found] ` <20230101071813.3329-1-hdanton@sina.com>
2023-01-02  9:44   ` [PATCH v2 17/21] block, blksnap: snapshot image block device Sergei Shtepa
     [not found] ` <20230101110542.3395-1-hdanton@sina.com>
2023-01-02  9:58   ` [PATCH v2 18/21] block, blksnap: snapshot Sergei Shtepa
2023-01-17 21:04 ` [PATCH v2 00/21] blksnap - block devices snapshots module Mike Snitzer
2023-01-18 10:51   ` Hannes Reinecke
2023-01-24 11:34   ` Sergei Shtepa
2023-01-31 20:47     ` Mike Snitzer
2023-02-01  2:32       ` Mason Giles

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221209142331.26395-15-sergei.shtepa@veeam.com \
    --to=sergei.shtepa@veeam.com \
    --cc=axboe@kernel.dk \
    --cc=corbet@lwn.net \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).