* [PATCH v5] mm: add zblock - new allocator for use via zpool API
@ 2022-09-28  8:01 ananda
  2022-09-28 17:29 ` Matthew Wilcox
  2022-09-28 18:37 ` Yosry Ahmed
  0 siblings, 2 replies; 10+ messages in thread
From: ananda @ 2022-09-28  8:01 UTC (permalink / raw)
  To: linux-mm, vitaly.wool; +Cc: Ananda

From: Ananda <a.badmaev@clicknet.pro>

    Zblock stores an integer number of compressed objects per zblock block.
These blocks consist of several consecutive physical pages (1/2/4/8) and
are arranged in linked lists.
    The range from 0 to PAGE_SIZE is divided into a number of intervals
corresponding to the number of lists, and each list only handles objects
whose size falls into its interval. The block lists are therefore isolated
from each other, which makes it possible to operate on several objects
from different lists simultaneously.
    Blocks allow objects of various sizes to be packed densely, resulting
in low internal fragmentation. The allocator also tries to fill incomplete
blocks instead of adding new ones, which in many cases yields a compression
ratio substantially higher than that of z3fold and zbud.
    Zblock does not require an MMU and is superior to zsmalloc with regard
to worst-case execution times, allowing for better response times and
real-time characteristics of the whole system.
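
    For reference, below is a minimal sketch of how a zpool client would
exercise this backend through the generic zpool API once the driver is
registered. The function name, pool name and error handling are purely
illustrative; only standard <linux/zpool.h> calls are used:

	#include <linux/gfp.h>
	#include <linux/string.h>
	#include <linux/zpool.h>

	static int zblock_smoke_test(void)
	{
		struct zpool *pool;
		unsigned long handle;
		char *buf;
		int err;

		/* "zblock" selects this backend */
		pool = zpool_create_pool("zblock", "test", GFP_KERNEL, NULL);
		if (!pool)
			return -ENOMEM;

		/* allocate a 128-byte slot; an opaque handle is returned */
		err = zpool_malloc(pool, 128, GFP_KERNEL, &handle);
		if (err)
			goto out;

		/* map the handle to a pointer, use it, then unmap and free */
		buf = zpool_map_handle(pool, handle, ZPOOL_MM_RW);
		memset(buf, 0, 128);
		zpool_unmap_handle(pool, handle);
		zpool_free(pool, handle);
	out:
		zpool_destroy_pool(pool);
		return err;
	}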

Signed-off-by: Ananda <a.badmaev@clicknet.pro>
---

v2: fixed compiler warnings

v3: added documentation and const modifier to struct tree_descr

v4: - fixed gfp flags for block allocation
    - fixed potential memory leak when allocating blocks
    - resolved some issues with code style and warnings from checkpatch
      (except warning about single line config symbol description)
    - moved test results from documentation to changelog

v5: - "direct" handle mapping and use of linked lists instead of red-black
      trees resulting in faster operations and a bit simpler code
    - renamed ztree -> zblock
    - edited various comments and descriptions
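
    The "direct" handle mentioned above is simply the block's page-aligned
address with the block type and slot index packed into the low bits. A
sketch of the layout that metadata_to_handle()/handle_to_metadata() rely
on (illustrative values, assuming PAGE_SIZE == 4096 and SLOT_BITS == 5):

	/*
	 * handle layout sketch:
	 *   bits [63:12]  page-aligned address of struct zblock_block
	 *   bits [11:5]   block type (index into block_desc[])
	 *   bits [4:0]    slot index within the block
	 */
	handle = (unsigned long)block + (block_type << SLOT_BITS) + slot;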

 Documentation/mm/zblock.rst |  31 ++
 MAINTAINERS                 |   7 +
 mm/Kconfig                  |  17 +
 mm/Makefile                 |   1 +
 mm/zblock.c                 | 637 ++++++++++++++++++++++++++++++++++++
 5 files changed, 693 insertions(+)
 create mode 100644 Documentation/mm/zblock.rst
 create mode 100644 mm/zblock.c

diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
new file mode 100644
index 000000000000..5008ce90b54b
--- /dev/null
+++ b/Documentation/mm/zblock.rst
@@ -0,0 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _block:
+
+======
+zblock
+======
+
+Zblock stores an integer number of compressed objects per block. These
+blocks consist of several consecutive physical pages (from 1 to 8) and
+are arranged in lists. The range from 0 to PAGE_SIZE is divided into a
+number of intervals corresponding to the number of lists, and each list
+only handles objects whose size falls into its interval. The block lists
+are therefore isolated from each other, which makes it possible to operate
+on several objects from different lists simultaneously.
+
+Blocks allow objects of various sizes to be packed densely, resulting in
+low internal fragmentation. The allocator also tries to fill incomplete
+blocks instead of adding new ones, which in many cases yields a compression
+ratio substantially higher than that of z3fold and zbud. Zblock does not
+require an MMU and is superior to zsmalloc with regard to worst-case
+execution times, allowing for better response times and real-time
+characteristics of the whole system.
+
+As with z3fold and zsmalloc, zblock_alloc() does not return a dereferenceable
+pointer. Instead, it returns an unsigned long handle which encodes the actual
+location of the allocated object.
+
+Unlike zbud and z3fold, zblock works well with objects of various sizes -
+both highly compressed and poorly compressed, including cases where both
+types are present.
diff --git a/MAINTAINERS b/MAINTAINERS
index f1390b8270b2..014fb19eb2cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22457,6 +22457,13 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/z3fold.c
 
+ZBLOCK COMPRESSED PAGE ALLOCATOR
+M:	Ananda Badmaev <a.badmaev@clicknet.pro>
+M:	Vitaly Wool <vitaly.wool@konsulko.com>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	mm/zblock.c
+
 ZD1211RW WIRELESS DRIVER
 M:	Ulrich Kunitz <kune@deine-taler.de>
 L:	linux-wireless@vger.kernel.org
diff --git a/mm/Kconfig b/mm/Kconfig
index 0331f1461f81..470c80f5726d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -149,6 +149,12 @@ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
 	select ZSMALLOC
 	help
 	  Use the zsmalloc allocator as the default allocator.
+
+config ZSWAP_ZPOOL_DEFAULT_ZBLOCK
+	bool "zblock"
+	select ZBLOCK
+	help
+	  Use the zblock allocator as the default allocator.
 endchoice
 
 config ZSWAP_ZPOOL_DEFAULT
@@ -157,6 +163,7 @@ config ZSWAP_ZPOOL_DEFAULT
        default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
        default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
        default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
+       default "zblock" if ZSWAP_ZPOOL_DEFAULT_ZBLOCK
        default ""
 
 config ZBUD
@@ -187,6 +194,16 @@ config ZSMALLOC
 	  pages of various compression levels efficiently. It achieves
 	  the highest storage density with the least amount of fragmentation.
 
+config ZBLOCK
+	tristate "Simple block allocator (zblock)"
+	depends on ZPOOL
+	help
+	  A special purpose allocator for storing compressed pages.
+	  It stores an integer number of compressed pages per block, and
+	  each block consists of a power-of-2 number of physical pages.
+	  zblock provides fast reads/writes, a bounded worst-case time for
+	  operations and a good compression ratio in most scenarios.
+
 config ZSMALLOC_STAT
 	bool "Export zsmalloc statistics"
 	depends on ZSMALLOC
diff --git a/mm/Makefile b/mm/Makefile
index 9a564f836403..eb7235da6e61 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_ZPOOL)	+= zpool.o
 obj-$(CONFIG_ZBUD)	+= zbud.o
 obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
 obj-$(CONFIG_Z3FOLD)	+= z3fold.o
+obj-$(CONFIG_ZBLOCK)	+= zblock.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
 obj-$(CONFIG_CMA)	+= cma.o
 obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
diff --git a/mm/zblock.c b/mm/zblock.c
new file mode 100644
index 000000000000..b389f43e0c26
--- /dev/null
+++ b/mm/zblock.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * zblock.c
+ *
+ * Author: Ananda Badmaev <a.badmaev@clicknet.pro>
+ * Copyright (C) 2022, Konsulko AB.
+ *
+ * This implementation is based on z3fold written by Vitaly Wool.
+ * Zblock is a small object allocator intended to serve as a zpool
+ * backend. It operates on page blocks which consist of a power-of-2
+ * number of physical pages and store an integer number of compressed
+ * pages per block, which results in determinism and simplicity.
+ *
+ * zblock doesn't export any API and is meant to be used via the zpool API.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/zpool.h>
+
+#define SLOT_FREE 0
+#define SLOT_OCCUPIED 1
+#define SLOT_MAPPED 2
+#define SLOT_UNMAPPED 3
+
+#define SLOT_BITS 5
+#define MAX_SLOTS (1 << SLOT_BITS)
+#define SLOT_MASK ((0x1UL << SLOT_BITS) - 1)
+
+#define BLOCK_DATA_SIZE(order) ((PAGE_SIZE << order) - sizeof(struct zblock_block))
+#define SLOT_SIZE(nslots, order) (round_down((BLOCK_DATA_SIZE(order) / nslots), sizeof(long)))
+
+#define BLOCK_CACHE_SIZE 32
+
+struct zblock_pool;
+
+struct zblock_ops {
+	int (*evict)(struct zblock_pool *pool, unsigned long handle);
+};
+
+/**
+ * struct zblock_block - block metadata
+ * A block consists of several (1/2/4/8) pages and contains a fixed
+ * number of slots for storing compressed pages.
+ *
+ * @lock:		protects the block
+ * @block_node:		links the block into the relevant list in the pool
+ * @slot_info:		contains data about free/occupied slots
+ * @free_slots:		number of free slots in the block
+ * @under_reclaim:	if true, the block is being evicted
+ */
+struct zblock_block {
+	spinlock_t lock;
+	struct list_head block_node;
+	u8 slot_info[MAX_SLOTS];
+	unsigned int free_slots;
+	bool under_reclaim;
+};
+/**
+ * struct block_desc - general metadata for block lists
+ * Each block list stores only blocks of the corresponding type, which means
+ * that all blocks in it have the same number and size of slots.
+ * All slots are aligned to the size of long.
+ *
+ * @slot_size:		size of a slot for this list
+ * @slots_per_block:	number of slots per block for this list
+ * @order:		order for __get_free_pages
+ */
+static const struct block_desc {
+	const unsigned int slot_size;
+	const unsigned short slots_per_block;
+	const unsigned short order;
+} block_desc[] = {
+	{ SLOT_SIZE(32, 0), 32, 0 },
+	{ SLOT_SIZE(22, 0), 22, 0 },
+	{ SLOT_SIZE(17, 0), 17, 0 },
+	{ SLOT_SIZE(13, 0), 13, 0 },
+	{ SLOT_SIZE(11, 0), 11, 0 },
+	{ SLOT_SIZE(9, 0), 9, 0 },
+	{ SLOT_SIZE(8, 0), 8, 0 },
+	{ SLOT_SIZE(14, 1), 14, 1 },
+	{ SLOT_SIZE(12, 1), 12, 1 },
+	{ SLOT_SIZE(11, 1), 11, 1 },
+	{ SLOT_SIZE(10, 1), 10, 1 },
+	{ SLOT_SIZE(9, 1), 9, 1 },
+	{ SLOT_SIZE(8, 1), 8, 1 },
+	{ SLOT_SIZE(15, 2), 15, 2 },
+	{ SLOT_SIZE(14, 2), 14, 2 },
+	{ SLOT_SIZE(13, 2), 13, 2 },
+	{ SLOT_SIZE(12, 2), 12, 2 },
+	{ SLOT_SIZE(11, 2), 11, 2 },
+	{ SLOT_SIZE(10, 2), 10, 2 },
+	{ SLOT_SIZE(9, 2), 9, 2 },
+	{ SLOT_SIZE(8, 2), 8, 2 },
+	{ SLOT_SIZE(15, 3), 15, 3 },
+	{ SLOT_SIZE(14, 3), 14, 3 },
+	{ SLOT_SIZE(13, 3), 13, 3 },
+	{ SLOT_SIZE(12, 3), 12, 3 },
+	{ SLOT_SIZE(11, 3), 11, 3 },
+	{ SLOT_SIZE(10, 3), 10, 3 },
+	{ SLOT_SIZE(9, 3), 9, 3 },
+	{ SLOT_SIZE(7, 3), 7, 3 }
+};
+
+/**
+ * struct block_list - stores metadata of a particular list
+ * @lock:		protects the list
+ * @head:		head of this list
+ * @block_cache:	blocks with free slots
+ * @block_count:	total number of blocks in the list
+ */
+struct block_list {
+	spinlock_t lock;
+	struct list_head head;
+	struct zblock_block *block_cache[BLOCK_CACHE_SIZE];
+	unsigned long block_count;
+};
+
+/**
+ * struct zblock_pool - stores metadata for each zblock pool
+ * @block_lists:	array of block lists
+ * @ops:		pointer to a structure of user-defined operations
+ *			specified at pool creation time
+ * @zpool:		zpool driver
+ * @zpool_ops:		zpool operations structure with an evict callback
+ * @alloc_flag:		serializes new block allocation to prevent leaking blocks
+ *
+ * This structure is allocated at pool creation time and maintains metadata
+ * for a particular zblock pool.
+ */
+struct zblock_pool {
+	struct block_list block_lists[ARRAY_SIZE(block_desc)];
+	const struct zblock_ops *ops;
+	struct zpool *zpool;
+	const struct zpool_ops *zpool_ops;
+	atomic_t alloc_flag;
+};
+
+/*****************
+ * Helpers
+ *****************/
+
+static void cache_insert_block(struct zblock_block *block, struct block_list *list)
+{
+	unsigned int i, min_free_slots, min_index;
+
+	min_free_slots = MAX_SLOTS;
+	for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
+		if (!list->block_cache[i] || !(list->block_cache[i])->free_slots) {
+			list->block_cache[i] = block;
+			return;
+		}
+		if ((list->block_cache[i])->free_slots < min_free_slots) {
+			min_free_slots = (list->block_cache[i])->free_slots;
+			min_index = i;
+		}
+	}
+	list->block_cache[min_index] = block;
+}
+
+static struct zblock_block *cache_find_block(struct block_list *list)
+{
+	int i;
+
+	for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
+		if (list->block_cache[i] && (list->block_cache[i])->free_slots)
+			return list->block_cache[i];
+	}
+	return NULL;
+}
+
+static int is_in_cache(struct zblock_block *block, struct block_list *list)
+{
+	int i;
+
+	for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
+		if (block == list->block_cache[i])
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * allocate new block and add it to corresponding block list
+ */
+static struct zblock_block *alloc_block(struct zblock_pool *pool,
+									int block_type, gfp_t gfp)
+{
+	struct zblock_block *block;
+	struct block_list *list;
+
+	block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
+	if (!block)
+		return NULL;
+
+	list = &(pool->block_lists)[block_type];
+
+	/* init block data  */
+	spin_lock_init(&block->lock);
+	memset(block->slot_info, SLOT_FREE, block_desc[block_type].slots_per_block);
+	block->free_slots = block_desc[block_type].slots_per_block;
+	block->under_reclaim = false;
+
+	spin_lock(&list->lock);
+	/* inserting block into list */
+	INIT_LIST_HEAD(&block->block_node);
+	list_add(&block->block_node, &list->head);
+	cache_insert_block(block, list);
+	list->block_count++;
+	spin_unlock(&list->lock);
+	return block;
+}
+
+/*
+ * Encodes the handle of a particular slot in the pool using metadata
+ */
+static inline unsigned long metadata_to_handle(struct zblock_block *block,
+							unsigned int block_type, unsigned int slot)
+{
+	return (unsigned long)(block) + (block_type << SLOT_BITS) + slot;
+}
+
+/* Returns block, block type and slot in the pool corresponding to handle */
+static inline struct zblock_block *handle_to_metadata(unsigned long handle,
+						unsigned int *block_type, unsigned int *slot)
+{
+	*block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
+	*slot = handle & SLOT_MASK;
+	return (struct zblock_block *)(handle & PAGE_MASK);
+}
+
+
+/*****************
+ * API Functions
+ *****************/
+/**
+ * zblock_create_pool() - create a new zblock pool
+ * @gfp:	gfp flags when allocating the zblock pool structure
+ * @ops:	user-defined operations for the zblock pool
+ *
+ * Return: pointer to the new zblock pool or NULL if the metadata allocation
+ * failed.
+ */
+static struct zblock_pool *zblock_create_pool(gfp_t gfp, const struct zblock_ops *ops)
+{
+	struct zblock_pool *pool;
+	struct block_list *list;
+	int i, j;
+
+	pool = kmalloc(sizeof(struct zblock_pool), gfp);
+	if (!pool)
+		return NULL;
+
+	/* init each block list */
+	for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
+		list = &(pool->block_lists)[i];
+		spin_lock_init(&list->lock);
+		INIT_LIST_HEAD(&list->head);
+		for (j = 0; j < BLOCK_CACHE_SIZE; j++)
+			list->block_cache[j] = NULL;
+		list->block_count = 0;
+	}
+	pool->ops = ops;
+	atomic_set(&pool->alloc_flag, 0);
+	return pool;
+}
+
+/**
+ * zblock_destroy_pool() - destroys an existing zblock pool
+ * @pool:	the zblock pool to be destroyed
+ *
+ */
+static void zblock_destroy_pool(struct zblock_pool *pool)
+{
+	kfree(pool);
+}
+
+
+/**
+ * zblock_alloc() - allocates a slot of appropriate size
+ * @pool:	zblock pool from which to allocate
+ * @size:	size in bytes of the desired allocation
+ * @gfp:	gfp flags used if the pool needs to grow
+ * @handle:	handle of the new allocation
+ *
+ * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
+ * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
+ * a new slot.
+ */
+static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
+			unsigned long *handle)
+{
+	unsigned int block_type, slot;
+	struct zblock_block *block;
+	struct block_list *list;
+
+	if (!size)
+		return -EINVAL;
+
+	if (size > PAGE_SIZE)
+		return -ENOSPC;
+
+	/* find basic block type with suitable slot size */
+	for (block_type = 0; block_type < ARRAY_SIZE(block_desc); block_type++) {
+		if (size <= block_desc[block_type].slot_size)
+			break;
+	}
+	list = &(pool->block_lists[block_type]);
+
+check:
+	spin_lock(&list->lock);
+	/* check if there are free slots in cache */
+	block = cache_find_block(list);
+	if (block)
+		goto found;
+	spin_unlock(&list->lock);
+
+	/* no block with free slots found, try to allocate a new empty block */
+	if (atomic_cmpxchg(&pool->alloc_flag, 0, 1))
+		goto check;
+	block = alloc_block(pool, block_type, gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
+	if (block) {
+		spin_lock(&list->lock);
+		goto found;
+	}
+	atomic_set(&pool->alloc_flag, 0);
+	return -ENOMEM;
+
+found:
+	spin_lock(&block->lock);
+	block->free_slots--;
+	spin_unlock(&list->lock);
+	/* find the first free slot in block */
+	for (slot = 0; slot < block_desc[block_type].slots_per_block; slot++) {
+		if (block->slot_info[slot] == SLOT_FREE)
+			break;
+	}
+	block->slot_info[slot] = SLOT_OCCUPIED;
+	spin_unlock(&block->lock);
+	*handle = metadata_to_handle(block, block_type, slot);
+	atomic_set(&pool->alloc_flag, 0);
+	return 0;
+}
+
+/**
+ * zblock_free() - frees the allocation associated with the given handle
+ * @pool:	pool in which the allocation resided
+ * @handle:	handle associated with the allocation returned by zblock_alloc()
+ *
+ */
+static void zblock_free(struct zblock_pool *pool, unsigned long handle)
+{
+	unsigned int slot, block_type;
+	struct zblock_block *block;
+	struct block_list *list;
+	int i;
+
+	block = handle_to_metadata(handle, &block_type, &slot);
+	list = &(pool->block_lists[block_type]);
+
+	if (block->under_reclaim)
+		return;
+	spin_lock(&list->lock);
+	i = is_in_cache(block, list);
+	block->free_slots++;
+	/* if all slots in block are empty delete whole block */
+	if (block->free_slots == block_desc[block_type].slots_per_block) {
+		list_del(&block->block_node);
+		list->block_count--;
+
+		/* remove the block from the cache if it was cached */
+		if (i != -1)
+			list->block_cache[i] = NULL;
+		spin_unlock(&list->lock);
+		free_pages((unsigned long)block, block_desc[block_type].order);
+		return;
+	}
+	/* if block is not cached update cache */
+	if (i == -1)
+		cache_insert_block(block, list);
+
+	spin_lock(&block->lock);
+	spin_unlock(&list->lock);
+	block->slot_info[slot] = SLOT_FREE;
+	spin_unlock(&block->lock);
+}
+
+/**
+ * zblock_reclaim_block() - evicts allocations from block and frees it
+ * @pool:	pool from which a block will attempt to be evicted
+ *
+ * Returns: number of reclaimed slots on success, -EAGAIN if nothing could
+ *          be evicted, or -EINVAL if there are no blocks to evict
+ */
+static int zblock_reclaim_block(struct zblock_pool *pool)
+{
+	struct zblock_block *block;
+	struct block_list *list;
+	unsigned long handle, slot;
+	int ret, i, reclaimed, block_type;
+
+	/* start with the list storing blocks with the worst compression and
+	 * try to evict the first added (oldest) block in this list
+	 */
+	for (block_type = ARRAY_SIZE(block_desc) - 1; block_type >= 0; --block_type) {
+		list = &(pool->block_lists[block_type]);
+		spin_lock(&list->lock);
+
+		if (list_empty(&list->head)) {
+			spin_unlock(&list->lock);
+			continue;
+		}
+
+		/* find the oldest block in the list */
+		block = list_last_entry(&list->head, struct zblock_block, block_node);
+		i = is_in_cache(block, list);
+		/* skip iteration if this block is cached */
+		if (i != -1) {
+			spin_unlock(&list->lock);
+			continue;
+		}
+		block->under_reclaim = true;
+		spin_unlock(&list->lock);
+		reclaimed = 0;
+
+		/* try to evict all UNMAPPED slots in block */
+		for (slot = 0; slot < block_desc[block_type].slots_per_block; ++slot) {
+			if (block->slot_info[slot] == SLOT_UNMAPPED) {
+				handle = metadata_to_handle(block, block_type, slot);
+				ret = pool->ops->evict(pool, handle);
+				if (ret)
+					break;
+
+				++reclaimed;
+				spin_lock(&block->lock);
+				block->slot_info[slot] = SLOT_FREE;
+				spin_unlock(&block->lock);
+				block->free_slots++;
+			}
+		}
+		spin_lock(&list->lock);
+		/* some occupied slots remained - insert block */
+		if (block->free_slots != block_desc[block_type].slots_per_block) {
+			block->under_reclaim = false;
+			cache_insert_block(block, list);
+			spin_unlock(&list->lock);
+		} else {
+		/* all slots are free - delete this block */
+			list_del(&block->block_node);
+			list->block_count--;
+			spin_unlock(&list->lock);
+			free_pages((unsigned long)block, block_desc[block_type].order);
+		}
+		if (reclaimed != 0)
+			return reclaimed;
+		return -EAGAIN;
+	}
+	return -EINVAL;
+}
+
+
+/**
+ * zblock_map() - maps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be mapped
+ *
+ *
+ * Returns: a pointer to the mapped allocation
+ */
+static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
+{
+	unsigned int block_type, slot;
+	struct zblock_block *block;
+
+	block = handle_to_metadata(handle, &block_type, &slot);
+	spin_lock(&block->lock);
+	block->slot_info[slot] = SLOT_MAPPED;
+	spin_unlock(&block->lock);
+	return (void *)(block + 1) + slot * block_desc[block_type].slot_size;
+}
+
+/**
+ * zblock_unmap() - unmaps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be unmapped
+ */
+static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
+{
+	unsigned int block_type, slot;
+	struct zblock_block *block;
+
+	block = handle_to_metadata(handle, &block_type, &slot);
+	spin_lock(&block->lock);
+	block->slot_info[slot] = SLOT_UNMAPPED;
+	spin_unlock(&block->lock);
+}
+
+/**
+ * zblock_get_pool_size() - gets the zblock pool size in bytes
+ * @pool:	pool whose size is being queried
+ *
+ * Returns: size in bytes of the given pool.
+ */
+static u64 zblock_get_pool_size(struct zblock_pool *pool)
+{
+	u64 total_size;
+	int i;
+
+	total_size = 0;
+	for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
+		total_size += (pool->block_lists)[i].block_count
+				* (PAGE_SIZE << block_desc[i].order);
+	}
+	return total_size;
+}
+
+/*****************
+ * zpool
+ ****************/
+
+static int zblock_zpool_evict(struct zblock_pool *pool, unsigned long handle)
+{
+	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
+		return pool->zpool_ops->evict(pool->zpool, handle);
+	else
+		return -ENOENT;
+}
+
+static const struct zblock_ops zblock_zpool_ops = {
+	.evict =	zblock_zpool_evict
+};
+
+static void *zblock_zpool_create(const char *name, gfp_t gfp,
+				   const struct zpool_ops *zpool_ops,
+				   struct zpool *zpool)
+{
+	struct zblock_pool *pool;
+
+	pool = zblock_create_pool(gfp, &zblock_zpool_ops);
+	if (pool) {
+		pool->zpool = zpool;
+		pool->zpool_ops = zpool_ops;
+	}
+	return pool;
+}
+
+static void zblock_zpool_destroy(void *pool)
+{
+	zblock_destroy_pool(pool);
+}
+
+static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+			unsigned long *handle)
+{
+	return zblock_alloc(pool, size, gfp, handle);
+}
+
+static void zblock_zpool_free(void *pool, unsigned long handle)
+{
+	zblock_free(pool, handle);
+}
+
+static int zblock_zpool_shrink(void *pool, unsigned int pages,
+			unsigned int *reclaimed)
+{
+	unsigned int total = 0;
+	int ret = -EINVAL;
+
+	while (total < pages) {
+		ret = zblock_reclaim_block(pool);
+		if (ret < 0)
+			break;
+		total += ret;
+	}
+	if (reclaimed)
+		*reclaimed = total;
+
+	return ret;
+}
+
+static void *zblock_zpool_map(void *pool, unsigned long handle,
+			enum zpool_mapmode mm)
+{
+	return zblock_map(pool, handle);
+}
+
+static void zblock_zpool_unmap(void *pool, unsigned long handle)
+{
+	zblock_unmap(pool, handle);
+}
+
+static u64 zblock_zpool_total_size(void *pool)
+{
+	return zblock_get_pool_size(pool);
+}
+
+static struct zpool_driver zblock_zpool_driver = {
+	.type =		"zblock",
+	.owner =	THIS_MODULE,
+	.create =	zblock_zpool_create,
+	.destroy =	zblock_zpool_destroy,
+	.malloc =	zblock_zpool_malloc,
+	.free =		zblock_zpool_free,
+	.shrink =	zblock_zpool_shrink,
+	.map =		zblock_zpool_map,
+	.unmap =	zblock_zpool_unmap,
+	.total_size =	zblock_zpool_total_size,
+};
+
+MODULE_ALIAS("zpool-zblock");
+
+static int __init init_zblock(void)
+{
+	pr_info("loaded\n");
+	zpool_register_driver(&zblock_zpool_driver);
+	return 0;
+}
+
+static void __exit exit_zblock(void)
+{
+	zpool_unregister_driver(&zblock_zpool_driver);
+	pr_info("unloaded\n");
+}
+
+module_init(init_zblock);
+module_exit(exit_zblock);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ananda Badmaev <a.badmaev@clicknet.pro>");
+MODULE_DESCRIPTION("Block allocator for compressed pages");
-- 
2.34.1




* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-28  8:01 [PATCH v5] mm: add zblock - new allocator for use via zpool API ananda
@ 2022-09-28 17:29 ` Matthew Wilcox
  2022-09-29  5:01   ` Ananda Badmaev
  2022-09-28 18:37 ` Yosry Ahmed
  1 sibling, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2022-09-28 17:29 UTC (permalink / raw)
  To: ananda; +Cc: linux-mm, vitaly.wool

On Wed, Sep 28, 2022 at 11:01:00AM +0300, ananda wrote:
> +static struct zblock_block *alloc_block(struct zblock_pool *pool,
> +									int block_type, gfp_t gfp)

Your indentation is completely screwed up.  What are you trying to do
here?



* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-28  8:01 [PATCH v5] mm: add zblock - new allocator for use via zpool API ananda
  2022-09-28 17:29 ` Matthew Wilcox
@ 2022-09-28 18:37 ` Yosry Ahmed
  2022-09-29  5:24   ` Ananda Badmaev
  2022-09-29  6:55   ` Vitaly Wool
  1 sibling, 2 replies; 10+ messages in thread
From: Yosry Ahmed @ 2022-09-28 18:37 UTC (permalink / raw)
  To: ananda; +Cc: Linux-MM, vitaly.wool

On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
>
> From: Ananda <a.badmaev@clicknet.pro>
>
>     Zblock stores integer number of compressed objects per zblock block.
> These blocks consist of several physical pages (1/2/4/8) and are arranged
> in linked lists.
>     The range from 0 to PAGE_SIZE is divided into the number of intervals
> corresponding to the number of lists and each list only operates objects
> of size from its interval. Thus the block lists are isolated from each
> other, which makes it possible to simultaneously perform actions with
> several objects from different lists.
>     Blocks make it possible to densely arrange objects of various sizes
> resulting in low internal fragmentation. Also this allocator tries to fill
> incomplete blocks instead of adding new ones thus in many cases providing
> a compression ratio substantially higher than z3fold and zbud.
>     Zblock does not require MMU and also is superior to zsmalloc with
> regard to the worst execution times, thus allowing for better response time
> and real-time characteristics of the whole system.
>

It seems to me, and I can be wrong, that there is some overlap in
design and goals between this zpool backend and zsmalloc. They both
try to avoid internal fragmentation by avoiding the static slots used
by zbud and z3fold, and instead pack compressed pages more
dynamically. They both have some sort of concurrency handling
(separate block lists in zblock vs. classes in zsmalloc). A key
difference is that zsmalloc avoids higher order allocations (at least
based on its docs), and instead allows compressed pages to span across
0-order page boundaries.

The key differences I see here (based on this commit message and
zsmalloc docs) are:
a) Blocks in zblock can consist of higher order pages.
b) Compressed pages in zsmalloc can span page boundaries (I am
assuming this isn't the case for zblock).

It appears to me that if zblock has better performance than zsmalloc,
it can be because pages in zblock are physically contiguous vs. the
0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
assumption correct?

If yes, would it be better to implement those changes as some tunable
extension to zsmalloc? It would make it easier if we have overall fewer
zpool backends, and also easier for current users of zsmalloc to
experiment with these changes.

> [...]



* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-28 17:29 ` Matthew Wilcox
@ 2022-09-29  5:01   ` Ananda Badmaev
  0 siblings, 0 replies; 10+ messages in thread
From: Ananda Badmaev @ 2022-09-29  5:01 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-mm, vitaly.wool

28.09.2022 20:29, Matthew Wilcox wrote:
> On Wed, Sep 28, 2022 at 11:01:00AM +0300, ananda wrote:
>> +static struct zblock_block *alloc_block(struct zblock_pool *pool,
>> +									int block_type, gfp_t gfp)
> 
> Your indentation is completely screwed up.  What are you trying to do
> here?

Default tab size was 4 in my code editor. I have just set it to 8, so
there should be no such problems in the future.



* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-28 18:37 ` Yosry Ahmed
@ 2022-09-29  5:24   ` Ananda Badmaev
  2022-09-29  6:55     ` Vitaly Wool
  2022-09-29  6:55   ` Vitaly Wool
  1 sibling, 1 reply; 10+ messages in thread
From: Ananda Badmaev @ 2022-09-29  5:24 UTC (permalink / raw)
  To: Yosry Ahmed; +Cc: Linux-MM, vitaly.wool

28.09.2022 21:37, Yosry Ahmed wrote:
> On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
>>
>> From: Ananda <a.badmaev@clicknet.pro>
>>
>>      Zblock stores integer number of compressed objects per zblock block.
>> These blocks consist of several physical pages (1/2/4/8) and are arranged
>> in linked lists.
>>      The range from 0 to PAGE_SIZE is divided into the number of intervals
>> corresponding to the number of lists and each list only operates objects
>> of size from its interval. Thus the block lists are isolated from each
>> other, which makes it possible to simultaneously perform actions with
>> several objects from different lists.
>>      Blocks make it possible to densely arrange objects of various sizes
>> resulting in low internal fragmentation. Also this allocator tries to fill
>> incomplete blocks instead of adding new ones thus in many cases providing
>> a compression ratio substantially higher than z3fold and zbud.
>>      Zblock does not require MMU and also is superior to zsmalloc with
>> regard to the worst execution times, thus allowing for better response time
>> and real-time characteristics of the whole system.
>>
> 
> It seems to me, and I can be wrong, that there is some overlap in
> design and goals between this zpool backend and zsmalloc. They both
> try to avoid internal fragmentation by avoiding the static slots used
> by zbud and z3fold, and instead pack compressed pages more
> dynamically. They both have some sort of concurrency handling
> (separate block lists in zblock vs. classes in zsmalloc). A key
> difference is that zsmalloc avoids higher order allocations (at least
> based on its docs), and instead allows compressed pages to span across
> 0-order page boundaries.

You are right.

> The key differences I see here (based on this commit message and
> zsmalloc docs) are:
> a) Blocks in zblock can consist of higher order pages.
> b) Compressed pages in zsmalloc can span page boundaries (I am
> assuming this isn't the case for zblock).
> 
> It appears to me that if zblock has better performance than zsmalloc,
> it can be because pages in zblock are physically contiguous vs. the
> 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> assumption correct?
> 

Yes.

> If yes, would it be better to implement those changes as some tunable
> extension to zsmalloc? It would make it easier if we have overall less
> zpool backends, and also easier for current users of zsmalloc to
> experiment with these changes.
> 

Sounds reasonable, but I'm afraid that in this case the zsmalloc code
will become overloaded; there might also be problems with keeping
zblock's simplicity.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-28 18:37 ` Yosry Ahmed
  2022-09-29  5:24   ` Ananda Badmaev
@ 2022-09-29  6:55   ` Vitaly Wool
  2022-09-29  7:58     ` Yosry Ahmed
  1 sibling, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2022-09-29  6:55 UTC (permalink / raw)
  To: Yosry Ahmed; +Cc: ananda, Linux-MM

On Wed, Sep 28, 2022 at 8:38 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
> >
> > From: Ananda <a.badmaev@clicknet.pro>
> >
> >     Zblock stores integer number of compressed objects per zblock block.
> > These blocks consist of several physical pages (1/2/4/8) and are arranged
> > in linked lists.
> >     The range from 0 to PAGE_SIZE is divided into the number of intervals
> > corresponding to the number of lists and each list only operates objects
> > of size from its interval. Thus the block lists are isolated from each
> > other, which makes it possible to simultaneously perform actions with
> > several objects from different lists.
> >     Blocks make it possible to densely arrange objects of various sizes
> > resulting in low internal fragmentation. Also this allocator tries to fill
> > incomplete blocks instead of adding new ones thus in many cases providing
> > a compression ratio substantially higher than z3fold and zbud.
> >     Zblock does not require MMU and also is superior to zsmalloc with
> > regard to the worst execution times, thus allowing for better response time
> > and real-time characteristics of the whole system.
> >
>
> It seems to me, and I can be wrong, that there is some overlap in
> design and goals between this zpool backend and zsmalloc. They both
> try to avoid internal fragmentation by avoiding the static slots used
> by zbud and z3fold, and instead pack compressed pages more
> dynamically. They both have some sort of concurrency handling
> (separate block lists in zblock vs. classes in zsmalloc). A key
> difference is that zsmalloc avoids higher order allocations (at least
> based on its docs), and instead allows compressed pages to span across
> 0-order page boundaries.

Well, another key difference is that zsmalloc may only work on
MMU-enabled systems.

> The key differences I see here (based on this commit message and
> zsmalloc docs) are:
> a) Blocks in zblock can consist of higher order pages.
> b) Compressed pages in zsmalloc can span page boundaries (I am
> assuming this isn't the case for zblock).
>
> It appears to me that if zblock has better performance than zsmalloc,
> it can be because pages in zblock are physically contiguous vs. the
> 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> assumption correct?
>
> If yes, would it be better to implement those changes as some tunable
> extension to zsmalloc? It would make it easier if we have overall less
> zpool backends, and also easier for current users of zsmalloc to
> experiment with these changes.

Easier for whom? Not for me, nor for anyone using zswap, that's what I
have to say.
The zpool API was created to unify compression backends, so I would
strongly prefer to keep using zpool for backend configuration and
selection rather than implementing a custom in-zsmalloc selection
mechanism.
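
For context, that selection path already exists for zswap users; a
minimal sketch, assuming the standard zswap module parameters (with
"zblock" only becoming a valid value once this patch is merged):

	# at boot, on the kernel command line
	zswap.enabled=1 zswap.zpool=zblock

	# or at runtime, through sysfs
	echo zblock > /sys/module/zswap/parameters/zpool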

I don't think merging is a good idea, and here are the reasons:
- zsmalloc's code is already almost incomprehensible
- zsmalloc's main objective is density, while zblock aims for low latency
- merging the two approaches within zsmalloc would mean creating some
internal selection mechanism within zsmalloc which I would absolutely
like to avoid (for smaller RAM devices I usually don't compile
zsmalloc at all or compile it as a module).

Thanks,
Vitaly

> > Signed-off-by: Ananda <a.badmaev@clicknet.pro>
> > ---
> >
> > v2: fixed compiler warnings
> >
> > v3: added documentation and const modifier to struct tree_descr
> >
> > v4: - fixed gfp flags for block allocation
> >     - fixed potential memory leak when allocating blocks
> >     - resolved some issues with code style and warnings from checkpatch
> >       (except warning about single line config symbol description)
> >     - moved test results from documentation to changelog
> >
> > v5: - "direct" handle mapping and use of linked lists instead of red-black
> >       trees resulting in faster operations and a bit simpler code
> >     - renamed ztree -> zblock
> >     - edited various comments and descriptions
> >
> >  Documentation/mm/zblock.rst |  31 ++
> >  MAINTAINERS                 |   7 +
> >  mm/Kconfig                  |  17 +
> >  mm/Makefile                 |   1 +
> >  mm/zblock.c                 | 637 ++++++++++++++++++++++++++++++++++++
> >  5 files changed, 693 insertions(+)
> >  create mode 100644 Documentation/mm/zblock.rst
> >  create mode 100644 mm/zblock.c
> >
> > diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
> > new file mode 100644
> > index 000000000000..5008ce90b54b
> > --- /dev/null
> > +++ b/Documentation/mm/zblock.rst
> > @@ -0,0 +1,31 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +.. _block:
> > +
> > +======
> > +zblock
> > +======
> > +
> > +Zblock stores integer number of compressed objects per block. These
> > +blocks consist of several consecutive physical pages (from 1 to 8) and
> > +are arranged in lists. The range from 0 to PAGE_SIZE is divided into the
> > +number of intervals corresponding to the number of lists and each list
> > +only operates objects of size from its interval. Thus the block lists are
> > +isolated from each other, which makes it possible to simultaneously
> > +perform actions with several objects from different lists.
> > +
> > +Blocks make it possible to densely arrange objects of various sizes
> > +resulting in low internal fragmentation. Also this allocator tries to fill
> > +incomplete blocks instead of adding new ones thus in many cases providing
> > +a compression ratio substantially higher than z3fold and zbud. Zblock does
> > +not require MMU and also is superior to zsmalloc with regard to the worst
> > +execution times, thus allowing for better response time and real-time
> > +characteristics of the whole system.
> > +
> > +Like z3fold and zsmalloc zblock_alloc() does not return a dereferenceable
> > +pointer. Instead, it returns an unsigned long handle which encodes actual
> > +location of the allocated object.
> > +
> > +Unlike zbud and z3fold zblock works well with objects of various sizes - both
> > +highly compressed and poorly compressed including cases where both types
> > +are present.
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index f1390b8270b2..014fb19eb2cc 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -22457,6 +22457,13 @@ L:     linux-mm@kvack.org
> >  S:     Maintained
> >  F:     mm/z3fold.c
> >
> > +ZBLOCK COMPRESSED PAGE ALLOCATOR
> > +M:     Ananda Badmaev <a.badmaev@clicknet.pro>
> > +M:     Vitaly Wool <vitaly.wool@konsulko.com>
> > +L:     linux-mm@kvack.org
> > +S:     Maintained
> > +F:     mm/zblock.c
> > +
> >  ZD1211RW WIRELESS DRIVER
> >  M:     Ulrich Kunitz <kune@deine-taler.de>
> >  L:     linux-wireless@vger.kernel.org
> > diff --git a/mm/Kconfig b/mm/Kconfig
> > index 0331f1461f81..470c80f5726d 100644
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -149,6 +149,12 @@ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> >         select ZSMALLOC
> >         help
> >           Use the zsmalloc allocator as the default allocator.
> > +
> > +config ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > +       bool "zblock"
> > +       select ZBLOCK
> > +       help
> > +         Use the zblock allocator as the default allocator.
> >  endchoice
> >
> >  config ZSWAP_ZPOOL_DEFAULT
> > @@ -157,6 +163,7 @@ config ZSWAP_ZPOOL_DEFAULT
> >         default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
> >         default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
> >         default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > +          default "zblock" if ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> >         default ""
> >
> >  config ZBUD
> > @@ -187,6 +194,16 @@ config ZSMALLOC
> >           pages of various compression levels efficiently. It achieves
> >           the highest storage density with the least amount of fragmentation.
> >
> > +config ZBLOCK
> > +       tristate "Simple block allocator (zblock)"
> > +       depends on ZPOOL
> > +       help
> > +         A special purpose allocator for storing compressed pages.
> > +         It stores integer number of compressed pages per block and
> > +         each block consists of number of physical pages being a power of 2.
> > +         zblock provides fast read/write, limited worst case time for
> > +         operations and good compression ratio in most scenarios.
> > +
> >  config ZSMALLOC_STAT
> >         bool "Export zsmalloc statistics"
> >         depends on ZSMALLOC
> > diff --git a/mm/Makefile b/mm/Makefile
> > index 9a564f836403..eb7235da6e61 100644
> > --- a/mm/Makefile
> > +++ b/mm/Makefile
> > @@ -110,6 +110,7 @@ obj-$(CONFIG_ZPOOL) += zpool.o
> >  obj-$(CONFIG_ZBUD)     += zbud.o
> >  obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
> >  obj-$(CONFIG_Z3FOLD)   += z3fold.o
> > +obj-$(CONFIG_ZBLOCK)   += zblock.o
> >  obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
> >  obj-$(CONFIG_CMA)      += cma.o
> >  obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
> > diff --git a/mm/zblock.c b/mm/zblock.c
> > new file mode 100644
> > index 000000000000..b389f43e0c26
> > --- /dev/null
> > +++ b/mm/zblock.c
> > @@ -0,0 +1,637 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * zblock.c
> > + *
> > + * Author: Ananda Badmaev <a.badmaev@clicknet.pro>
> > + * Copyright (C) 2022, Konsulko AB.
> > + *
> > + * This implementation is based on z3fold written by Vitaly Wool.
> > + * Zblock is a small object allocator with the intention to serve as a
> > + * zpool backend. It operates on page blocks which consist of number
> > + * of physical pages being a power of 2 and store integer number of
> > + * compressed pages per block which results in determinism and simplicity.
> > + *
> > + * zblock doesn't export any API and is meant to be used via zpool API.
> > + */
> > +
> > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > +
> > +#include <linux/atomic.h>
> > +#include <linux/list.h>
> > +#include <linux/mm.h>
> > +#include <linux/module.h>
> > +#include <linux/preempt.h>
> > +#include <linux/slab.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/zpool.h>
> > +
> > +#define SLOT_FREE 0
> > +#define SLOT_OCCUPIED 1
> > +#define SLOT_MAPPED 2
> > +#define SLOT_UNMAPPED 3
> > +
> > +#define SLOT_BITS 5
> > +#define MAX_SLOTS (1 << SLOT_BITS)
> > +#define SLOT_MASK ((0x1UL << SLOT_BITS) - 1)
> > +
> > +#define BLOCK_DATA_SIZE(order) ((PAGE_SIZE << order) - sizeof(struct zblock_block))
> > +#define SLOT_SIZE(nslots, order) (round_down((BLOCK_DATA_SIZE(order) / nslots), sizeof(long)))
> > +
> > +#define BLOCK_CACHE_SIZE 32
> > +
> > +struct zblock_pool;
> > +
> > +struct zblock_ops {
> > +       int (*evict)(struct zblock_pool *pool, unsigned long handle);
> > +};
> > +
> > +/**
> > + * struct zblock_block - block metadata
> > + * Block consists of several (1/2/4/8) pages and contains fixed
> > + * integer number of slots for allocating compressed pages.
> > + *
> > + * lock:                        protects block
> > + * block_node:          links block into the relevant list in the pool
> > + * slot_info:       contains data about free/occupied slots
> > + * free_slots:          number of free slots in the block
> > + * under_reclaim:    if true shows that block is being evicted
> > + */
> > +struct zblock_block {
> > +       spinlock_t lock;
> > +       struct list_head block_node;
> > +       u8 slot_info[MAX_SLOTS];
> > +       unsigned int free_slots;
> > +       bool under_reclaim;
> > +};
> > +/**
> > + * struct block_desc - general metadata for block lists
> > + * Each block list stores only blocks of corresponding type which means
> > + * that all blocks in it have the same number and size of slots.
> > + * All slots are aligned to size of long.
> > + *
> > + * slot_size:          size of slot for this list
> > + * slots_per_block:    number of slots per block for this list
> > + * order:                      order for __get_free_pages
> > + */
> > +static const struct block_desc {
> > +       const unsigned int slot_size;
> > +       const unsigned short slots_per_block;
> > +       const unsigned short order;
> > +} block_desc[] = {
> > +       { SLOT_SIZE(32, 0), 32, 0 },
> > +       { SLOT_SIZE(22, 0), 22, 0 },
> > +       { SLOT_SIZE(17, 0), 17, 0 },
> > +       { SLOT_SIZE(13, 0), 13, 0 },
> > +       { SLOT_SIZE(11, 0), 11, 0 },
> > +       { SLOT_SIZE(9, 0), 9, 0 },
> > +       { SLOT_SIZE(8, 0), 8, 0 },
> > +       { SLOT_SIZE(14, 1), 14, 1 },
> > +       { SLOT_SIZE(12, 1), 12, 1 },
> > +       { SLOT_SIZE(11, 1), 11, 1 },
> > +       { SLOT_SIZE(10, 1), 10, 1 },
> > +       { SLOT_SIZE(9, 1), 9, 1 },
> > +       { SLOT_SIZE(8, 1), 8, 1 },
> > +       { SLOT_SIZE(15, 2), 15, 2 },
> > +       { SLOT_SIZE(14, 2), 14, 2 },
> > +       { SLOT_SIZE(13, 2), 13, 2 },
> > +       { SLOT_SIZE(12, 2), 12, 2 },
> > +       { SLOT_SIZE(11, 2), 11, 2 },
> > +       { SLOT_SIZE(10, 2), 10, 2 },
> > +       { SLOT_SIZE(9, 2), 9, 2 },
> > +       { SLOT_SIZE(8, 2), 8, 2 },
> > +       { SLOT_SIZE(15, 3), 15, 3 },
> > +       { SLOT_SIZE(14, 3), 14, 3 },
> > +       { SLOT_SIZE(13, 3), 13, 3 },
> > +       { SLOT_SIZE(12, 3), 12, 3 },
> > +       { SLOT_SIZE(11, 3), 11, 3 },
> > +       { SLOT_SIZE(10, 3), 10, 3 },
> > +       { SLOT_SIZE(9, 3), 9, 3 },
> > +       { SLOT_SIZE(7, 3), 7, 3 }
> > +};
> > +
> > +/**
> > + * struct block_list - stores metadata of particular list
> > + * lock:                       protects list
> > + * head:                       head of this list
> > + * block_cache:                blocks with free slots
> > + * block_count:                total number of blocks in the list
> > + */
> > +struct block_list {
> > +       spinlock_t lock;
> > +       struct list_head head;
> > +       struct zblock_block *block_cache[BLOCK_CACHE_SIZE];
> > +       unsigned long block_count;
> > +};
> > +
> > +/**
> > + * struct zblock_pool - stores metadata for each zblock pool
> > + * @block_lists:       array of block lists
> > + * @ops:                       pointer to a structure of user defined operations specified at
> > + *                                     pool creation time.
> > + * @zpool:                     zpool driver
> > + * @zpool_ops:         zpool operations structure with an evict callback
> > + * @alloc_flag:                protects block allocation from memory leak
> > + *
> > + * This structure is allocated at pool creation time and maintains metadata
> > + * for a particular zblock pool.
> > + */
> > +struct zblock_pool {
> > +       struct block_list block_lists[ARRAY_SIZE(block_desc)];
> > +       const struct zblock_ops *ops;
> > +       struct zpool *zpool;
> > +       const struct zpool_ops *zpool_ops;
> > +       atomic_t alloc_flag;
> > +};
> > +
> > +/*****************
> > + * Helpers
> > + *****************/
> > +
> > +static void cache_insert_block(struct zblock_block *block, struct block_list *list)
> > +{
> > +       unsigned int i, min_free_slots, min_index;
> > +
> > +       min_free_slots = MAX_SLOTS;
> > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > +               if (!list->block_cache[i] || !(list->block_cache[i])->free_slots) {
> > +                       list->block_cache[i] = block;
> > +                       return;
> > +               }
> > +               if ((list->block_cache[i])->free_slots < min_free_slots) {
> > +                       min_free_slots = (list->block_cache[i])->free_slots;
> > +                       min_index = i;
> > +               }
> > +       }
> > +       list->block_cache[min_index] = block;
> > +}
> > +
> > +static struct zblock_block *cache_find_block(struct block_list *list)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > +               if (list->block_cache[i] && (list->block_cache[i])->free_slots)
> > +                       return list->block_cache[i];
> > +       }
> > +       return NULL;
> > +}
> > +
> > +static int is_in_cache(struct zblock_block *block, struct block_list *list)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > +               if (block == list->block_cache[i])
> > +                       return i;
> > +       }
> > +       return -1;
> > +}
> > +
> > +/*
> > + * allocate new block and add it to corresponding block list
> > + */
> > +static struct zblock_block *alloc_block(struct zblock_pool *pool,
> > +                                                                       int block_type, gfp_t gfp)
> > +{
> > +       struct zblock_block *block;
> > +       struct block_list *list;
> > +
> > +       block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
> > +       if (!block)
> > +               return NULL;
> > +
> > +       list = &(pool->block_lists)[block_type];
> > +
> > +       /* init block data  */
> > +       spin_lock_init(&block->lock);
> > +       memset(block->slot_info, SLOT_FREE, block_desc[block_type].slots_per_block);
> > +       block->free_slots = block_desc[block_type].slots_per_block;
> > +       block->under_reclaim = false;
> > +
> > +       spin_lock(&list->lock);
> > +       /* inserting block into list */
> > +       INIT_LIST_HEAD(&block->block_node);
> > +       list_add(&block->block_node, &list->head);
> > +       cache_insert_block(block, list);
> > +       list->block_count++;
> > +       spin_unlock(&list->lock);
> > +       return block;
> > +}
> > +
> > +/*
> > + * Encodes the handle of a particular slot in the pool using metadata
> > + */
> > +static inline unsigned long metadata_to_handle(struct zblock_block *block,
> > +                                                       unsigned int block_type, unsigned int slot)
> > +{
> > +       return (unsigned long)(block) + (block_type << SLOT_BITS) + slot;
> > +}
> > +
> > +/* Returns block, block type and slot in the pool corresponding to handle */
> > +static inline struct zblock_block *handle_to_metadata(unsigned long handle,
> > +                                               unsigned int *block_type, unsigned int *slot)
> > +{
> > +       *block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
> > +       *slot = handle & SLOT_MASK;
> > +       return (struct zblock_block *)(handle & PAGE_MASK);
> > +}
> > +
> > +
> > +/*****************
> > + * API Functions
> > + *****************/
> > +/**
> > + * zblock_create_pool() - create a new zblock pool
> > + * @gfp:       gfp flags when allocating the zblock pool structure
> > + * @ops:       user-defined operations for the zblock pool
> > + *
> > + * Return: pointer to the new zblock pool or NULL if the metadata allocation
> > + * failed.
> > + */
> > +static struct zblock_pool *zblock_create_pool(gfp_t gfp, const struct zblock_ops *ops)
> > +{
> > +       struct zblock_pool *pool;
> > +       struct block_list *list;
> > +       int i, j;
> > +
> > +       pool = kmalloc(sizeof(struct zblock_pool), gfp);
> > +       if (!pool)
> > +               return NULL;
> > +
> > +       /* init each block list */
> > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > +               list = &(pool->block_lists)[i];
> > +               spin_lock_init(&list->lock);
> > +               INIT_LIST_HEAD(&list->head);
> > +               for (j = 0; j < BLOCK_CACHE_SIZE; j++)
> > +                       list->block_cache[j] = NULL;
> > +               list->block_count = 0;
> > +       }
> > +       pool->ops = ops;
> > +       atomic_set(&pool->alloc_flag, 0);
> > +       return pool;
> > +}
> > +
> > +/**
> > + * zblock_destroy_pool() - destroys an existing zblock pool
> > + * @pool:      the zblock pool to be destroyed
> > + *
> > + */
> > +static void zblock_destroy_pool(struct zblock_pool *pool)
> > +{
> > +       kfree(pool);
> > +}
> > +
> > +
> > +/**
> > + * zblock_alloc() - allocates a slot of appropriate size
> > + * @pool:      zblock pool from which to allocate
> > + * @size:      size in bytes of the desired allocation
> > + * @gfp:       gfp flags used if the pool needs to grow
> > + * @handle:    handle of the new allocation
> > + *
> > + * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
> > + * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
> > + * a new slot.
> > + */
> > +static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
> > +                       unsigned long *handle)
> > +{
> > +       unsigned int block_type, slot;
> > +       struct zblock_block *block;
> > +       struct block_list *list;
> > +
> > +       if (!size)
> > +               return -EINVAL;
> > +
> > +       if (size > PAGE_SIZE)
> > +               return -ENOSPC;
> > +
> > +       /* find basic block type with suitable slot size */
> > +       for (block_type = 0; block_type < ARRAY_SIZE(block_desc); block_type++) {
> > +               if (size <= block_desc[block_type].slot_size)
> > +                       break;
> > +       }
> > +       list = &(pool->block_lists[block_type]);
> > +
> > +check:
> > +       spin_lock(&list->lock);
> > +       /* check if there are free slots in cache */
> > +       block = cache_find_block(list);
> > +       if (block)
> > +               goto found;
> > +       spin_unlock(&list->lock);
> > +
> > +       /* not found block with free slots try to allocate new empty block */
> > +       if (atomic_cmpxchg(&pool->alloc_flag, 0, 1))
> > +               goto check;
> > +       block = alloc_block(pool, block_type, gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
> > +       if (block) {
> > +               spin_lock(&list->lock);
> > +               goto found;
> > +       }
> > +       atomic_set(&pool->alloc_flag, 0);
> > +       return -ENOMEM;
> > +
> > +found:
> > +       spin_lock(&block->lock);
> > +       block->free_slots--;
> > +       spin_unlock(&list->lock);
> > +       /* find the first free slot in block */
> > +       for (slot = 0; slot < block_desc[block_type].slots_per_block; slot++) {
> > +               if (block->slot_info[slot] == SLOT_FREE)
> > +                       break;
> > +       }
> > +       block->slot_info[slot] = SLOT_OCCUPIED;
> > +       spin_unlock(&block->lock);
> > +       *handle = metadata_to_handle(block, block_type, slot);
> > +       atomic_set(&pool->alloc_flag, 0);
> > +       return 0;
> > +}
> > +
> > +/**
> > + * zblock_free() - frees the allocation associated with the given handle
> > + * @pool:      pool in which the allocation resided
> > + * @handle:    handle associated with the allocation returned by zblock_alloc()
> > + *
> > + */
> > +static void zblock_free(struct zblock_pool *pool, unsigned long handle)
> > +{
> > +       unsigned int slot, block_type;
> > +       struct zblock_block *block;
> > +       struct block_list *list;
> > +       int i;
> > +
> > +       block = handle_to_metadata(handle, &block_type, &slot);
> > +       list = &(pool->block_lists[block_type]);
> > +
> > +       if (block->under_reclaim)
> > +               return;
> > +       spin_lock(&list->lock);
> > +       i = is_in_cache(block, list);
> > +       block->free_slots++;
> > +       /* if all slots in block are empty delete whole block */
> > +       if (block->free_slots == block_desc[block_type].slots_per_block) {
> > +               list_del(&block->block_node);
> > +               list->block_count--;
> > +
> > +               /* if cached block to be deleted */
> > +               if (i != -1)
> > +                       list->block_cache[i] = NULL;
> > +               spin_unlock(&list->lock);
> > +               free_pages((unsigned long)block, block_desc[block_type].order);
> > +               return;
> > +       }
> > +       /* if block is not cached update cache */
> > +       if (i == -1)
> > +               cache_insert_block(block, list);
> > +
> > +       spin_lock(&block->lock);
> > +       spin_unlock(&list->lock);
> > +       block->slot_info[slot] = SLOT_FREE;
> > +       spin_unlock(&block->lock);
> > +}
> > +
> > +/**
> > + * zblock_reclaim_block() - evicts allocations from block and frees it
> > + * @pool:      pool from which a block will attempt to be evicted
> > + *
> > + * Returns: pages reclaimed count if block is successfully freed
> > + *          otherwise -EINVAL if there are no blocks to evict
> > + */
> > +static int zblock_reclaim_block(struct zblock_pool *pool)
> > +{
> > +       struct zblock_block *block;
> > +       struct block_list *list;
> > +       unsigned long handle, block_type, slot;
> > +       int ret, i, reclaimed;
> > +
> > +       /* start with list storing blocks with the worst compression and try
> > +        * to evict the first added (oldest) block in this list
> > +        */
> > +       for (block_type = ARRAY_SIZE(block_desc) - 1; block_type >= 0; --block_type) {
> > +               list = &(pool->block_lists[block_type]);
> > +               spin_lock(&list->lock);
> > +
> > +               /* find the oldest block in list */
> > +               block = list_last_entry(&list->head, struct zblock_block, block_node);
> > +
> > +               if (!block) {
> > +                       spin_unlock(&list->lock);
> > +                       continue;
> > +               }
> > +               i = is_in_cache(block, list);
> > +               /* skip iteration if this block is cached */
> > +               if (i != -1) {
> > +                       spin_unlock(&list->lock);
> > +                       continue;
> > +               }
> > +               block->under_reclaim = true;
> > +               spin_unlock(&list->lock);
> > +               reclaimed = 0;
> > +
> > +               /* try to evict all UNMAPPED slots in block */
> > +               for (slot = 0; slot < block_desc[block_type].slots_per_block; ++slot) {
> > +                       if (block->slot_info[slot] == SLOT_UNMAPPED) {
> > +                               handle = metadata_to_handle(block, block_type, slot);
> > +                               ret = pool->ops->evict(pool, handle);
> > +                               if (ret)
> > +                                       break;
> > +
> > +                               ++reclaimed;
> > +                               spin_lock(&block->lock);
> > +                               block->slot_info[slot] = SLOT_FREE;
> > +                               spin_unlock(&block->lock);
> > +                               block->free_slots++;
> > +                       }
> > +               }
> > +               spin_lock(&list->lock);
> > +               /* some occupied slots remained - insert block */
> > +               if (block->free_slots != block_desc[block_type].slots_per_block) {
> > +                       block->under_reclaim = false;
> > +                       cache_insert_block(block, list);
> > +                       spin_unlock(&list->lock);
> > +               } else {
> > +               /* all slots are free - delete this block */
> > +                       list_del(&block->block_node);
> > +                       list->block_count--;
> > +                       spin_unlock(&list->lock);
> > +                       free_pages((unsigned long)block, block_desc[block_type].order);
> > +               }
> > +               if (reclaimed != 0)
> > +                       return reclaimed;
> > +               return -EAGAIN;
> > +       }
> > +       return -EINVAL;
> > +}
> > +
> > +
> > +/**
> > + * zblock_map() - maps the allocation associated with the given handle
> > + * @pool:      pool in which the allocation resides
> > + * @handle:    handle associated with the allocation to be mapped
> > + *
> > + *
> > + * Returns: a pointer to the mapped allocation
> > + */
> > +static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
> > +{
> > +       unsigned int block_type, slot;
> > +       struct zblock_block *block;
> > +
> > +       block = handle_to_metadata(handle, &block_type, &slot);
> > +       spin_lock(&block->lock);
> > +       block->slot_info[slot] = SLOT_MAPPED;
> > +       spin_unlock(&block->lock);
> > +       return (void *)(block + 1) + slot * block_desc[block_type].slot_size;
> > +}
> > +
> > +/**
> > + * zblock_unmap() - unmaps the allocation associated with the given handle
> > + * @pool:      pool in which the allocation resides
> > + * @handle:    handle associated with the allocation to be unmapped
> > + */
> > +static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
> > +{
> > +       unsigned int block_type, slot;
> > +       struct zblock_block *block;
> > +
> > +       block = handle_to_metadata(handle, &block_type, &slot);
> > +       spin_lock(&block->lock);
> > +       block->slot_info[slot] = SLOT_UNMAPPED;
> > +       spin_unlock(&block->lock);
> > +}
> > +
> > +/**
> > + * zblock_get_pool_size() - gets the zblock pool size in bytes
> > + * @pool:      pool whose size is being queried
> > + *
> > + * Returns: size in bytes of the given pool.
> > + */
> > +static u64 zblock_get_pool_size(struct zblock_pool *pool)
> > +{
> > +       u64 total_size;
> > +       int i;
> > +
> > +       total_size = 0;
> > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > +               total_size += (pool->block_lists)[i].block_count
> > +                               * (PAGE_SIZE << block_desc[i].order);
> > +       }
> > +       return total_size;
> > +}
> > +
> > +/*****************
> > + * zpool
> > + ****************/
> > +
> > +static int zblock_zpool_evict(struct zblock_pool *pool, unsigned long handle)
> > +{
> > +       if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
> > +               return pool->zpool_ops->evict(pool->zpool, handle);
> > +       else
> > +               return -ENOENT;
> > +}
> > +
> > +static const struct zblock_ops zblock_zpool_ops = {
> > +       .evict =        zblock_zpool_evict
> > +};
> > +
> > +static void *zblock_zpool_create(const char *name, gfp_t gfp,
> > +                                  const struct zpool_ops *zpool_ops,
> > +                                  struct zpool *zpool)
> > +{
> > +       struct zblock_pool *pool;
> > +
> > +       pool = zblock_create_pool(gfp, &zblock_zpool_ops);
> > +       if (pool) {
> > +               pool->zpool = zpool;
> > +               pool->zpool_ops = zpool_ops;
> > +       }
> > +       return pool;
> > +}
> > +
> > +static void zblock_zpool_destroy(void *pool)
> > +{
> > +       zblock_destroy_pool(pool);
> > +}
> > +
> > +static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
> > +                       unsigned long *handle)
> > +{
> > +       return zblock_alloc(pool, size, gfp, handle);
> > +}
> > +
> > +static void zblock_zpool_free(void *pool, unsigned long handle)
> > +{
> > +       zblock_free(pool, handle);
> > +}
> > +
> > +static int zblock_zpool_shrink(void *pool, unsigned int pages,
> > +                       unsigned int *reclaimed)
> > +{
> > +       unsigned int total = 0;
> > +       int ret = -EINVAL;
> > +
> > +       while (total < pages) {
> > +               ret = zblock_reclaim_block(pool);
> > +               if (ret < 0)
> > +                       break;
> > +               total += ret;
> > +       }
> > +       if (reclaimed)
> > +               *reclaimed = total;
> > +
> > +       return ret;
> > +}
> > +
> > +static void *zblock_zpool_map(void *pool, unsigned long handle,
> > +                       enum zpool_mapmode mm)
> > +{
> > +       return zblock_map(pool, handle);
> > +}
> > +
> > +static void zblock_zpool_unmap(void *pool, unsigned long handle)
> > +{
> > +       zblock_unmap(pool, handle);
> > +}
> > +
> > +static u64 zblock_zpool_total_size(void *pool)
> > +{
> > +       return zblock_get_pool_size(pool);
> > +}
> > +
> > +static struct zpool_driver zblock_zpool_driver = {
> > +       .type =         "zblock",
> > +       .owner =        THIS_MODULE,
> > +       .create =       zblock_zpool_create,
> > +       .destroy =      zblock_zpool_destroy,
> > +       .malloc =       zblock_zpool_malloc,
> > +       .free =         zblock_zpool_free,
> > +       .shrink =       zblock_zpool_shrink,
> > +       .map =          zblock_zpool_map,
> > +       .unmap =        zblock_zpool_unmap,
> > +       .total_size =   zblock_zpool_total_size,
> > +};
> > +
> > +MODULE_ALIAS("zpool-zblock");
> > +
> > +static int __init init_zblock(void)
> > +{
> > +       pr_info("loaded\n");
> > +       zpool_register_driver(&zblock_zpool_driver);
> > +       return 0;
> > +}
> > +
> > +static void __exit exit_zblock(void)
> > +{
> > +       zpool_unregister_driver(&zblock_zpool_driver);
> > +       pr_info("unloaded\n");
> > +}
> > +
> > +module_init(init_zblock);
> > +module_exit(exit_zblock);
> > +
> > +MODULE_LICENSE("GPL");
> > +MODULE_AUTHOR("Ananda Badmaeb <a.badmaev@clicknet.pro>");
> > +MODULE_DESCRIPTION("Block allocator for compressed pages");
> > --
> > 2.34.1
> >
> >


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-29  5:24   ` Ananda Badmaev
@ 2022-09-29  6:55     ` Vitaly Wool
  0 siblings, 0 replies; 10+ messages in thread
From: Vitaly Wool @ 2022-09-29  6:55 UTC (permalink / raw)
  To: Ananda Badmaev; +Cc: Yosry Ahmed, Linux-MM

On Thu, Sep 29, 2022 at 7:25 AM Ananda Badmaev <a.badmaev@clicknet.pro> wrote:
>
> 28.09.2022 21:37, Yosry Ahmed wrote:
> > On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
> >>
> >> From: Ananda <a.badmaev@clicknet.pro>
> >>
> >>      Zblock stores integer number of compressed objects per zblock block.
> >> These blocks consist of several physical pages (1/2/4/8) and are arranged
> >> in linked lists.
> >>      The range from 0 to PAGE_SIZE is divided into the number of intervals
> >> corresponding to the number of lists and each list only operates objects
> >> of size from its interval. Thus the block lists are isolated from each
> >> other, which makes it possible to simultaneously perform actions with
> >> several objects from different lists.
> >>      Blocks make it possible to densely arrange objects of various sizes
> >> resulting in low internal fragmentation. Also this allocator tries to fill
> >> incomplete blocks instead of adding new ones thus in many cases providing
> >> a compression ratio substantially higher than z3fold and zbud.
> >>      Zblock does not require MMU and also is superior to zsmalloc with
> >> regard to the worst execution times, thus allowing for better response time
> >> and real-time characteristics of the whole system.
> >>
> >
> > It seems to me, and I can be wrong, that there is some overlap in
> > design and goals between this zpool backend and zsmalloc. They both
> > try to avoid internal fragmentation by avoiding the static slots used
> > by zbud and z3fold, and instead pack compressed pages more
> > dynamically. They both have some sort of concurrency handling
> > (separate block lists in zblock vs. classes in zsmalloc). A key
> > difference is that zsmalloc avoids higher order allocations (at least
> > based on its docs), and instead allows compressed pages to span across
> > 0-order page boundaries.
>
> You are right.
>
> > The key differences I see here (based on this commit message and
> > zsmalloc docs) are:
> > a) Blocks in zblock can consist of higher order pages.
> > b) Compressed pages in zsmalloc can span page boundaries (I am
> > assuming this isn't the case for zblock).
> >
> > It appears to me that if zblock has better performance than zsmalloc,
> > it can be because pages in zblock are physically contiguous vs. the
> > 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> > assumption correct?
> >
>
> Yes.
>
> > If yes, would it be better to implement those changes as some tunable
> > extension to zsmalloc? It would make it easier if we have overall less
> > zpool backends, and also easier for current users of zsmalloc to
> > experiment with these changes.
> >
>
> Sounds reasonable, but I'm afraid that in this case the zsmalloc code
> will become overloaded; there might also be problems with keeping
> zblock's simplicity.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-29  6:55   ` Vitaly Wool
@ 2022-09-29  7:58     ` Yosry Ahmed
  2022-09-29  8:53       ` Vitaly Wool
  0 siblings, 1 reply; 10+ messages in thread
From: Yosry Ahmed @ 2022-09-29  7:58 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: ananda, Linux-MM

On Wed, Sep 28, 2022 at 11:55 PM Vitaly Wool <vitaly.wool@konsulko.com> wrote:
>
> On Wed, Sep 28, 2022 at 8:38 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> >
> > On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
> > >
> > > From: Ananda <a.badmaev@clicknet.pro>
> > >
> > >     Zblock stores integer number of compressed objects per zblock block.
> > > These blocks consist of several physical pages (1/2/4/8) and are arranged
> > > in linked lists.
> > >     The range from 0 to PAGE_SIZE is divided into the number of intervals
> > > corresponding to the number of lists and each list only operates objects
> > > of size from its interval. Thus the block lists are isolated from each
> > > other, which makes it possible to simultaneously perform actions with
> > > several objects from different lists.
> > >     Blocks make it possible to densely arrange objects of various sizes
> > > resulting in low internal fragmentation. Also this allocator tries to fill
> > > incomplete blocks instead of adding new ones thus in many cases providing
> > > a compression ratio substantially higher than z3fold and zbud.
> > >     Zblock does not require MMU and also is superior to zsmalloc with
> > > regard to the worst execution times, thus allowing for better response time
> > > and real-time characteristics of the whole system.
> > >
> >
> > It seems to me, and I can be wrong, that there is some overlap in
> > design and goals between this zpool backend and zsmalloc. They both
> > try to avoid internal fragmentation by avoiding the static slots used
> > by zbud and z3fold, and instead pack compressed pages more
> > dynamically. They both have some sort of concurrency handling
> > (separate block lists in zblock vs. classes in zsmalloc). A key
> > difference is that zsmalloc avoids higher order allocations (at least
> > based on its docs), and instead allows compressed pages to span across
> > 0-order page boundaries.
>
> Well, another key difference is that zsmalloc may only work on
> MMU-enabled systems.
>
> > The key differences I see here (based on this commit message and
> > zsmalloc docs) are:
> > a) Blocks in zblock can consist of higher order pages.
> > b) Compressed pages in zsmalloc can span page boundaries (I am
> > assuming this isn't the case for zblock).
> >
> > It appears to me that if zblock has better performance than zsmalloc,
> > it can be because pages in zblock are physically contiguous vs. the
> > 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> > assumption correct?
> >
> > If yes, would it be better to implement those changes as some tunable
> > extension to zsmalloc? It would make it easier if we have overall less
> > zpool backends, and also easier for current users of zsmalloc to
> > experiment with these changes.
>
> Easier for whom? Not for me, nor for anyone using zswap, that's what I
> have to say.
> The zpool API was created to unify compression backends, so I would
> strongly prefer to keep using zpool for backend configuration and
> selection rather than implementing a custom in-zsmalloc selection
> mechanism.
>
> I don't think merging is a good idea, and here are the reasons:
> - zsmalloc's code is already almost incomprehensible
> - zsmalloc's main objective is density, while zblock aims for low latency
> - merging the two approaches within zsmalloc would mean creating some
> internal selection mechanism within zsmalloc which I would absolutely
> like to avoid (for smaller RAM devices I usually don't compile
> zsmalloc at all or compile it as a module).
>

Thanks for taking the time to respond to this :)

I am sorry if my intention was not clear, but I did not mean that we
should have zblock be added to zsmalloc such that we "select" between
zsmalloc and zblock. What I meant (which can still be utter nonsense)
is that if the differences between zsmalloc and zblock can be
reimagined as improvements to zsmalloc, then maybe this would be
better, to have fewer zpool backends overall if we don't need more.

For example, maybe we can have the default allocation order be a
config option. By default it would be 0, which maintains the current
behavior, and then we can configure it to something higher to get a
behavior closer to zblock. This is of course an oversimplification,
but if most key differences can be formulated similarly, then maybe we
can get an improved zsmalloc instead of zblock, with perhaps a few
tunables (tunables like allocation order, *not* different selectable
modes).
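
To make the idea concrete, here is a hypothetical sketch of such a
tunable in the same Kconfig style as the patch; ZSMALLOC_DEFAULT_ORDER
is an invented name for illustration, not an existing option:

config ZSMALLOC_DEFAULT_ORDER
	int "Default page order for zsmalloc backing allocations"
	depends on ZSMALLOC
	range 0 3
	default 0
	help
	  Illustrative only. 0 keeps the current 0-order behavior;
	  higher values would let zsmalloc back its pools with
	  physically contiguous page groups, closer to what zblock does.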

You are, of course, way more familiar with this code than me, so
please excuse me if what I am saying still sounds like nonsense. I am
just trying to avoid having similar zpool backends if possible.

The zsmalloc code being incomprehensible is another point that I am not
considering here, so perhaps even if everything else checks out, the
added complexity isn't worth it. I can't judge this. I was only making
a suggestion.

> Thanks,
> Vitaly
>
> > > Signed-off-by: Ananda <a.badmaev@clicknet.pro>
> > > ---
> > >
> > > v2: fixed compiler warnings
> > >
> > > v3: added documentation and const modifier to struct tree_descr
> > >
> > > v4: - fixed gfp flags for block allocation
> > >     - fixed potential memory leak when allocating blocks
> > >     - resolved some issues with code style and warnings from checkpatch
> > >       (except warning about single line config symbol description)
> > >     - moved test results from documentation to changelog
> > >
> > > v5: - "direct" handle mapping and use of linked lists instead of red-black
> > >       trees resulting in faster operations and a bit simpler code
> > >     - renamed ztree -> zblock
> > >     - edited various comments and descriptions
> > >
> > >  Documentation/mm/zblock.rst |  31 ++
> > >  MAINTAINERS                 |   7 +
> > >  mm/Kconfig                  |  17 +
> > >  mm/Makefile                 |   1 +
> > >  mm/zblock.c                 | 637 ++++++++++++++++++++++++++++++++++++
> > >  5 files changed, 693 insertions(+)
> > >  create mode 100644 Documentation/mm/zblock.rst
> > >  create mode 100644 mm/zblock.c
> > >
> > > diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
> > > new file mode 100644
> > > index 000000000000..5008ce90b54b
> > > --- /dev/null
> > > +++ b/Documentation/mm/zblock.rst
> > > @@ -0,0 +1,31 @@
> > > +.. SPDX-License-Identifier: GPL-2.0
> > > +
> > > +.. _block:
> > > +
> > > +======
> > > +zblock
> > > +======
> > > +
> > > +Zblock stores integer number of compressed objects per block. These
> > > +blocks consist of several consecutive physical pages (from 1 to 8) and
> > > +are arranged in lists. The range from 0 to PAGE_SIZE is divided into the
> > > +number of intervals corresponding to the number of lists and each list
> > > +only operates objects of size from its interval. Thus the block lists are
> > > +isolated from each other, which makes it possible to simultaneously
> > > +perform actions with several objects from different lists.
> > > +
> > > +Blocks make it possible to densely arrange objects of various sizes
> > > +resulting in low internal fragmentation. Also this allocator tries to fill
> > > +incomplete blocks instead of adding new ones thus in many cases providing
> > > +a compression ratio substantially higher than z3fold and zbud. Zblock does
> > > +not require MMU and also is superior to zsmalloc with regard to the worst
> > > +execution times, thus allowing for better response time and real-time
> > > +characteristics of the whole system.
> > > +
> > > +Like z3fold and zsmalloc zblock_alloc() does not return a dereferenceable
> > > +pointer. Instead, it returns an unsigned long handle which encodes actual
> > > +location of the allocated object.
> > > +
> > > +Unlike zbud and z3fold zblock works well with objects of various sizes - both
> > > +highly compressed and poorly compressed including cases where both types
> > > +are present.
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index f1390b8270b2..014fb19eb2cc 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -22457,6 +22457,13 @@ L:     linux-mm@kvack.org
> > >  S:     Maintained
> > >  F:     mm/z3fold.c
> > >
> > > +ZBLOCK COMPRESSED PAGE ALLOCATOR
> > > +M:     Ananda Badmaev <a.badmaev@clicknet.pro>
> > > +M:     Vitaly Wool <vitaly.wool@konsulko.com>
> > > +L:     linux-mm@kvack.org
> > > +S:     Maintained
> > > +F:     mm/zblock.c
> > > +
> > >  ZD1211RW WIRELESS DRIVER
> > >  M:     Ulrich Kunitz <kune@deine-taler.de>
> > >  L:     linux-wireless@vger.kernel.org
> > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > index 0331f1461f81..470c80f5726d 100644
> > > --- a/mm/Kconfig
> > > +++ b/mm/Kconfig
> > > @@ -149,6 +149,12 @@ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > >         select ZSMALLOC
> > >         help
> > >           Use the zsmalloc allocator as the default allocator.
> > > +
> > > +config ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > > +       bool "zblock"
> > > +       select ZBLOCK
> > > +       help
> > > +         Use the zblock allocator as the default allocator.
> > >  endchoice
> > >
> > >  config ZSWAP_ZPOOL_DEFAULT
> > > @@ -157,6 +163,7 @@ config ZSWAP_ZPOOL_DEFAULT
> > >         default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
> > >         default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
> > >         default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > > +          default "zblock" if ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > >         default ""
> > >
> > >  config ZBUD
> > > @@ -187,6 +194,16 @@ config ZSMALLOC
> > >           pages of various compression levels efficiently. It achieves
> > >           the highest storage density with the least amount of fragmentation.
> > >
> > > +config ZBLOCK
> > > +       tristate "Simple block allocator (zblock)"
> > > +       depends on ZPOOL
> > > +       help
> > > +         A special purpose allocator for storing compressed pages.
> > > +         It stores integer number of compressed pages per block and
> > > +         each block consists of number of physical pages being a power of 2.
> > > +         zblock provides fast read/write, limited worst case time for
> > > +         operations and good compression ratio in most scenarios.
> > > +
> > >  config ZSMALLOC_STAT
> > >         bool "Export zsmalloc statistics"
> > >         depends on ZSMALLOC
> > > diff --git a/mm/Makefile b/mm/Makefile
> > > index 9a564f836403..eb7235da6e61 100644
> > > --- a/mm/Makefile
> > > +++ b/mm/Makefile
> > > @@ -110,6 +110,7 @@ obj-$(CONFIG_ZPOOL) += zpool.o
> > >  obj-$(CONFIG_ZBUD)     += zbud.o
> > >  obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
> > >  obj-$(CONFIG_Z3FOLD)   += z3fold.o
> > > +obj-$(CONFIG_ZBLOCK)   += zblock.o
> > >  obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
> > >  obj-$(CONFIG_CMA)      += cma.o
> > >  obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
> > > diff --git a/mm/zblock.c b/mm/zblock.c
> > > new file mode 100644
> > > index 000000000000..b389f43e0c26
> > > --- /dev/null
> > > +++ b/mm/zblock.c
> > > @@ -0,0 +1,637 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * zblock.c
> > > + *
> > > + * Author: Ananda Badmaev <a.badmaev@clicknet.pro>
> > > + * Copyright (C) 2022, Konsulko AB.
> > > + *
> > > + * This implementation is based on z3fold written by Vitaly Wool.
> > > + * Zblock is a small object allocator with the intention to serve as a
> > > + * zpool backend. It operates on page blocks which consist of number
> > > + * of physical pages being a power of 2 and store integer number of
> > > + * compressed pages per block which results in determinism and simplicity.
> > > + *
> > > + * zblock doesn't export any API and is meant to be used via zpool API.
> > > + */
> > > +
> > > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > > +
> > > +#include <linux/atomic.h>
> > > +#include <linux/list.h>
> > > +#include <linux/mm.h>
> > > +#include <linux/module.h>
> > > +#include <linux/preempt.h>
> > > +#include <linux/slab.h>
> > > +#include <linux/spinlock.h>
> > > +#include <linux/zpool.h>
> > > +
> > > +#define SLOT_FREE 0
> > > +#define SLOT_OCCUPIED 1
> > > +#define SLOT_MAPPED 2
> > > +#define SLOT_UNMAPPED 3
> > > +
> > > +#define SLOT_BITS 5
> > > +#define MAX_SLOTS (1 << SLOT_BITS)
> > > +#define SLOT_MASK ((0x1UL << SLOT_BITS) - 1)
> > > +
> > > +#define BLOCK_DATA_SIZE(order) ((PAGE_SIZE << order) - sizeof(struct zblock_block))
> > > +#define SLOT_SIZE(nslots, order) (round_down((BLOCK_DATA_SIZE(order) / nslots), sizeof(long)))
> > > +
> > > +#define BLOCK_CACHE_SIZE 32
> > > +
> > > +struct zblock_pool;
> > > +
> > > +struct zblock_ops {
> > > +       int (*evict)(struct zblock_pool *pool, unsigned long handle);
> > > +};
> > > +
> > > +/**
> > > + * struct zblock_block - block metadata
> > > + * Block consists of several (1/2/4/8) pages and contains fixed
> > > + * integer number of slots for allocating compressed pages.
> > > + *
> > > + * lock:                        protects block
> > > + * block_node:          links block into the relevant list in the pool
> > > + * slot_info:       contains data about free/occupied slots
> > > + * free_slots:          number of free slots in the block
> > > + * under_reclaim:    if true shows that block is being evicted
> > > + */
> > > +struct zblock_block {
> > > +       spinlock_t lock;
> > > +       struct list_head block_node;
> > > +       u8 slot_info[MAX_SLOTS];
> > > +       unsigned int free_slots;
> > > +       bool under_reclaim;
> > > +};
> > > +/**
> > > + * struct block_desc - general metadata for block lists
> > > + * Each block list stores only blocks of corresponding type which means
> > > + * that all blocks in it have the same number and size of slots.
> > > + * All slots are aligned to size of long.
> > > + *
> > > + * slot_size:          size of slot for this list
> > > + * slots_per_block:    number of slots per block for this list
> > > + * order:                      order for __get_free_pages
> > > + */
> > > +static const struct block_desc {
> > > +       const unsigned int slot_size;
> > > +       const unsigned short slots_per_block;
> > > +       const unsigned short order;
> > > +} block_desc[] = {
> > > +       { SLOT_SIZE(32, 0), 32, 0 },
> > > +       { SLOT_SIZE(22, 0), 22, 0 },
> > > +       { SLOT_SIZE(17, 0), 17, 0 },
> > > +       { SLOT_SIZE(13, 0), 13, 0 },
> > > +       { SLOT_SIZE(11, 0), 11, 0 },
> > > +       { SLOT_SIZE(9, 0), 9, 0 },
> > > +       { SLOT_SIZE(8, 0), 8, 0 },
> > > +       { SLOT_SIZE(14, 1), 14, 1 },
> > > +       { SLOT_SIZE(12, 1), 12, 1 },
> > > +       { SLOT_SIZE(11, 1), 11, 1 },
> > > +       { SLOT_SIZE(10, 1), 10, 1 },
> > > +       { SLOT_SIZE(9, 1), 9, 1 },
> > > +       { SLOT_SIZE(8, 1), 8, 1 },
> > > +       { SLOT_SIZE(15, 2), 15, 2 },
> > > +       { SLOT_SIZE(14, 2), 14, 2 },
> > > +       { SLOT_SIZE(13, 2), 13, 2 },
> > > +       { SLOT_SIZE(12, 2), 12, 2 },
> > > +       { SLOT_SIZE(11, 2), 11, 2 },
> > > +       { SLOT_SIZE(10, 2), 10, 2 },
> > > +       { SLOT_SIZE(9, 2), 9, 2 },
> > > +       { SLOT_SIZE(8, 2), 8, 2 },
> > > +       { SLOT_SIZE(15, 3), 15, 3 },
> > > +       { SLOT_SIZE(14, 3), 14, 3 },
> > > +       { SLOT_SIZE(13, 3), 13, 3 },
> > > +       { SLOT_SIZE(12, 3), 12, 3 },
> > > +       { SLOT_SIZE(11, 3), 11, 3 },
> > > +       { SLOT_SIZE(10, 3), 10, 3 },
> > > +       { SLOT_SIZE(9, 3), 9, 3 },
> > > +       { SLOT_SIZE(7, 3), 7, 3 }
> > > +};
> > > +
> > > +/**
> > > + * struct block_list - stores metadata of particular list
> > > + * lock:                       protects list
> > > + * head:                       head of this list
> > > + * block_cache:                blocks with free slots
> > > + * block_count:                total number of blocks in the list
> > > + */
> > > +struct block_list {
> > > +       spinlock_t lock;
> > > +       struct list_head head;
> > > +       struct zblock_block *block_cache[BLOCK_CACHE_SIZE];
> > > +       unsigned long block_count;
> > > +};
> > > +
> > > +/**
> > > + * struct zblock_pool - stores metadata for each zblock pool
> > > + * @block_lists:       array of block lists
> > > + * @ops:                       pointer to a structure of user defined operations specified at
> > > + *                                     pool creation time.
> > > + * @zpool:                     zpool driver
> > > + * @zpool_ops:         zpool operations structure with an evict callback
> > > + * @alloc_flag:                protects block allocation from memory leak
> > > + *
> > > + * This structure is allocated at pool creation time and maintains metadata
> > > + * for a particular zblock pool.
> > > + */
> > > +struct zblock_pool {
> > > +       struct block_list block_lists[ARRAY_SIZE(block_desc)];
> > > +       const struct zblock_ops *ops;
> > > +       struct zpool *zpool;
> > > +       const struct zpool_ops *zpool_ops;
> > > +       atomic_t alloc_flag;
> > > +};
> > > +
> > > +/*****************
> > > + * Helpers
> > > + *****************/
> > > +
> > > +static void cache_insert_block(struct zblock_block *block, struct block_list *list)
> > > +{
> > > +       unsigned int i, min_free_slots, min_index;
> > > +
> > > +       min_free_slots = MAX_SLOTS;
> > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > +               if (!list->block_cache[i] || !(list->block_cache[i])->free_slots) {
> > > +                       list->block_cache[i] = block;
> > > +                       return;
> > > +               }
> > > +               if ((list->block_cache[i])->free_slots < min_free_slots) {
> > > +                       min_free_slots = (list->block_cache[i])->free_slots;
> > > +                       min_index = i;
> > > +               }
> > > +       }
> > > +       list->block_cache[min_index] = block;
> > > +}
> > > +
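> > > +/* return any cached block that still has free slots, or NULL if none */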
> > > +static struct zblock_block *cache_find_block(struct block_list *list)
> > > +{
> > > +       int i;
> > > +
> > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > +               if (list->block_cache[i] && (list->block_cache[i])->free_slots)
> > > +                       return list->block_cache[i];
> > > +       }
> > > +       return NULL;
> > > +}
> > > +
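> > > +/* return the cache index of @block, or -1 if it is not cached */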
> > > +static int is_in_cache(struct zblock_block *block, struct block_list *list)
> > > +{
> > > +       int i;
> > > +
> > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > +               if (block == list->block_cache[i])
> > > +                       return i;
> > > +       }
> > > +       return -1;
> > > +}
> > > +
> > > +/*
> > > + * allocate new block and add it to corresponding block list
> > > + */
> > > +static struct zblock_block *alloc_block(struct zblock_pool *pool,
> > > +                                                                       int block_type, gfp_t gfp)
> > > +{
> > > +       struct zblock_block *block;
> > > +       struct block_list *list;
> > > +
> > > +       block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
> > > +       if (!block)
> > > +               return NULL;
> > > +
> > > +       list = &(pool->block_lists)[block_type];
> > > +
> > > +       /* init block data  */
> > > +       spin_lock_init(&block->lock);
> > > +       memset(block->slot_info, SLOT_FREE, block_desc[block_type].slots_per_block);
> > > +       block->free_slots = block_desc[block_type].slots_per_block;
> > > +       block->under_reclaim = false;
> > > +
> > > +       spin_lock(&list->lock);
> > > +       /* inserting block into list */
> > > +       INIT_LIST_HEAD(&block->block_node);
> > > +       list_add(&block->block_node, &list->head);
> > > +       cache_insert_block(block, list);
> > > +       list->block_count++;
> > > +       spin_unlock(&list->lock);
> > > +       return block;
> > > +}
> > > +
> > > +/*
> > > + * Encodes the handle of a particular slot in the pool using metadata
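> > > + * (the block is at least page-aligned, so the low PAGE_SHIFT bits of its
> > > + * address are free: the block type goes into bits [SLOT_BITS, PAGE_SHIFT)
> > > + * and the slot index into the low SLOT_BITS bits)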
> > > + */
> > > +static inline unsigned long metadata_to_handle(struct zblock_block *block,
> > > +                                                       unsigned int block_type, unsigned int slot)
> > > +{
> > > +       return (unsigned long)(block) + (block_type << SLOT_BITS) + slot;
> > > +}
> > > +
> > > +/* Returns block, block type and slot in the pool corresponding to handle */
> > > +static inline struct zblock_block *handle_to_metadata(unsigned long handle,
> > > +                                               unsigned int *block_type, unsigned int *slot)
> > > +{
> > > +       *block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
> > > +       *slot = handle & SLOT_MASK;
> > > +       return (struct zblock_block *)(handle & PAGE_MASK);
> > > +}
> > > +
> > > +
> > > +/*****************
> > > + * API Functions
> > > + *****************/
> > > +/**
> > > + * zblock_create_pool() - create a new zblock pool
> > > + * @gfp:       gfp flags when allocating the zblock pool structure
> > > + * @ops:       user-defined operations for the zblock pool
> > > + *
> > > + * Return: pointer to the new zblock pool or NULL if the metadata allocation
> > > + * failed.
> > > + */
> > > +static struct zblock_pool *zblock_create_pool(gfp_t gfp, const struct zblock_ops *ops)
> > > +{
> > > +       struct zblock_pool *pool;
> > > +       struct block_list *list;
> > > +       int i, j;
> > > +
> > > +       pool = kmalloc(sizeof(struct zblock_pool), gfp);
> > > +       if (!pool)
> > > +               return NULL;
> > > +
> > > +       /* init each block list */
> > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > +               list = &(pool->block_lists)[i];
> > > +               spin_lock_init(&list->lock);
> > > +               INIT_LIST_HEAD(&list->head);
> > > +               for (j = 0; j < BLOCK_CACHE_SIZE; j++)
> > > +                       list->block_cache[j] = NULL;
> > > +               list->block_count = 0;
> > > +       }
> > > +       pool->ops = ops;
> > > +       atomic_set(&pool->alloc_flag, 0);
> > > +       return pool;
> > > +}
> > > +
> > > +/**
> > > + * zblock_destroy_pool() - destroys an existing zblock pool
> > > + * @pool:      the zblock pool to be destroyed
> > > + *
> > > + */
> > > +static void zblock_destroy_pool(struct zblock_pool *pool)
> > > +{
> > > +       kfree(pool);
> > > +}
> > > +
> > > +
> > > +/**
> > > + * zblock_alloc() - allocates a slot of appropriate size
> > > + * @pool:      zblock pool from which to allocate
> > > + * @size:      size in bytes of the desired allocation
> > > + * @gfp:       gfp flags used if the pool needs to grow
> > > + * @handle:    handle of the new allocation
> > > + *
> > > + * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
> > > + * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
> > > + * a new slot.
> > > + */
> > > +static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
> > > +                       unsigned long *handle)
> > > +{
> > > +       unsigned int block_type, slot;
> > > +       struct zblock_block *block;
> > > +       struct block_list *list;
> > > +
> > > +       if (!size)
> > > +               return -EINVAL;
> > > +
> > > +       if (size > PAGE_SIZE)
> > > +               return -ENOSPC;
> > > +
> > > +       /* find basic block type with suitable slot size */
> > > +       for (block_type = 0; block_type < ARRAY_SIZE(block_desc); block_type++) {
> > > +               if (size <= block_desc[block_type].slot_size)
> > > +                       break;
> > > +       }
> > > +       list = &(pool->block_lists[block_type]);
> > > +
> > > +check:
> > > +       spin_lock(&list->lock);
> > > +       /* check if there are free slots in cache */
> > > +       block = cache_find_block(list);
> > > +       if (block)
> > > +               goto found;
> > > +       spin_unlock(&list->lock);
> > > +
> > > +       /* no block with free slots found - try to allocate a new empty block
> > > +        * (alloc_flag ensures only one thread allocates at a time, the others
> > > +        * go back to re-checking the cache)
> > > +        */
> > > +       if (atomic_cmpxchg(&pool->alloc_flag, 0, 1))
> > > +               goto check;
> > > +       block = alloc_block(pool, block_type, gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
> > > +       if (block) {
> > > +               spin_lock(&list->lock);
> > > +               goto found;
> > > +       }
> > > +       atomic_set(&pool->alloc_flag, 0);
> > > +       return -ENOMEM;
> > > +
> > > +found:
> > > +       spin_lock(&block->lock);
> > > +       block->free_slots--;
> > > +       spin_unlock(&list->lock);
> > > +       /* find the first free slot in block */
> > > +       for (slot = 0; slot < block_desc[block_type].slots_per_block; slot++) {
> > > +               if (block->slot_info[slot] == SLOT_FREE)
> > > +                       break;
> > > +       }
> > > +       block->slot_info[slot] = SLOT_OCCUPIED;
> > > +       spin_unlock(&block->lock);
> > > +       *handle = metadata_to_handle(block, block_type, slot);
> > > +       atomic_set(&pool->alloc_flag, 0);
> > > +       return 0;
> > > +}
> > > +
> > > +/**
> > > + * zblock_free() - frees the allocation associated with the given handle
> > > + * @pool:      pool in which the allocation resided
> > > + * @handle:    handle associated with the allocation returned by zblock_alloc()
> > > + *
> > > + */
> > > +static void zblock_free(struct zblock_pool *pool, unsigned long handle)
> > > +{
> > > +       unsigned int slot, block_type;
> > > +       struct zblock_block *block;
> > > +       struct block_list *list;
> > > +       int i;
> > > +
> > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > +       list = &(pool->block_lists[block_type]);
> > > +
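> > > +       /* the block is being evicted - the reclaim path will release its slots */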
> > > +       if (block->under_reclaim)
> > > +               return;
> > > +       spin_lock(&list->lock);
> > > +       i = is_in_cache(block, list);
> > > +       block->free_slots++;
> > > +       /* if all slots in the block are now free, delete the whole block */
> > > +       if (block->free_slots == block_desc[block_type].slots_per_block) {
> > > +               list_del(&block->block_node);
> > > +               list->block_count--;
> > > +
> > > +               /* drop the block from the cache if it was cached */
> > > +               if (i != -1)
> > > +                       list->block_cache[i] = NULL;
> > > +               spin_unlock(&list->lock);
> > > +               free_pages((unsigned long)block, block_desc[block_type].order);
> > > +               return;
> > > +       }
> > > +       /* if the block is not cached, add it to the cache */
> > > +       if (i == -1)
> > > +               cache_insert_block(block, list);
> > > +
> > > +       spin_lock(&block->lock);
> > > +       spin_unlock(&list->lock);
> > > +       block->slot_info[slot] = SLOT_FREE;
> > > +       spin_unlock(&block->lock);
> > > +}
> > > +
> > > +/**
> > > + * zblock_reclaim_block() - evicts allocations from block and frees it
> > > + * @pool:      pool from which a block will attempt to be evicted
> > > + *
> > > + * Returns: the number of reclaimed slots if eviction succeeded, -EAGAIN
> > > + *          if no slots could be evicted, or -EINVAL if there are no
> > > + *          blocks to evict
> > > + */
> > > +static int zblock_reclaim_block(struct zblock_pool *pool)
> > > +{
> > > +       struct zblock_block *block;
> > > +       struct block_list *list;
> > > +       unsigned long handle, slot;
> > > +       int ret, i, reclaimed, block_type;
> > > +
> > > +       /* start with list storing blocks with the worst compression and try
> > > +        * to evict the first added (oldest) block in this list
> > > +        */
> > > +       for (block_type = ARRAY_SIZE(block_desc) - 1; block_type >= 0; --block_type) {
> > > +               list = &(pool->block_lists[block_type]);
> > > +               spin_lock(&list->lock);
> > > +
> > > +               if (list_empty(&list->head)) {
> > > +                       spin_unlock(&list->lock);
> > > +                       continue;
> > > +               }
> > > +
> > > +               /* find the oldest block in the list */
> > > +               block = list_last_entry(&list->head, struct zblock_block, block_node);
> > > +               i = is_in_cache(block, list);
> > > +               /* skip iteration if this block is cached */
> > > +               if (i != -1) {
> > > +                       spin_unlock(&list->lock);
> > > +                       continue;
> > > +               }
> > > +               block->under_reclaim = true;
> > > +               spin_unlock(&list->lock);
> > > +               reclaimed = 0;
> > > +
> > > +               /* try to evict all UNMAPPED slots in block */
> > > +               for (slot = 0; slot < block_desc[block_type].slots_per_block; ++slot) {
> > > +                       if (block->slot_info[slot] == SLOT_UNMAPPED) {
> > > +                               handle = metadata_to_handle(block, block_type, slot);
> > > +                               ret = pool->ops->evict(pool, handle);
> > > +                               if (ret)
> > > +                                       break;
> > > +
> > > +                               ++reclaimed;
> > > +                               spin_lock(&block->lock);
> > > +                               block->slot_info[slot] = SLOT_FREE;
> > > +                               spin_unlock(&block->lock);
> > > +                               block->free_slots++;
> > > +                       }
> > > +               }
> > > +               spin_lock(&list->lock);
> > > +               /* some occupied slots remain - put the block back into the cache */
> > > +               if (block->free_slots != block_desc[block_type].slots_per_block) {
> > > +                       block->under_reclaim = false;
> > > +                       cache_insert_block(block, list);
> > > +                       spin_unlock(&list->lock);
> > > +               } else {
> > > +                       /* all slots are free - delete this block */
> > > +                       list_del(&block->block_node);
> > > +                       list->block_count--;
> > > +                       spin_unlock(&list->lock);
> > > +                       free_pages((unsigned long)block, block_desc[block_type].order);
> > > +               }
> > > +               if (reclaimed != 0)
> > > +                       return reclaimed;
> > > +               return -EAGAIN;
> > > +       }
> > > +       return -EINVAL;
> > > +}
> > > +
> > > +
> > > +/**
> > > + * zblock_map() - maps the allocation associated with the given handle
> > > + * @pool:      pool in which the allocation resides
> > > + * @handle:    handle associated with the allocation to be mapped
> > > + *
> > > + *
> > > + * Returns: a pointer to the mapped allocation
> > > + */
> > > +static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
> > > +{
> > > +       unsigned int block_type, slot;
> > > +       struct zblock_block *block;
> > > +
> > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > +       spin_lock(&block->lock);
> > > +       block->slot_info[slot] = SLOT_MAPPED;
> > > +       spin_unlock(&block->lock);
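> > > +       /* the data area starts right after the struct zblock_block header */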
> > > +       return (void *)(block + 1) + slot * block_desc[block_type].slot_size;
> > > +}
> > > +
> > > +/**
> > > + * zblock_unmap() - unmaps the allocation associated with the given handle
> > > + * @pool:      pool in which the allocation resides
> > > + * @handle:    handle associated with the allocation to be unmapped
> > > + */
> > > +static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
> > > +{
> > > +       unsigned int block_type, slot;
> > > +       struct zblock_block *block;
> > > +
> > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > +       spin_lock(&block->lock);
> > > +       block->slot_info[slot] = SLOT_UNMAPPED;
> > > +       spin_unlock(&block->lock);
> > > +}
> > > +
> > > +/**
> > > + * zblock_get_pool_size() - gets the zblock pool size in bytes
> > > + * @pool:      pool whose size is being queried
> > > + *
> > > + * Returns: size in bytes of the given pool.
> > > + */
> > > +static u64 zblock_get_pool_size(struct zblock_pool *pool)
> > > +{
> > > +       u64 total_size;
> > > +       int i;
> > > +
> > > +       total_size = 0;
> > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > +               total_size += (pool->block_lists)[i].block_count
> > > +                               * (PAGE_SIZE << block_desc[i].order);
> > > +       }
> > > +       return total_size;
> > > +}
> > > +
> > > +/*****************
> > > + * zpool
> > > + ****************/
> > > +
> > > +static int zblock_zpool_evict(struct zblock_pool *pool, unsigned long handle)
> > > +{
> > > +       if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
> > > +               return pool->zpool_ops->evict(pool->zpool, handle);
> > > +       else
> > > +               return -ENOENT;
> > > +}
> > > +
> > > +static const struct zblock_ops zblock_zpool_ops = {
> > > +       .evict =        zblock_zpool_evict
> > > +};
> > > +
> > > +static void *zblock_zpool_create(const char *name, gfp_t gfp,
> > > +                                  const struct zpool_ops *zpool_ops,
> > > +                                  struct zpool *zpool)
> > > +{
> > > +       struct zblock_pool *pool;
> > > +
> > > +       pool = zblock_create_pool(gfp, &zblock_zpool_ops);
> > > +       if (pool) {
> > > +               pool->zpool = zpool;
> > > +               pool->zpool_ops = zpool_ops;
> > > +       }
> > > +       return pool;
> > > +}
> > > +
> > > +static void zblock_zpool_destroy(void *pool)
> > > +{
> > > +       zblock_destroy_pool(pool);
> > > +}
> > > +
> > > +static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
> > > +                       unsigned long *handle)
> > > +{
> > > +       return zblock_alloc(pool, size, gfp, handle);
> > > +}
> > > +
> > > +static void zblock_zpool_free(void *pool, unsigned long handle)
> > > +{
> > > +       zblock_free(pool, handle);
> > > +}
> > > +
> > > +static int zblock_zpool_shrink(void *pool, unsigned int pages,
> > > +                       unsigned int *reclaimed)
> > > +{
> > > +       unsigned int total = 0;
> > > +       int ret = -EINVAL;
> > > +
> > > +       while (total < pages) {
> > > +               ret = zblock_reclaim_block(pool);
> > > +               if (ret < 0)
> > > +                       break;
> > > +               total += ret;
> > > +       }
> > > +       if (reclaimed)
> > > +               *reclaimed = total;
> > > +
> > > +       return ret;
> > > +}
> > > +
> > > +static void *zblock_zpool_map(void *pool, unsigned long handle,
> > > +                       enum zpool_mapmode mm)
> > > +{
> > > +       return zblock_map(pool, handle);
> > > +}
> > > +
> > > +static void zblock_zpool_unmap(void *pool, unsigned long handle)
> > > +{
> > > +       zblock_unmap(pool, handle);
> > > +}
> > > +
> > > +static u64 zblock_zpool_total_size(void *pool)
> > > +{
> > > +       return zblock_get_pool_size(pool);
> > > +}
> > > +
> > > +static struct zpool_driver zblock_zpool_driver = {
> > > +       .type =         "zblock",
> > > +       .owner =        THIS_MODULE,
> > > +       .create =       zblock_zpool_create,
> > > +       .destroy =      zblock_zpool_destroy,
> > > +       .malloc =       zblock_zpool_malloc,
> > > +       .free =         zblock_zpool_free,
> > > +       .shrink =       zblock_zpool_shrink,
> > > +       .map =          zblock_zpool_map,
> > > +       .unmap =        zblock_zpool_unmap,
> > > +       .total_size =   zblock_zpool_total_size,
> > > +};
> > > +
> > > +MODULE_ALIAS("zpool-zblock");
> > > +
> > > +static int __init init_zblock(void)
> > > +{
> > > +       pr_info("loaded\n");
> > > +       zpool_register_driver(&zblock_zpool_driver);
> > > +       return 0;
> > > +}
> > > +
> > > +static void __exit exit_zblock(void)
> > > +{
> > > +       zpool_unregister_driver(&zblock_zpool_driver);
> > > +       pr_info("unloaded\n");
> > > +}
> > > +
> > > +module_init(init_zblock);
> > > +module_exit(exit_zblock);
> > > +
> > > +MODULE_LICENSE("GPL");
> > > +MODULE_AUTHOR("Ananda Badmaeb <a.badmaev@clicknet.pro>");
> > > +MODULE_DESCRIPTION("Block allocator for compressed pages");
> > > --
> > > 2.34.1
> > >
> > >


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-29  7:58     ` Yosry Ahmed
@ 2022-09-29  8:53       ` Vitaly Wool
  2022-09-29 18:28         ` Yosry Ahmed
  0 siblings, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2022-09-29  8:53 UTC (permalink / raw)
  To: Yosry Ahmed; +Cc: ananda, Linux-MM

On Thu, Sep 29, 2022 at 9:59 AM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Wed, Sep 28, 2022 at 11:55 PM Vitaly Wool <vitaly.wool@konsulko.com> wrote:
> >
> > On Wed, Sep 28, 2022 at 8:38 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> > >
> > > On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
> > > >
> > > > From: Ananda <a.badmaev@clicknet.pro>
> > > >
> > > >     Zblock stores integer number of compressed objects per zblock block.
> > > > These blocks consist of several physical pages (1/2/4/8) and are arranged
> > > > in linked lists.
> > > >     The range from 0 to PAGE_SIZE is divided into the number of intervals
> > > > corresponding to the number of lists and each list only operates objects
> > > > of size from its interval. Thus the block lists are isolated from each
> > > > other, which makes it possible to simultaneously perform actions with
> > > > several objects from different lists.
> > > >     Blocks make it possible to densely arrange objects of various sizes
> > > > resulting in low internal fragmentation. Also this allocator tries to fill
> > > > incomplete blocks instead of adding new ones thus in many cases providing
> > > > a compression ratio substantially higher than z3fold and zbud.
> > > >     Zblock does not require MMU and also is superior to zsmalloc with
> > > > regard to the worst execution times, thus allowing for better response time
> > > > and real-time characteristics of the whole system.
> > > >
> > >
> > > It seems to me, and I can be wrong, that there is some overlap in
> > > design and goals between this zpool backend and zsmalloc. They both
> > > try to avoid internal fragmentation by avoiding the static slots used
> > > by zbud and z3fold, and instead pack compressed pages more
> > > dynamically. They both have some sort of concurrency handling
> > > (separate block lists in zblock vs. classes in zsmalloc). A key
> > > difference is that zsmalloc avoids higher order allocations (at least
> > > based on its docs), and instead allows compressed pages to span across
> > > 0-order page boundaries.
> >
> > Well, another key difference is that zsmalloc may only work on
> > MMU-enabled systems.
> >
> > > The key differences I see here (based on this commit message and
> > > zsmalloc docs) are:
> > > a) Blocks in zblock can consist of higher order pages.
> > > b) Compressed pages in zsmalloc can span page boundaries (I am
> > > assuming this isn't the case for zblock).
> > >
> > > It appears to me that if zblock has better performance than zsmalloc,
> > > it can be because pages in zblock are physically contiguous vs. the
> > > 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> > > assumption correct?
> > >
> > > If yes, would it be better to implement those changes as some tunable
> > > extension to zsmalloc? It would make it easier if we have overall less
> > > zpool backends, and also easier for current users of zsmalloc to
> > > experiment with these changes.
> >
> > Easier to whom? Not for me, nor for anyone using zswap, that's what I
> > have to say.
> > zpool API is created to unify compression backends and so I would
> > strongly prefer continuing having zpool for backend configuration and
> > selection, rather than implementing a custom in-zsmalloc selection
> > mechanism.
> >
> > I don't think merging is a good idea, and here are the reasons:
> > - zsmalloc's code is already almost incomprehensible
> > - zsmalloc's main objective is density, while zblock aims for low latency
> > - merging the two approaches within zsmalloc would mean creating some
> > internal selection mechanism within zsmalloc which I would absolutely
> > like to avoid (for smaller RAM devices I usually don't compile
> > zsmalloc at all or compile it as a module).
> >
>
> Thanks for taking the time to respond to this :)
>
> I am sorry if my intention was not clear, but I did not mean that we
> should have zblock be added to zsmalloc such that we "select" between
> zsmalloc and zblock. What I meant (which can still be utter nonsense)
> is that if the differences between zsmalloc and zblock can be
> reimagined as improvements to zsmalloc, then maybe this would be
> better, to have less zpool backends overall if we don't need more.

Well, I'm not entirely sure we should aim for having fewer zpool
backends, but if that's something we'd have to do, then I'd rather
mark z3fold as obsolete and have zblock take its place than merge
zblock with zsmalloc.

> For example, maybe we can have the default allocation order be a
> config option. By default it would be 0, which maintains the current
> behavior, and then we can configure it to something higher to get a
> behavior closer to zblock. This is of course an oversimplification,
> but if most key differences can be formulated similarly, then maybe we
> can get improved zsmalloc instead of zblock, with perhaps a few
> tunables (tunables like allocation order *not* different selectable
> modes).

There's one more important thing that I forgot to mention: zsmalloc
doesn't support reclaim, and I greatly doubt it ever will, as opposed
to zblock, which does. Not supporting reclaim makes zsmalloc a bad fit
for zswap, so having another backend that is almost as good as
zsmalloc with regard to compression ratio *and* supports reclaim is an
important step forward.
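
For context, reclaim here is driven through the zpool shrink/evict
pair. Roughly, on the zswap side it looks like this (simplified
sketch, not the exact zswap code):

    /* ask the allocator backend to reclaim some objects */
    ret = zpool_shrink(pool->zpool, 1, NULL);

    /*
     * For zblock this ends up in zblock_zpool_shrink() ->
     * zblock_reclaim_block(), which takes the oldest block from the
     * worst-compression list and calls the ->evict() callback
     * (zswap's writeback) for every unmapped slot in it.
     */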

> You are, of course, way more familiar with this code than me, so
> please excuse me if what I am saying still sounds like nonsense. I am
> just trying to avoid having similar zpool backends if possible.
>
> zsmalloc code being incomprehensible is another point that I am not
> considering here as well, so perhaps even if everything else checks
> out the added complexity isn't worth it. I can't judge this. I was
> only making a suggestion.

I did not say it was incomprehensible, but it's getting close IMHO :)
And if we add zblock-like functionality there, then it probably will
become one.
So far, zsmalloc's code is more than 4x larger than zblock's, and if
we can keep it this way, we'll have a separate nice backend that is
more versatile than zsmalloc, almost as good compression-wise, and all
that at almost the simplicity level of zbud. I believe this is the way
to go.

Thanks,
Vitaly

> >
> > > > Signed-off-by: Ananda <a.badmaev@clicknet.pro>
> > > > ---
> > > >
> > > > v2: fixed compiler warnings
> > > >
> > > > v3: added documentation and const modifier to struct tree_descr
> > > >
> > > > v4: - fixed gfp flags for block allocation
> > > >     - fixed potential memory leak when allocating blocks
> > > >     - resolved some issues with code style and warnings from checkpatch
> > > >       (except warning about single line config symbol description)
> > > >     - moved test results from documentation to changelog
> > > >
> > > > v5: - "direct" handle mapping and use of linked lists instead of red-black
> > > >       trees resulting in faster operations and a bit simpler code
> > > >     - renamed ztree -> zblock
> > > >     - edited various comments and descriptions
> > > >
> > > >  Documentation/mm/zblock.rst |  31 ++
> > > >  MAINTAINERS                 |   7 +
> > > >  mm/Kconfig                  |  17 +
> > > >  mm/Makefile                 |   1 +
> > > >  mm/zblock.c                 | 637 ++++++++++++++++++++++++++++++++++++
> > > >  5 files changed, 693 insertions(+)
> > > >  create mode 100644 Documentation/mm/zblock.rst
> > > >  create mode 100644 mm/zblock.c
> > > >
> > > > diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
> > > > new file mode 100644
> > > > index 000000000000..5008ce90b54b
> > > > --- /dev/null
> > > > +++ b/Documentation/mm/zblock.rst
> > > > @@ -0,0 +1,31 @@
> > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > +
> > > > +.. _block:
> > > > +
> > > > +======
> > > > +zblock
> > > > +======
> > > > +
> > > > +Zblock stores integer number of compressed objects per block. These
> > > > +blocks consist of several consecutive physical pages (from 1 to 8) and
> > > > +are arranged in lists. The range from 0 to PAGE_SIZE is divided into the
> > > > +number of intervals corresponding to the number of lists and each list
> > > > +only operates objects of size from its interval. Thus the block lists are
> > > > +isolated from each other, which makes it possible to simultaneously
> > > > +perform actions with several objects from different lists.
> > > > +
> > > > +Blocks make it possible to densely arrange objects of various sizes
> > > > +resulting in low internal fragmentation. Also this allocator tries to fill
> > > > +incomplete blocks instead of adding new ones thus in many cases providing
> > > > +a compression ratio substantially higher than z3fold and zbud. Zblock does
> > > > +not require MMU and also is superior to zsmalloc with regard to the worst
> > > > +execution times, thus allowing for better response time and real-time
> > > > +characteristics of the whole system.
> > > > +
> > > > +Like z3fold and zsmalloc zblock_alloc() does not return a dereferenceable
> > > > +pointer. Instead, it returns an unsigned long handle which encodes actual
> > > > +location of the allocated object.
> > > > +
> > > > +Unlike zbud and z3fold zblock works well with objects of various sizes - both
> > > > +highly compressed and poorly compressed including cases where both types
> > > > +are present.
> > > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > > index f1390b8270b2..014fb19eb2cc 100644
> > > > --- a/MAINTAINERS
> > > > +++ b/MAINTAINERS
> > > > @@ -22457,6 +22457,13 @@ L:     linux-mm@kvack.org
> > > >  S:     Maintained
> > > >  F:     mm/z3fold.c
> > > >
> > > > +ZBLOCK COMPRESSED PAGE ALLOCATOR
> > > > +M:     Ananda Badmaev <a.badmaev@clicknet.pro>
> > > > +M:     Vitaly Wool <vitaly.wool@konsulko.com>
> > > > +L:     linux-mm@kvack.org
> > > > +S:     Maintained
> > > > +F:     mm/zblock.c
> > > > +
> > > >  ZD1211RW WIRELESS DRIVER
> > > >  M:     Ulrich Kunitz <kune@deine-taler.de>
> > > >  L:     linux-wireless@vger.kernel.org
> > > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > > index 0331f1461f81..470c80f5726d 100644
> > > > --- a/mm/Kconfig
> > > > +++ b/mm/Kconfig
> > > > @@ -149,6 +149,12 @@ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > > >         select ZSMALLOC
> > > >         help
> > > >           Use the zsmalloc allocator as the default allocator.
> > > > +
> > > > +config ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > > > +       bool "zblock"
> > > > +       select ZBLOCK
> > > > +       help
> > > > +         Use the zblock allocator as the default allocator.
> > > >  endchoice
> > > >
> > > >  config ZSWAP_ZPOOL_DEFAULT
> > > > @@ -157,6 +163,7 @@ config ZSWAP_ZPOOL_DEFAULT
> > > >         default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
> > > >         default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
> > > >         default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > > > +          default "zblock" if ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > > >         default ""
> > > >
> > > >  config ZBUD
> > > > @@ -187,6 +194,16 @@ config ZSMALLOC
> > > >           pages of various compression levels efficiently. It achieves
> > > >           the highest storage density with the least amount of fragmentation.
> > > >
> > > > +config ZBLOCK
> > > > +       tristate "Simple block allocator (zblock)"
> > > > +       depends on ZPOOL
> > > > +       help
> > > > +         A special purpose allocator for storing compressed pages.
> > > > +         It stores integer number of compressed pages per block and
> > > > +         each block consists of number of physical pages being a power of 2.
> > > > +         zblock provides fast read/write, limited worst case time for
> > > > +         operations and good compression ratio in most scenarios.
> > > > +
> > > >  config ZSMALLOC_STAT
> > > >         bool "Export zsmalloc statistics"
> > > >         depends on ZSMALLOC
> > > > diff --git a/mm/Makefile b/mm/Makefile
> > > > index 9a564f836403..eb7235da6e61 100644
> > > > --- a/mm/Makefile
> > > > +++ b/mm/Makefile
> > > > @@ -110,6 +110,7 @@ obj-$(CONFIG_ZPOOL) += zpool.o
> > > >  obj-$(CONFIG_ZBUD)     += zbud.o
> > > >  obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
> > > >  obj-$(CONFIG_Z3FOLD)   += z3fold.o
> > > > +obj-$(CONFIG_ZBLOCK)   += zblock.o
> > > >  obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
> > > >  obj-$(CONFIG_CMA)      += cma.o
> > > >  obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
> > > > diff --git a/mm/zblock.c b/mm/zblock.c
> > > > new file mode 100644
> > > > index 000000000000..b389f43e0c26
> > > > --- /dev/null
> > > > +++ b/mm/zblock.c
> > > > @@ -0,0 +1,637 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * zblock.c
> > > > + *
> > > > + * Author: Ananda Badmaev <a.badmaev@clicknet.pro>
> > > > + * Copyright (C) 2022, Konsulko AB.
> > > > + *
> > > > + * This implementation is based on z3fold written by Vitaly Wool.
> > > > + * Zblock is a small object allocator with the intention to serve as a
> > > > + * zpool backend. It operates on page blocks which consist of number
> > > > + * of physical pages being a power of 2 and store integer number of
> > > > + * compressed pages per block which results in determinism and simplicity.
> > > > + *
> > > > + * zblock doesn't export any API and is meant to be used via zpool API.
> > > > + */
> > > > +
> > > > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > > > +
> > > > +#include <linux/atomic.h>
> > > > +#include <linux/list.h>
> > > > +#include <linux/mm.h>
> > > > +#include <linux/module.h>
> > > > +#include <linux/preempt.h>
> > > > +#include <linux/slab.h>
> > > > +#include <linux/spinlock.h>
> > > > +#include <linux/zpool.h>
> > > > +
> > > > +#define SLOT_FREE 0
> > > > +#define SLOT_OCCUPIED 1
> > > > +#define SLOT_MAPPED 2
> > > > +#define SLOT_UNMAPPED 3
> > > > +
> > > > +#define SLOT_BITS 5
> > > > +#define MAX_SLOTS (1 << SLOT_BITS)
> > > > +#define SLOT_MASK ((0x1UL << SLOT_BITS) - 1)
> > > > +
> > > > +#define BLOCK_DATA_SIZE(order) ((PAGE_SIZE << order) - sizeof(struct zblock_block))
> > > > +#define SLOT_SIZE(nslots, order) (round_down((BLOCK_DATA_SIZE(order) / nslots), sizeof(long)))
> > > > +
> > > > +#define BLOCK_CACHE_SIZE 32
> > > > +
> > > > +struct zblock_pool;
> > > > +
> > > > +struct zblock_ops {
> > > > +       int (*evict)(struct zblock_pool *pool, unsigned long handle);
> > > > +};
> > > > +
> > > > +/**
> > > > + * struct zblock_block - block metadata
> > > > + * Block consists of several (1/2/4/8) pages and contains fixed
> > > > + * integer number of slots for allocating compressed pages.
> > > > + *
> > > > + * lock:                        protects block
> > > > + * block_node:          links block into the relevant list in the pool
> > > > + * slot_info:       contains data about free/occupied slots
> > > > + * free_slots:          number of free slots in the block
> > > > + * under_reclaim:    if true shows that block is being evicted
> > > > + */
> > > > +struct zblock_block {
> > > > +       spinlock_t lock;
> > > > +       struct list_head block_node;
> > > > +       u8 slot_info[MAX_SLOTS];
> > > > +       unsigned int free_slots;
> > > > +       bool under_reclaim;
> > > > +};
> > > > +/**
> > > > + * struct block_desc - general metadata for block lists
> > > > + * Each block list stores only blocks of corresponding type which means
> > > > + * that all blocks in it have the same number and size of slots.
> > > > + * All slots are aligned to size of long.
> > > > + *
> > > > + * slot_size:          size of slot for this list
> > > > + * slots_per_block:    number of slots per block for this list
> > > > + * order:                      order for __get_free_pages
> > > > + */
> > > > +static const struct block_desc {
> > > > +       const unsigned int slot_size;
> > > > +       const unsigned short slots_per_block;
> > > > +       const unsigned short order;
> > > > +} block_desc[] = {
> > > > +       { SLOT_SIZE(32, 0), 32, 0 },
> > > > +       { SLOT_SIZE(22, 0), 22, 0 },
> > > > +       { SLOT_SIZE(17, 0), 17, 0 },
> > > > +       { SLOT_SIZE(13, 0), 13, 0 },
> > > > +       { SLOT_SIZE(11, 0), 11, 0 },
> > > > +       { SLOT_SIZE(9, 0), 9, 0 },
> > > > +       { SLOT_SIZE(8, 0), 8, 0 },
> > > > +       { SLOT_SIZE(14, 1), 14, 1 },
> > > > +       { SLOT_SIZE(12, 1), 12, 1 },
> > > > +       { SLOT_SIZE(11, 1), 11, 1 },
> > > > +       { SLOT_SIZE(10, 1), 10, 1 },
> > > > +       { SLOT_SIZE(9, 1), 9, 1 },
> > > > +       { SLOT_SIZE(8, 1), 8, 1 },
> > > > +       { SLOT_SIZE(15, 2), 15, 2 },
> > > > +       { SLOT_SIZE(14, 2), 14, 2 },
> > > > +       { SLOT_SIZE(13, 2), 13, 2 },
> > > > +       { SLOT_SIZE(12, 2), 12, 2 },
> > > > +       { SLOT_SIZE(11, 2), 11, 2 },
> > > > +       { SLOT_SIZE(10, 2), 10, 2 },
> > > > +       { SLOT_SIZE(9, 2), 9, 2 },
> > > > +       { SLOT_SIZE(8, 2), 8, 2 },
> > > > +       { SLOT_SIZE(15, 3), 15, 3 },
> > > > +       { SLOT_SIZE(14, 3), 14, 3 },
> > > > +       { SLOT_SIZE(13, 3), 13, 3 },
> > > > +       { SLOT_SIZE(12, 3), 12, 3 },
> > > > +       { SLOT_SIZE(11, 3), 11, 3 },
> > > > +       { SLOT_SIZE(10, 3), 10, 3 },
> > > > +       { SLOT_SIZE(9, 3), 9, 3 },
> > > > +       { SLOT_SIZE(7, 3), 7, 3 }
> > > > +};
> > > > +
> > > > +/**
> > > > + * struct block_list - stores metadata of particular list
> > > > + * lock:                       protects list
> > > > + * head:                       head of this list
> > > > + * block_cache:                blocks with free slots
> > > > + * block_count:                total number of blocks in the list
> > > > + */
> > > > +struct block_list {
> > > > +       spinlock_t lock;
> > > > +       struct list_head head;
> > > > +       struct zblock_block *block_cache[BLOCK_CACHE_SIZE];
> > > > +       unsigned long block_count;
> > > > +};
> > > > +
> > > > +/**
> > > > + * struct zblock_pool - stores metadata for each zblock pool
> > > > + * @block_lists:       array of block lists
> > > > + * @ops:                       pointer to a structure of user defined operations specified at
> > > > + *                                     pool creation time.
> > > > + * @zpool:                     zpool driver
> > > > + * @zpool_ops:         zpool operations structure with an evict callback
> > > > + * @alloc_flag:                protects block allocation from memory leak
> > > > + *
> > > > + * This structure is allocated at pool creation time and maintains metadata
> > > > + * for a particular zblock pool.
> > > > + */
> > > > +struct zblock_pool {
> > > > +       struct block_list block_lists[ARRAY_SIZE(block_desc)];
> > > > +       const struct zblock_ops *ops;
> > > > +       struct zpool *zpool;
> > > > +       const struct zpool_ops *zpool_ops;
> > > > +       atomic_t alloc_flag;
> > > > +};
> > > > +
> > > > +/*****************
> > > > + * Helpers
> > > > + *****************/
> > > > +
> > > > +static void cache_insert_block(struct zblock_block *block, struct block_list *list)
> > > > +{
> > > > +       unsigned int i, min_free_slots, min_index;
> > > > +
> > > > +       min_free_slots = MAX_SLOTS;
> > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > +               if (!list->block_cache[i] || !(list->block_cache[i])->free_slots) {
> > > > +                       list->block_cache[i] = block;
> > > > +                       return;
> > > > +               }
> > > > +               if ((list->block_cache[i])->free_slots < min_free_slots) {
> > > > +                       min_free_slots = (list->block_cache[i])->free_slots;
> > > > +                       min_index = i;
> > > > +               }
> > > > +       }
> > > > +       list->block_cache[min_index] = block;
> > > > +}
> > > > +
> > > > +static struct zblock_block *cache_find_block(struct block_list *list)
> > > > +{
> > > > +       int i;
> > > > +
> > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > +               if (list->block_cache[i] && (list->block_cache[i])->free_slots)
> > > > +                       return list->block_cache[i];
> > > > +       }
> > > > +       return NULL;
> > > > +}
> > > > +
> > > > +static int is_in_cache(struct zblock_block *block, struct block_list *list)
> > > > +{
> > > > +       int i;
> > > > +
> > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > +               if (block == list->block_cache[i])
> > > > +                       return i;
> > > > +       }
> > > > +       return -1;
> > > > +}
> > > > +
> > > > +/*
> > > > + * allocate new block and add it to corresponding block list
> > > > + */
> > > > +static struct zblock_block *alloc_block(struct zblock_pool *pool,
> > > > +                                                                       int block_type, gfp_t gfp)
> > > > +{
> > > > +       struct zblock_block *block;
> > > > +       struct block_list *list;
> > > > +
> > > > +       block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
> > > > +       if (!block)
> > > > +               return NULL;
> > > > +
> > > > +       list = &(pool->block_lists)[block_type];
> > > > +
> > > > +       /* init block data  */
> > > > +       spin_lock_init(&block->lock);
> > > > +       memset(block->slot_info, SLOT_FREE, block_desc[block_type].slots_per_block);
> > > > +       block->free_slots = block_desc[block_type].slots_per_block;
> > > > +       block->under_reclaim = false;
> > > > +
> > > > +       spin_lock(&list->lock);
> > > > +       /* inserting block into list */
> > > > +       INIT_LIST_HEAD(&block->block_node);
> > > > +       list_add(&block->block_node, &list->head);
> > > > +       cache_insert_block(block, list);
> > > > +       list->block_count++;
> > > > +       spin_unlock(&list->lock);
> > > > +       return block;
> > > > +}
> > > > +
> > > > +/*
> > > > + * Encodes the handle of a particular slot in the pool using metadata
> > > > + */
> > > > +static inline unsigned long metadata_to_handle(struct zblock_block *block,
> > > > +                                                       unsigned int block_type, unsigned int slot)
> > > > +{
> > > > +       return (unsigned long)(block) + (block_type << SLOT_BITS) + slot;
> > > > +}
> > > > +
> > > > +/* Returns block, block type and slot in the pool corresponding to handle */
> > > > +static inline struct zblock_block *handle_to_metadata(unsigned long handle,
> > > > +                                               unsigned int *block_type, unsigned int *slot)
> > > > +{
> > > > +       *block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
> > > > +       *slot = handle & SLOT_MASK;
> > > > +       return (struct zblock_block *)(handle & PAGE_MASK);
> > > > +}
> > > > +
> > > > +
> > > > +/*****************
> > > > + * API Functions
> > > > + *****************/
> > > > +/**
> > > > + * zblock_create_pool() - create a new zblock pool
> > > > + * @gfp:       gfp flags when allocating the zblock pool structure
> > > > + * @ops:       user-defined operations for the zblock pool
> > > > + *
> > > > + * Return: pointer to the new zblock pool or NULL if the metadata allocation
> > > > + * failed.
> > > > + */
> > > > +static struct zblock_pool *zblock_create_pool(gfp_t gfp, const struct zblock_ops *ops)
> > > > +{
> > > > +       struct zblock_pool *pool;
> > > > +       struct block_list *list;
> > > > +       int i, j;
> > > > +
> > > > +       pool = kmalloc(sizeof(struct zblock_pool), gfp);
> > > > +       if (!pool)
> > > > +               return NULL;
> > > > +
> > > > +       /* init each block list */
> > > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > > +               list = &(pool->block_lists)[i];
> > > > +               spin_lock_init(&list->lock);
> > > > +               INIT_LIST_HEAD(&list->head);
> > > > +               for (j = 0; j < BLOCK_CACHE_SIZE; j++)
> > > > +                       list->block_cache[j] = NULL;
> > > > +               list->block_count = 0;
> > > > +       }
> > > > +       pool->ops = ops;
> > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > +       return pool;
> > > > +}
> > > > +
> > > > +/**
> > > > + * zblock_destroy_pool() - destroys an existing zblock pool
> > > > + * @pool:      the zblock pool to be destroyed
> > > > + *
> > > > + */
> > > > +static void zblock_destroy_pool(struct zblock_pool *pool)
> > > > +{
> > > > +       kfree(pool);
> > > > +}
> > > > +
> > > > +
> > > > +/**
> > > > + * zblock_alloc() - allocates a slot of appropriate size
> > > > + * @pool:      zblock pool from which to allocate
> > > > + * @size:      size in bytes of the desired allocation
> > > > + * @gfp:       gfp flags used if the pool needs to grow
> > > > + * @handle:    handle of the new allocation
> > > > + *
> > > > + * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
> > > > + * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
> > > > + * a new slot.
> > > > + */
> > > > +static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
> > > > +                       unsigned long *handle)
> > > > +{
> > > > +       unsigned int block_type, slot;
> > > > +       struct zblock_block *block;
> > > > +       struct block_list *list;
> > > > +
> > > > +       if (!size)
> > > > +               return -EINVAL;
> > > > +
> > > > +       if (size > PAGE_SIZE)
> > > > +               return -ENOSPC;
> > > > +
> > > > +       /* find basic block type with suitable slot size */
> > > > +       for (block_type = 0; block_type < ARRAY_SIZE(block_desc); block_type++) {
> > > > +               if (size <= block_desc[block_type].slot_size)
> > > > +                       break;
> > > > +       }
> > > > +       list = &(pool->block_lists[block_type]);
> > > > +
> > > > +check:
> > > > +       spin_lock(&list->lock);
> > > > +       /* check if there are free slots in cache */
> > > > +       block = cache_find_block(list);
> > > > +       if (block)
> > > > +               goto found;
> > > > +       spin_unlock(&list->lock);
> > > > +
> > > > +       /* not found block with free slots try to allocate new empty block */
> > > > +       if (atomic_cmpxchg(&pool->alloc_flag, 0, 1))
> > > > +               goto check;
> > > > +       block = alloc_block(pool, block_type, gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
> > > > +       if (block) {
> > > > +               spin_lock(&list->lock);
> > > > +               goto found;
> > > > +       }
> > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > +       return -ENOMEM;
> > > > +
> > > > +found:
> > > > +       spin_lock(&block->lock);
> > > > +       block->free_slots--;
> > > > +       spin_unlock(&list->lock);
> > > > +       /* find the first free slot in block */
> > > > +       for (slot = 0; slot < block_desc[block_type].slots_per_block; slot++) {
> > > > +               if (block->slot_info[slot] == SLOT_FREE)
> > > > +                       break;
> > > > +       }
> > > > +       block->slot_info[slot] = SLOT_OCCUPIED;
> > > > +       spin_unlock(&block->lock);
> > > > +       *handle = metadata_to_handle(block, block_type, slot);
> > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +/**
> > > > + * zblock_free() - frees the allocation associated with the given handle
> > > > + * @pool:      pool in which the allocation resided
> > > > + * @handle:    handle associated with the allocation returned by zblock_alloc()
> > > > + *
> > > > + */
> > > > +static void zblock_free(struct zblock_pool *pool, unsigned long handle)
> > > > +{
> > > > +       unsigned int slot, block_type;
> > > > +       struct zblock_block *block;
> > > > +       struct block_list *list;
> > > > +       int i;
> > > > +
> > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > +       list = &(pool->block_lists[block_type]);
> > > > +
> > > > +       if (block->under_reclaim)
> > > > +               return;
> > > > +       spin_lock(&list->lock);
> > > > +       i = is_in_cache(block, list);
> > > > +       block->free_slots++;
> > > > +       /* if all slots in block are empty delete whole block */
> > > > +       if (block->free_slots == block_desc[block_type].slots_per_block) {
> > > > +               list_del(&block->block_node);
> > > > +               list->block_count--;
> > > > +
> > > > +               /* if cached block to be deleted */
> > > > +               if (i != -1)
> > > > +                       list->block_cache[i] = NULL;
> > > > +               spin_unlock(&list->lock);
> > > > +               free_pages((unsigned long)block, block_desc[block_type].order);
> > > > +               return;
> > > > +       }
> > > > +       /* if block is not cached update cache */
> > > > +       if (i == -1)
> > > > +               cache_insert_block(block, list);
> > > > +
> > > > +       spin_lock(&block->lock);
> > > > +       spin_unlock(&list->lock);
> > > > +       block->slot_info[slot] = SLOT_FREE;
> > > > +       spin_unlock(&block->lock);
> > > > +}
> > > > +
> > > > +/**
> > > > + * zblock_reclaim_block() - evicts allocations from block and frees it
> > > > + * @pool:      pool from which a block will attempt to be evicted
> > > > + *
> > > > + * Returns: pages reclaimed count if block is successfully freed
> > > > + *          otherwise -EINVAL if there are no blocks to evict
> > > > + */
> > > > +static int zblock_reclaim_block(struct zblock_pool *pool)
> > > > +{
> > > > +       struct zblock_block *block;
> > > > +       struct block_list *list;
> > > > +       unsigned long handle, block_type, slot;
> > > > +       int ret, i, reclaimed;
> > > > +
> > > > +       /* start with list storing blocks with the worst compression and try
> > > > +        * to evict the first added (oldest) block in this list
> > > > +        */
> > > > +       for (block_type = ARRAY_SIZE(block_desc) - 1; block_type >= 0; --block_type) {
> > > > +               list = &(pool->block_lists[block_type]);
> > > > +               spin_lock(&list->lock);
> > > > +
> > > > +               /* find the oldest block in list */
> > > > +               block = list_last_entry(&list->head, struct zblock_block, block_node);
> > > > +
> > > > +               if (!block) {
> > > > +                       spin_unlock(&list->lock);
> > > > +                       continue;
> > > > +               }
> > > > +               i = is_in_cache(block, list);
> > > > +               /* skip iteration if this block is cached */
> > > > +               if (i != -1) {
> > > > +                       spin_unlock(&list->lock);
> > > > +                       continue;
> > > > +               }
> > > > +               block->under_reclaim = true;
> > > > +               spin_unlock(&list->lock);
> > > > +               reclaimed = 0;
> > > > +
> > > > +               /* try to evict all UNMAPPED slots in block */
> > > > +               for (slot = 0; slot < block_desc[block_type].slots_per_block; ++slot) {
> > > > +                       if (block->slot_info[slot] == SLOT_UNMAPPED) {
> > > > +                               handle = metadata_to_handle(block, block_type, slot);
> > > > +                               ret = pool->ops->evict(pool, handle);
> > > > +                               if (ret)
> > > > +                                       break;
> > > > +
> > > > +                               ++reclaimed;
> > > > +                               spin_lock(&block->lock);
> > > > +                               block->slot_info[slot] = SLOT_FREE;
> > > > +                               spin_unlock(&block->lock);
> > > > +                               block->free_slots++;
> > > > +                       }
> > > > +               }
> > > > +               spin_lock(&list->lock);
> > > > +               /* some occupied slots remained - insert block */
> > > > +               if (block->free_slots != block_desc[block_type].slots_per_block) {
> > > > +                       block->under_reclaim = false;
> > > > +                       cache_insert_block(block, list);
> > > > +                       spin_unlock(&list->lock);
> > > > +               } else {
> > > > +               /* all slots are free - delete this block */
> > > > +                       list_del(&block->block_node);
> > > > +                       list->block_count--;
> > > > +                       spin_unlock(&list->lock);
> > > > +                       free_pages((unsigned long)block, block_desc[block_type].order);
> > > > +               }
> > > > +               if (reclaimed != 0)
> > > > +                       return reclaimed;
> > > > +               return -EAGAIN;
> > > > +       }
> > > > +       return -EINVAL;
> > > > +}
> > > > +
> > > > +
> > > > +/**
> > > > + * zblock_map() - maps the allocation associated with the given handle
> > > > + * @pool:      pool in which the allocation resides
> > > > + * @handle:    handle associated with the allocation to be mapped
> > > > + *
> > > > + *
> > > > + * Returns: a pointer to the mapped allocation
> > > > + */
> > > > +static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
> > > > +{
> > > > +       unsigned int block_type, slot;
> > > > +       struct zblock_block *block;
> > > > +
> > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > +       spin_lock(&block->lock);
> > > > +       block->slot_info[slot] = SLOT_MAPPED;
> > > > +       spin_unlock(&block->lock);
> > > > +       return (void *)(block + 1) + slot * block_desc[block_type].slot_size;
> > > > +}
> > > > +
> > > > +/**
> > > > + * zblock_unmap() - unmaps the allocation associated with the given handle
> > > > + * @pool:      pool in which the allocation resides
> > > > + * @handle:    handle associated with the allocation to be unmapped
> > > > + */
> > > > +static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
> > > > +{
> > > > +       unsigned int block_type, slot;
> > > > +       struct zblock_block *block;
> > > > +
> > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > +       spin_lock(&block->lock);
> > > > +       block->slot_info[slot] = SLOT_UNMAPPED;
> > > > +       spin_unlock(&block->lock);
> > > > +}
> > > > +
> > > > +/**
> > > > + * zblock_get_pool_size() - gets the zblock pool size in bytes
> > > > + * @pool:      pool whose size is being queried
> > > > + *
> > > > + * Returns: size in bytes of the given pool.
> > > > + */
> > > > +static u64 zblock_get_pool_size(struct zblock_pool *pool)
> > > > +{
> > > > +       u64 total_size;
> > > > +       int i;
> > > > +
> > > > +       total_size = 0;
> > > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > > +               total_size += (pool->block_lists)[i].block_count
> > > > +                               * (PAGE_SIZE << block_desc[i].order);
> > > > +       }
> > > > +       return total_size;
> > > > +}
> > > > +
> > > > +/*****************
> > > > + * zpool
> > > > + ****************/
> > > > +
> > > > +static int zblock_zpool_evict(struct zblock_pool *pool, unsigned long handle)
> > > > +{
> > > > +       if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
> > > > +               return pool->zpool_ops->evict(pool->zpool, handle);
> > > > +       else
> > > > +               return -ENOENT;
> > > > +}
> > > > +
> > > > +static const struct zblock_ops zblock_zpool_ops = {
> > > > +       .evict =        zblock_zpool_evict
> > > > +};
> > > > +
> > > > +static void *zblock_zpool_create(const char *name, gfp_t gfp,
> > > > +                                  const struct zpool_ops *zpool_ops,
> > > > +                                  struct zpool *zpool)
> > > > +{
> > > > +       struct zblock_pool *pool;
> > > > +
> > > > +       pool = zblock_create_pool(gfp, &zblock_zpool_ops);
> > > > +       if (pool) {
> > > > +               pool->zpool = zpool;
> > > > +               pool->zpool_ops = zpool_ops;
> > > > +       }
> > > > +       return pool;
> > > > +}
> > > > +
> > > > +static void zblock_zpool_destroy(void *pool)
> > > > +{
> > > > +       zblock_destroy_pool(pool);
> > > > +}
> > > > +
> > > > +static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
> > > > +                       unsigned long *handle)
> > > > +{
> > > > +       return zblock_alloc(pool, size, gfp, handle);
> > > > +}
> > > > +
> > > > +static void zblock_zpool_free(void *pool, unsigned long handle)
> > > > +{
> > > > +       zblock_free(pool, handle);
> > > > +}
> > > > +
> > > > +static int zblock_zpool_shrink(void *pool, unsigned int pages,
> > > > +                       unsigned int *reclaimed)
> > > > +{
> > > > +       unsigned int total = 0;
> > > > +       int ret = -EINVAL;
> > > > +
> > > > +       while (total < pages) {
> > > > +               ret = zblock_reclaim_block(pool);
> > > > +               if (ret < 0)
> > > > +                       break;
> > > > +               total += ret;
> > > > +       }
> > > > +       if (reclaimed)
> > > > +               *reclaimed = total;
> > > > +
> > > > +       return ret;
> > > > +}
> > > > +
> > > > +static void *zblock_zpool_map(void *pool, unsigned long handle,
> > > > +                       enum zpool_mapmode mm)
> > > > +{
> > > > +       return zblock_map(pool, handle);
> > > > +}
> > > > +
> > > > +static void zblock_zpool_unmap(void *pool, unsigned long handle)
> > > > +{
> > > > +       zblock_unmap(pool, handle);
> > > > +}
> > > > +
> > > > +static u64 zblock_zpool_total_size(void *pool)
> > > > +{
> > > > +       return zblock_get_pool_size(pool);
> > > > +}
> > > > +
> > > > +static struct zpool_driver zblock_zpool_driver = {
> > > > +       .type =         "zblock",
> > > > +       .owner =        THIS_MODULE,
> > > > +       .create =       zblock_zpool_create,
> > > > +       .destroy =      zblock_zpool_destroy,
> > > > +       .malloc =       zblock_zpool_malloc,
> > > > +       .free =         zblock_zpool_free,
> > > > +       .shrink =       zblock_zpool_shrink,
> > > > +       .map =          zblock_zpool_map,
> > > > +       .unmap =        zblock_zpool_unmap,
> > > > +       .total_size =   zblock_zpool_total_size,
> > > > +};
> > > > +
> > > > +MODULE_ALIAS("zpool-zblock");
> > > > +
> > > > +static int __init init_zblock(void)
> > > > +{
> > > > +       pr_info("loaded\n");
> > > > +       zpool_register_driver(&zblock_zpool_driver);
> > > > +       return 0;
> > > > +}
> > > > +
> > > > +static void __exit exit_zblock(void)
> > > > +{
> > > > +       zpool_unregister_driver(&zblock_zpool_driver);
> > > > +       pr_info("unloaded\n");
> > > > +}
> > > > +
> > > > +module_init(init_zblock);
> > > > +module_exit(exit_zblock);
> > > > +
> > > > +MODULE_LICENSE("GPL");
> > > > +MODULE_AUTHOR("Ananda Badmaev <a.badmaev@clicknet.pro>");
> > > > +MODULE_DESCRIPTION("Block allocator for compressed pages");
> > > > --
> > > > 2.34.1
> > > >
> > > >



* Re: [PATCH v5] mm: add zblock - new allocator for use via zpool API
  2022-09-29  8:53       ` Vitaly Wool
@ 2022-09-29 18:28         ` Yosry Ahmed
  0 siblings, 0 replies; 10+ messages in thread
From: Yosry Ahmed @ 2022-09-29 18:28 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: ananda, Linux-MM

On Thu, Sep 29, 2022 at 1:53 AM Vitaly Wool <vitaly.wool@konsulko.com> wrote:
>
> On Thu, Sep 29, 2022 at 9:59 AM Yosry Ahmed <yosryahmed@google.com> wrote:
> >
> > On Wed, Sep 28, 2022 at 11:55 PM Vitaly Wool <vitaly.wool@konsulko.com> wrote:
> > >
> > > On Wed, Sep 28, 2022 at 8:38 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> > > >
> > > > On Wed, Sep 28, 2022 at 1:06 AM ananda <a.badmaev@clicknet.pro> wrote:
> > > > >
> > > > > From: Ananda <a.badmaev@clicknet.pro>
> > > > >
> > > > >     Zblock stores integer number of compressed objects per zblock block.
> > > > > These blocks consist of several physical pages (1/2/4/8) and are arranged
> > > > > in linked lists.
> > > > >     The range from 0 to PAGE_SIZE is divided into the number of intervals
> > > > > corresponding to the number of lists and each list only operates objects
> > > > > of size from its interval. Thus the block lists are isolated from each
> > > > > other, which makes it possible to simultaneously perform actions with
> > > > > several objects from different lists.
> > > > >     Blocks make it possible to densely arrange objects of various sizes
> > > > > resulting in low internal fragmentation. Also this allocator tries to fill
> > > > > incomplete blocks instead of adding new ones thus in many cases providing
> > > > > a compression ratio substantially higher than z3fold and zbud.
> > > > >     Zblock does not require MMU and also is superior to zsmalloc with
> > > > > regard to the worst execution times, thus allowing for better response time
> > > > > and real-time characteristics of the whole system.
> > > > >
> > > >
> > > > It seems to me, and I can be wrong, that there is some overlap in
> > > > design and goals between this zpool backend and zsmalloc. They both
> > > > try to avoid internal fragmentation by avoiding the static slots used
> > > > by zbud and z3fold, and instead pack compressed pages more
> > > > dynamically. They both have some sort of concurrency handling
> > > > (separate block lists in zblock vs. classes in zsmalloc). A key
> > > > difference is that zsmalloc avoids higher order allocations (at least
> > > > based on its docs), and instead allows compressed pages to span across
> > > > 0-order page boundaries.
> > >
> > > Well, another key difference is that zsmalloc may only work on
> > > MMU-enabled systems.
> > >
> > > > The key differences I see here (based on this commit message and
> > > > zsmalloc docs) are:
> > > > a) Blocks in zblock can consist of higher order pages.
> > > > b) Compressed pages in zsmalloc can span page boundaries (I am
> > > > assuming this isn't the case for zblock).
> > > >
> > > > It appears to me that if zblock has better performance than zsmalloc,
> > > > it can be because pages in zblock are physically contiguous vs. the
> > > > 0-order pages in zsmalloc (TLB misses, cache misses, etc). Is my
> > > > assumption correct?
> > > >
> > > > If yes, would it be better to implement those changes as some tunable
> > > > extension to zsmalloc? It would make it easier if we have fewer
> > > > zpool backends overall, and also easier for current users of zsmalloc to
> > > > experiment with these changes.
> > >
> > > Easier to whom? Not for me, nor for anyone using zswap, that's what I
> > > have to say.
> > > zpool API is created to unify compression backends and so I would
> > > strongly prefer continuing having zpool for backend configuration and
> > > selection, rather than implementing a custom in-zsmalloc selection
> > > mechanism.
> > >
> > > I don't think merging is a good idea, and here are the reasons:
> > > - zsmalloc's code is already almost incomprehensible
> > > - zsmalloc's main objective is density, while zblock aims for low latency
> > > - merging the two approaches within zsmalloc would mean creating some
> > > internal selection mechanism within zsmalloc which I would absolutely
> > > like to avoid (for smaller RAM devices I usually don't compile
> > > zsmalloc at all or compile it as a module).
> > >
> >
> > Thanks for taking the time to respond to this :)
> >
> > I am sorry if my intention was not clear, but I did not mean that we
> > should have zblock be added to zsmalloc such that we "select" between
> > zsmalloc and zblock. What I meant (which can still be utter nonsense)
> > is that if the differences between zsmalloc and zblock can be
> > reimagined as improvements to zsmalloc, then maybe this would be
> > better, to have fewer zpool backends overall if we don't need more.
>
> Well, I'm not entirely sure we should aim for having less zpool
> backends, but if that's something we'd have to do then I'd rather mark
> z3fold as obsolete and have zblock taking its place than merge zblock
> with zsmalloc.
>

I was worried about people not knowing which backend to use if they are
close enough in behavior, but I guess proper documentation of what's
different in zblock compared to the other zpool backends helps solve the
problem.
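
FWIW, from the zswap side a backend is mostly just a zpool type string,
so switching between them for an experiment is cheap. Roughly like the
sketch below (the helper and the exact zpool_* names should be treated
as illustrative rather than authoritative, and error handling is
minimal):

/*
 * Sketch: how a zpool user such as zswap stores one compressed object,
 * independently of which backend was selected by the type string at
 * zpool_create_pool("zblock", "zswap", GFP_KERNEL, &ops) time.
 */
static int store_compressed(struct zpool *zpool, const void *cbuf,
			    size_t clen, unsigned long *handle)
{
	void *dst;
	int ret;

	/* the backend (zbud/z3fold/zsmalloc/zblock) picks the slot */
	ret = zpool_malloc(zpool, clen, GFP_KERNEL, handle);
	if (ret)
		return ret;

	/* copy the compressed data into the backend's storage */
	dst = zpool_map_handle(zpool, *handle, ZPOOL_MM_WO);
	memcpy(dst, cbuf, clen);
	zpool_unmap_handle(zpool, *handle);
	return 0;
}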

> > For example, maybe we can have the default allocation order be a
> > config option. By default it would be 0, which maintains the current
> > behavior, and then we can configure it to something higher to get a
> > behavior closer to zblock. This is of course an oversimplification,
> > but if most key differences can be formulated similarly, then maybe we
> > can get improved zsmalloc instead of zblock, with perhaps a few
> > tunables (tunables like allocation order, *not* different selectable
> > modes).
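
(To make that concrete, here is roughly what I had in mind; it is an
untested sketch, and CONFIG_ZSMALLOC_PAGE_ORDER plus the helper below
are made-up names used purely for illustration. Order 0 would keep
today's 0-order backing pages, while higher orders would give physically
contiguous multi-page blocks similar to zblock.)

/* illustrative only, not actual zsmalloc code */
#ifndef CONFIG_ZSMALLOC_PAGE_ORDER
#define CONFIG_ZSMALLOC_PAGE_ORDER 0	/* hypothetical Kconfig default */
#endif

static void *alloc_backing_block(gfp_t gfp)
{
	/*
	 * With order 0 this is a single page, as today; with a higher
	 * order it is one physically contiguous multi-page block.
	 */
	return (void *)__get_free_pages(gfp, CONFIG_ZSMALLOC_PAGE_ORDER);
}
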
>
> There's one more important thing that I forgot to mention. zsmalloc
> doesn't support reclaim, and I greatly doubt it ever will, as opposed
> to zblock which does. Not supporting reclaim makes zsmalloc a bad fit
> for zswap, so having another backend that is almost as good as zsmalloc
> with regard to compression ratio *and* supports reclaim is an important
> step forward.
>

We use zsmalloc with zswap at Google, but we don't have writeback as
we use it without a real swap device. I think there might be interest
from others in implementing writeback for zsmalloc, but that's orthogonal
to this thread (and that might change now with zblock).
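
For reference, writeback with this series would go through the usual
zpool shrink path: the user asks the pool to reclaim, and the backend
calls the user's evict callback for each object it wants written back
(zblock does that per slot in zblock_reclaim_block()). A rough sketch of
the user side, with the callback body reduced to a comment:

/* user-side reclaim hook; names and details are illustrative */
static int my_evict(struct zpool *zpool, unsigned long handle)
{
	/*
	 * Map the object read-only with zpool_map_handle(), write it to
	 * the real swap device, unmap it, then return 0 so the backend
	 * can free the slot.
	 */
	return 0;
}

static const struct zpool_ops my_zpool_ops = {
	.evict = my_evict,	/* passed to zpool_create_pool() */
};

static void reclaim_some(struct zpool *zpool)
{
	unsigned int reclaimed = 0;

	/* ask the backend to try to evict objects worth ~16 pages */
	zpool_shrink(zpool, 16, &reclaimed);
}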

> > You are, of course, way more familiar with this code than me, so
> > please excuse me if what I am saying still sounds like nonsense. I am
> > just trying to avoid having similar zpool backends if possible.
> >
> > zsmalloc code being incomprehensible is another point that I am not
> > considering here as well, so perhaps even if everything else checks
> > out the added complexity isn't worth it. I can't judge this. I was
> > only making a suggestion.
>
> I did not say it was incomprehensible, but it's getting close IMHO :)
> And if we add zblock-like functionality there then it probably will
> become incomprehensible.
> So far zsmalloc's code is more than 4x larger than zblock's, and
> if we can keep it this way, we'll have a separate nice backend that is
> more versatile than zsmalloc, almost as good compression-wise, and all
> that at almost the simplicity level of zbud. I believe this is the way
> to go.
>

All makes sense to me. Thanks for the clarification!


> Thanks,
> Vitaly
>
> > >
> > > > > Signed-off-by: Ananda <a.badmaev@clicknet.pro>
> > > > > ---
> > > > >
> > > > > v2: fixed compiler warnings
> > > > >
> > > > > v3: added documentation and const modifier to struct tree_descr
> > > > >
> > > > > v4: - fixed gfp flags for block allocation
> > > > >     - fixed potential memory leak when allocating blocks
> > > > >     - resolved some issues with code style and warnings from checkpatch
> > > > >       (except warning about single line config symbol description)
> > > > >     - moved test results from documentation to changelog
> > > > >
> > > > > v5: - "direct" handle mapping and use of linked lists instead of red-black
> > > > >       trees resulting in faster operations and a bit simpler code
> > > > >     - renamed ztree -> zblock
> > > > >     - edited various comments and descriptions
> > > > >
> > > > >  Documentation/mm/zblock.rst |  31 ++
> > > > >  MAINTAINERS                 |   7 +
> > > > >  mm/Kconfig                  |  17 +
> > > > >  mm/Makefile                 |   1 +
> > > > >  mm/zblock.c                 | 637 ++++++++++++++++++++++++++++++++++++
> > > > >  5 files changed, 693 insertions(+)
> > > > >  create mode 100644 Documentation/mm/zblock.rst
> > > > >  create mode 100644 mm/zblock.c
> > > > >
> > > > > diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
> > > > > new file mode 100644
> > > > > index 000000000000..5008ce90b54b
> > > > > --- /dev/null
> > > > > +++ b/Documentation/mm/zblock.rst
> > > > > @@ -0,0 +1,31 @@
> > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > +
> > > > > +.. _block:
> > > > > +
> > > > > +======
> > > > > +zblock
> > > > > +======
> > > > > +
> > > > > +Zblock stores integer number of compressed objects per block. These
> > > > > +blocks consist of several consecutive physical pages (from 1 to 8) and
> > > > > +are arranged in lists. The range from 0 to PAGE_SIZE is divided into the
> > > > > +number of intervals corresponding to the number of lists and each list
> > > > > +only operates objects of size from its interval. Thus the block lists are
> > > > > +isolated from each other, which makes it possible to simultaneously
> > > > > +perform actions with several objects from different lists.
> > > > > +
> > > > > +Blocks make it possible to densely arrange objects of various sizes
> > > > > +resulting in low internal fragmentation. Also this allocator tries to fill
> > > > > +incomplete blocks instead of adding new ones thus in many cases providing
> > > > > +a compression ratio substantially higher than z3fold and zbud. Zblock does
> > > > > +not require MMU and also is superior to zsmalloc with regard to the worst
> > > > > +execution times, thus allowing for better response time and real-time
> > > > > +characteristics of the whole system.
> > > > > +
> > > > > +Like z3fold and zsmalloc zblock_alloc() does not return a dereferenceable
> > > > > +pointer. Instead, it returns an unsigned long handle which encodes actual
> > > > > +location of the allocated object.
> > > > > +
> > > > > +Unlike zbud and z3fold zblock works well with objects of various sizes - both
> > > > > +highly compressed and poorly compressed including cases where both types
> > > > > +are present.
> > > > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > > > index f1390b8270b2..014fb19eb2cc 100644
> > > > > --- a/MAINTAINERS
> > > > > +++ b/MAINTAINERS
> > > > > @@ -22457,6 +22457,13 @@ L:     linux-mm@kvack.org
> > > > >  S:     Maintained
> > > > >  F:     mm/z3fold.c
> > > > >
> > > > > +ZBLOCK COMPRESSED PAGE ALLOCATOR
> > > > > +M:     Ananda Badmaev <a.badmaev@clicknet.pro>
> > > > > +M:     Vitaly Wool <vitaly.wool@konsulko.com>
> > > > > +L:     linux-mm@kvack.org
> > > > > +S:     Maintained
> > > > > +F:     mm/zblock.c
> > > > > +
> > > > >  ZD1211RW WIRELESS DRIVER
> > > > >  M:     Ulrich Kunitz <kune@deine-taler.de>
> > > > >  L:     linux-wireless@vger.kernel.org
> > > > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > > > index 0331f1461f81..470c80f5726d 100644
> > > > > --- a/mm/Kconfig
> > > > > +++ b/mm/Kconfig
> > > > > @@ -149,6 +149,12 @@ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > > > >         select ZSMALLOC
> > > > >         help
> > > > >           Use the zsmalloc allocator as the default allocator.
> > > > > +
> > > > > +config ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > > > > +       bool "zblock"
> > > > > +       select ZBLOCK
> > > > > +       help
> > > > > +         Use the zblock allocator as the default allocator.
> > > > >  endchoice
> > > > >
> > > > >  config ZSWAP_ZPOOL_DEFAULT
> > > > > @@ -157,6 +163,7 @@ config ZSWAP_ZPOOL_DEFAULT
> > > > >         default "zbud" if ZSWAP_ZPOOL_DEFAULT_ZBUD
> > > > >         default "z3fold" if ZSWAP_ZPOOL_DEFAULT_Z3FOLD
> > > > >         default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
> > > > > +       default "zblock" if ZSWAP_ZPOOL_DEFAULT_ZBLOCK
> > > > >         default ""
> > > > >
> > > > >  config ZBUD
> > > > > @@ -187,6 +194,16 @@ config ZSMALLOC
> > > > >           pages of various compression levels efficiently. It achieves
> > > > >           the highest storage density with the least amount of fragmentation.
> > > > >
> > > > > +config ZBLOCK
> > > > > +       tristate "Simple block allocator (zblock)"
> > > > > +       depends on ZPOOL
> > > > > +       help
> > > > > +         A special purpose allocator for storing compressed pages.
> > > > > +         It stores integer number of compressed pages per block and
> > > > > +         each block consists of number of physical pages being a power of 2.
> > > > > +         zblock provides fast read/write, limited worst case time for
> > > > > +         operations and good compression ratio in most scenarios.
> > > > > +
> > > > >  config ZSMALLOC_STAT
> > > > >         bool "Export zsmalloc statistics"
> > > > >         depends on ZSMALLOC
> > > > > diff --git a/mm/Makefile b/mm/Makefile
> > > > > index 9a564f836403..eb7235da6e61 100644
> > > > > --- a/mm/Makefile
> > > > > +++ b/mm/Makefile
> > > > > @@ -110,6 +110,7 @@ obj-$(CONFIG_ZPOOL) += zpool.o
> > > > >  obj-$(CONFIG_ZBUD)     += zbud.o
> > > > >  obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
> > > > >  obj-$(CONFIG_Z3FOLD)   += z3fold.o
> > > > > +obj-$(CONFIG_ZBLOCK)   += zblock.o
> > > > >  obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
> > > > >  obj-$(CONFIG_CMA)      += cma.o
> > > > >  obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
> > > > > diff --git a/mm/zblock.c b/mm/zblock.c
> > > > > new file mode 100644
> > > > > index 000000000000..b389f43e0c26
> > > > > --- /dev/null
> > > > > +++ b/mm/zblock.c
> > > > > @@ -0,0 +1,637 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > > +/*
> > > > > + * zblock.c
> > > > > + *
> > > > > + * Author: Ananda Badmaev <a.badmaev@clicknet.pro>
> > > > > + * Copyright (C) 2022, Konsulko AB.
> > > > > + *
> > > > > + * This implementation is based on z3fold written by Vitaly Wool.
> > > > > + * Zblock is a small object allocator with the intention to serve as a
> > > > > + * zpool backend. It operates on page blocks which consist of number
> > > > > + * of physical pages being a power of 2 and store integer number of
> > > > > + * compressed pages per block which results in determinism and simplicity.
> > > > > + *
> > > > > + * zblock doesn't export any API and is meant to be used via zpool API.
> > > > > + */
> > > > > +
> > > > > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > > > > +
> > > > > +#include <linux/atomic.h>
> > > > > +#include <linux/list.h>
> > > > > +#include <linux/mm.h>
> > > > > +#include <linux/module.h>
> > > > > +#include <linux/preempt.h>
> > > > > +#include <linux/slab.h>
> > > > > +#include <linux/spinlock.h>
> > > > > +#include <linux/zpool.h>
> > > > > +
> > > > > +#define SLOT_FREE 0
> > > > > +#define SLOT_OCCUPIED 1
> > > > > +#define SLOT_MAPPED 2
> > > > > +#define SLOT_UNMAPPED 3
> > > > > +
> > > > > +#define SLOT_BITS 5
> > > > > +#define MAX_SLOTS (1 << SLOT_BITS)
> > > > > +#define SLOT_MASK ((0x1UL << SLOT_BITS) - 1)
> > > > > +
> > > > > +#define BLOCK_DATA_SIZE(order) ((PAGE_SIZE << order) - sizeof(struct zblock_block))
> > > > > +#define SLOT_SIZE(nslots, order) (round_down((BLOCK_DATA_SIZE(order) / nslots), sizeof(long)))
> > > > > +
> > > > > +#define BLOCK_CACHE_SIZE 32
> > > > > +
> > > > > +struct zblock_pool;
> > > > > +
> > > > > +struct zblock_ops {
> > > > > +       int (*evict)(struct zblock_pool *pool, unsigned long handle);
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * struct zblock_block - block metadata
> > > > > + * Block consists of several (1/2/4/8) pages and contains fixed
> > > > > + * integer number of slots for allocating compressed pages.
> > > > > + *
> > > > > + * lock:                        protects block
> > > > > + * block_node:          links block into the relevant list in the pool
> > > > > + * slot_info:       contains data about free/occupied slots
> > > > > + * free_slots:          number of free slots in the block
> > > > > + * under_reclaim:    if true shows that block is being evicted
> > > > > + */
> > > > > +struct zblock_block {
> > > > > +       spinlock_t lock;
> > > > > +       struct list_head block_node;
> > > > > +       u8 slot_info[MAX_SLOTS];
> > > > > +       unsigned int free_slots;
> > > > > +       bool under_reclaim;
> > > > > +};
> > > > > +/**
> > > > > + * struct block_desc - general metadata for block lists
> > > > > + * Each block list stores only blocks of corresponding type which means
> > > > > + * that all blocks in it have the same number and size of slots.
> > > > > + * All slots are aligned to size of long.
> > > > > + *
> > > > > + * slot_size:          size of slot for this list
> > > > > + * slots_per_block:    number of slots per block for this list
> > > > > + * order:                      order for __get_free_pages
> > > > > + */
> > > > > +static const struct block_desc {
> > > > > +       const unsigned int slot_size;
> > > > > +       const unsigned short slots_per_block;
> > > > > +       const unsigned short order;
> > > > > +} block_desc[] = {
> > > > > +       { SLOT_SIZE(32, 0), 32, 0 },
> > > > > +       { SLOT_SIZE(22, 0), 22, 0 },
> > > > > +       { SLOT_SIZE(17, 0), 17, 0 },
> > > > > +       { SLOT_SIZE(13, 0), 13, 0 },
> > > > > +       { SLOT_SIZE(11, 0), 11, 0 },
> > > > > +       { SLOT_SIZE(9, 0), 9, 0 },
> > > > > +       { SLOT_SIZE(8, 0), 8, 0 },
> > > > > +       { SLOT_SIZE(14, 1), 14, 1 },
> > > > > +       { SLOT_SIZE(12, 1), 12, 1 },
> > > > > +       { SLOT_SIZE(11, 1), 11, 1 },
> > > > > +       { SLOT_SIZE(10, 1), 10, 1 },
> > > > > +       { SLOT_SIZE(9, 1), 9, 1 },
> > > > > +       { SLOT_SIZE(8, 1), 8, 1 },
> > > > > +       { SLOT_SIZE(15, 2), 15, 2 },
> > > > > +       { SLOT_SIZE(14, 2), 14, 2 },
> > > > > +       { SLOT_SIZE(13, 2), 13, 2 },
> > > > > +       { SLOT_SIZE(12, 2), 12, 2 },
> > > > > +       { SLOT_SIZE(11, 2), 11, 2 },
> > > > > +       { SLOT_SIZE(10, 2), 10, 2 },
> > > > > +       { SLOT_SIZE(9, 2), 9, 2 },
> > > > > +       { SLOT_SIZE(8, 2), 8, 2 },
> > > > > +       { SLOT_SIZE(15, 3), 15, 3 },
> > > > > +       { SLOT_SIZE(14, 3), 14, 3 },
> > > > > +       { SLOT_SIZE(13, 3), 13, 3 },
> > > > > +       { SLOT_SIZE(12, 3), 12, 3 },
> > > > > +       { SLOT_SIZE(11, 3), 11, 3 },
> > > > > +       { SLOT_SIZE(10, 3), 10, 3 },
> > > > > +       { SLOT_SIZE(9, 3), 9, 3 },
> > > > > +       { SLOT_SIZE(7, 3), 7, 3 }
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * struct block_list - stores metadata of particular list
> > > > > + * lock:                       protects list
> > > > > + * head:                       head of this list
> > > > > + * block_cache:                blocks with free slots
> > > > > + * block_count:                total number of blocks in the list
> > > > > + */
> > > > > +struct block_list {
> > > > > +       spinlock_t lock;
> > > > > +       struct list_head head;
> > > > > +       struct zblock_block *block_cache[BLOCK_CACHE_SIZE];
> > > > > +       unsigned long block_count;
> > > > > +};
> > > > > +
> > > > > +/**
> > > > > + * struct zblock_pool - stores metadata for each zblock pool
> > > > > + * @block_lists:       array of block lists
> > > > > + * @ops:                       pointer to a structure of user defined operations specified at
> > > > > + *                                     pool creation time.
> > > > > + * @zpool:                     zpool driver
> > > > > + * @zpool_ops:         zpool operations structure with an evict callback
> > > > > + * @alloc_flag:                protects block allocation from memory leak
> > > > > + *
> > > > > + * This structure is allocated at pool creation time and maintains metadata
> > > > > + * for a particular zblock pool.
> > > > > + */
> > > > > +struct zblock_pool {
> > > > > +       struct block_list block_lists[ARRAY_SIZE(block_desc)];
> > > > > +       const struct zblock_ops *ops;
> > > > > +       struct zpool *zpool;
> > > > > +       const struct zpool_ops *zpool_ops;
> > > > > +       atomic_t alloc_flag;
> > > > > +};
> > > > > +
> > > > > +/*****************
> > > > > + * Helpers
> > > > > + *****************/
> > > > > +
> > > > > +static void cache_insert_block(struct zblock_block *block, struct block_list *list)
> > > > > +{
> > > > > +       unsigned int i, min_free_slots, min_index;
> > > > > +
> > > > > +       min_free_slots = MAX_SLOTS;
> > > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > > +               if (!list->block_cache[i] || !(list->block_cache[i])->free_slots) {
> > > > > +                       list->block_cache[i] = block;
> > > > > +                       return;
> > > > > +               }
> > > > > +               if ((list->block_cache[i])->free_slots < min_free_slots) {
> > > > > +                       min_free_slots = (list->block_cache[i])->free_slots;
> > > > > +                       min_index = i;
> > > > > +               }
> > > > > +       }
> > > > > +       list->block_cache[min_index] = block;
> > > > > +}
> > > > > +
> > > > > +static struct zblock_block *cache_find_block(struct block_list *list)
> > > > > +{
> > > > > +       int i;
> > > > > +
> > > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > > +               if (list->block_cache[i] && (list->block_cache[i])->free_slots)
> > > > > +                       return list->block_cache[i];
> > > > > +       }
> > > > > +       return NULL;
> > > > > +}
> > > > > +
> > > > > +static int is_in_cache(struct zblock_block *block, struct block_list *list)
> > > > > +{
> > > > > +       int i;
> > > > > +
> > > > > +       for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
> > > > > +               if (block == list->block_cache[i])
> > > > > +                       return i;
> > > > > +       }
> > > > > +       return -1;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * allocate new block and add it to corresponding block list
> > > > > + */
> > > > > +static struct zblock_block *alloc_block(struct zblock_pool *pool,
> > > > > +                                                                       int block_type, gfp_t gfp)
> > > > > +{
> > > > > +       struct zblock_block *block;
> > > > > +       struct block_list *list;
> > > > > +
> > > > > +       block = (void *)__get_free_pages(gfp, block_desc[block_type].order);
> > > > > +       if (!block)
> > > > > +               return NULL;
> > > > > +
> > > > > +       list = &(pool->block_lists)[block_type];
> > > > > +
> > > > > +       /* init block data  */
> > > > > +       spin_lock_init(&block->lock);
> > > > > +       memset(block->slot_info, SLOT_FREE, block_desc[block_type].slots_per_block);
> > > > > +       block->free_slots = block_desc[block_type].slots_per_block;
> > > > > +       block->under_reclaim = false;
> > > > > +
> > > > > +       spin_lock(&list->lock);
> > > > > +       /* inserting block into list */
> > > > > +       INIT_LIST_HEAD(&block->block_node);
> > > > > +       list_add(&block->block_node, &list->head);
> > > > > +       cache_insert_block(block, list);
> > > > > +       list->block_count++;
> > > > > +       spin_unlock(&list->lock);
> > > > > +       return block;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Encodes the handle of a particular slot in the pool using metadata
> > > > > + */
> > > > > +static inline unsigned long metadata_to_handle(struct zblock_block *block,
> > > > > +                                                       unsigned int block_type, unsigned int slot)
> > > > > +{
> > > > > +       return (unsigned long)(block) + (block_type << SLOT_BITS) + slot;
> > > > > +}
> > > > > +
> > > > > +/* Returns block, block type and slot in the pool corresponding to handle */
> > > > > +static inline struct zblock_block *handle_to_metadata(unsigned long handle,
> > > > > +                                               unsigned int *block_type, unsigned int *slot)
> > > > > +{
> > > > > +       *block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
> > > > > +       *slot = handle & SLOT_MASK;
> > > > > +       return (struct zblock_block *)(handle & PAGE_MASK);
> > > > > +}
> > > > > +
> > > > > +
> > > > > +/*****************
> > > > > + * API Functions
> > > > > + *****************/
> > > > > +/**
> > > > > + * zblock_create_pool() - create a new zblock pool
> > > > > + * @gfp:       gfp flags when allocating the zblock pool structure
> > > > > + * @ops:       user-defined operations for the zblock pool
> > > > > + *
> > > > > + * Return: pointer to the new zblock pool or NULL if the metadata allocation
> > > > > + * failed.
> > > > > + */
> > > > > +static struct zblock_pool *zblock_create_pool(gfp_t gfp, const struct zblock_ops *ops)
> > > > > +{
> > > > > +       struct zblock_pool *pool;
> > > > > +       struct block_list *list;
> > > > > +       int i, j;
> > > > > +
> > > > > +       pool = kmalloc(sizeof(struct zblock_pool), gfp);
> > > > > +       if (!pool)
> > > > > +               return NULL;
> > > > > +
> > > > > +       /* init each block list */
> > > > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > > > +               list = &(pool->block_lists)[i];
> > > > > +               spin_lock_init(&list->lock);
> > > > > +               INIT_LIST_HEAD(&list->head);
> > > > > +               for (j = 0; j < BLOCK_CACHE_SIZE; j++)
> > > > > +                       list->block_cache[j] = NULL;
> > > > > +               list->block_count = 0;
> > > > > +       }
> > > > > +       pool->ops = ops;
> > > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > > +       return pool;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * zblock_destroy_pool() - destroys an existing zblock pool
> > > > > + * @pool:      the zblock pool to be destroyed
> > > > > + *
> > > > > + */
> > > > > +static void zblock_destroy_pool(struct zblock_pool *pool)
> > > > > +{
> > > > > +       kfree(pool);
> > > > > +}
> > > > > +
> > > > > +
> > > > > +/**
> > > > > + * zblock_alloc() - allocates a slot of appropriate size
> > > > > + * @pool:      zblock pool from which to allocate
> > > > > + * @size:      size in bytes of the desired allocation
> > > > > + * @gfp:       gfp flags used if the pool needs to grow
> > > > > + * @handle:    handle of the new allocation
> > > > > + *
> > > > > + * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
> > > > > + * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
> > > > > + * a new slot.
> > > > > + */
> > > > > +static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
> > > > > +                       unsigned long *handle)
> > > > > +{
> > > > > +       unsigned int block_type, slot;
> > > > > +       struct zblock_block *block;
> > > > > +       struct block_list *list;
> > > > > +
> > > > > +       if (!size)
> > > > > +               return -EINVAL;
> > > > > +
> > > > > +       if (size > PAGE_SIZE)
> > > > > +               return -ENOSPC;
> > > > > +
> > > > > +       /* find basic block type with suitable slot size */
> > > > > +       for (block_type = 0; block_type < ARRAY_SIZE(block_desc); block_type++) {
> > > > > +               if (size <= block_desc[block_type].slot_size)
> > > > > +                       break;
> > > > > +       }
> > > > > +       list = &(pool->block_lists[block_type]);
> > > > > +
> > > > > +check:
> > > > > +       spin_lock(&list->lock);
> > > > > +       /* check if there are free slots in cache */
> > > > > +       block = cache_find_block(list);
> > > > > +       if (block)
> > > > > +               goto found;
> > > > > +       spin_unlock(&list->lock);
> > > > > +
> > > > > +       /* no block with free slots found, try to allocate a new empty block */
> > > > > +       if (atomic_cmpxchg(&pool->alloc_flag, 0, 1))
> > > > > +               goto check;
> > > > > +       block = alloc_block(pool, block_type, gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
> > > > > +       if (block) {
> > > > > +               spin_lock(&list->lock);
> > > > > +               goto found;
> > > > > +       }
> > > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > > +       return -ENOMEM;
> > > > > +
> > > > > +found:
> > > > > +       spin_lock(&block->lock);
> > > > > +       block->free_slots--;
> > > > > +       spin_unlock(&list->lock);
> > > > > +       /* find the first free slot in block */
> > > > > +       for (slot = 0; slot < block_desc[block_type].slots_per_block; slot++) {
> > > > > +               if (block->slot_info[slot] == SLOT_FREE)
> > > > > +                       break;
> > > > > +       }
> > > > > +       block->slot_info[slot] = SLOT_OCCUPIED;
> > > > > +       spin_unlock(&block->lock);
> > > > > +       *handle = metadata_to_handle(block, block_type, slot);
> > > > > +       atomic_set(&pool->alloc_flag, 0);
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * zblock_free() - frees the allocation associated with the given handle
> > > > > + * @pool:      pool in which the allocation resided
> > > > > + * @handle:    handle associated with the allocation returned by zblock_alloc()
> > > > > + *
> > > > > + */
> > > > > +static void zblock_free(struct zblock_pool *pool, unsigned long handle)
> > > > > +{
> > > > > +       unsigned int slot, block_type;
> > > > > +       struct zblock_block *block;
> > > > > +       struct block_list *list;
> > > > > +       int i;
> > > > > +
> > > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > > +       list = &(pool->block_lists[block_type]);
> > > > > +
> > > > > +       if (block->under_reclaim)
> > > > > +               return;
> > > > > +       spin_lock(&list->lock);
> > > > > +       i = is_in_cache(block, list);
> > > > > +       block->free_slots++;
> > > > > +       /* if all slots in block are empty delete whole block */
> > > > > +       if (block->free_slots == block_desc[block_type].slots_per_block) {
> > > > > +               list_del(&block->block_node);
> > > > > +               list->block_count--;
> > > > > +
> > > > > +               /* if cached block to be deleted */
> > > > > +               if (i != -1)
> > > > > +                       list->block_cache[i] = NULL;
> > > > > +               spin_unlock(&list->lock);
> > > > > +               free_pages((unsigned long)block, block_desc[block_type].order);
> > > > > +               return;
> > > > > +       }
> > > > > +       /* if block is not cached update cache */
> > > > > +       if (i == -1)
> > > > > +               cache_insert_block(block, list);
> > > > > +
> > > > > +       spin_lock(&block->lock);
> > > > > +       spin_unlock(&list->lock);
> > > > > +       block->slot_info[slot] = SLOT_FREE;
> > > > > +       spin_unlock(&block->lock);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * zblock_reclaim_block() - evicts allocations from block and frees it
> > > > > + * @pool:      pool from which a block will attempt to be evicted
> > > > > + *
> > > > > + * Returns: number of evicted slots if any were reclaimed,
> > > > > + *          -EAGAIN if nothing could be evicted from the chosen block,
> > > > > + *          or -EINVAL if there are no blocks to evict
> > > > > + */
> > > > > +static int zblock_reclaim_block(struct zblock_pool *pool)
> > > > > +{
> > > > > +       struct zblock_block *block;
> > > > > +       struct block_list *list;
> > > > > +       unsigned long handle, slot;
> > > > > +       int ret, i, reclaimed, block_type;
> > > > > +
> > > > > +       /* start with list storing blocks with the worst compression and try
> > > > > +        * to evict the first added (oldest) block in this list
> > > > > +        */
> > > > > +       for (block_type = ARRAY_SIZE(block_desc) - 1; block_type >= 0; --block_type) {
> > > > > +               list = &(pool->block_lists[block_type]);
> > > > > +               spin_lock(&list->lock);
> > > > > +
> > > > > +               /* skip this list if it is empty */
> > > > > +               if (list_empty(&list->head)) {
> > > > > +                       spin_unlock(&list->lock);
> > > > > +                       continue;
> > > > > +               }
> > > > > +               /* find the oldest block in list */
> > > > > +               block = list_last_entry(&list->head, struct zblock_block, block_node);
> > > > > +               i = is_in_cache(block, list);
> > > > > +               /* skip iteration if this block is cached */
> > > > > +               if (i != -1) {
> > > > > +                       spin_unlock(&list->lock);
> > > > > +                       continue;
> > > > > +               }
> > > > > +               block->under_reclaim = true;
> > > > > +               spin_unlock(&list->lock);
> > > > > +               reclaimed = 0;
> > > > > +
> > > > > +               /* try to evict all UNMAPPED slots in block */
> > > > > +               for (slot = 0; slot < block_desc[block_type].slots_per_block; ++slot) {
> > > > > +                       if (block->slot_info[slot] == SLOT_UNMAPPED) {
> > > > > +                               handle = metadata_to_handle(block, block_type, slot);
> > > > > +                               ret = pool->ops->evict(pool, handle);
> > > > > +                               if (ret)
> > > > > +                                       break;
> > > > > +
> > > > > +                               ++reclaimed;
> > > > > +                               spin_lock(&block->lock);
> > > > > +                               block->slot_info[slot] = SLOT_FREE;
> > > > > +                               spin_unlock(&block->lock);
> > > > > +                               block->free_slots++;
> > > > > +                       }
> > > > > +               }
> > > > > +               spin_lock(&list->lock);
> > > > > +               /* some occupied slots remained - insert block */
> > > > > +               if (block->free_slots != block_desc[block_type].slots_per_block) {
> > > > > +                       block->under_reclaim = false;
> > > > > +                       cache_insert_block(block, list);
> > > > > +                       spin_unlock(&list->lock);
> > > > > +               } else {
> > > > > +               /* all slots are free - delete this block */
> > > > > +                       list_del(&block->block_node);
> > > > > +                       list->block_count--;
> > > > > +                       spin_unlock(&list->lock);
> > > > > +                       free_pages((unsigned long)block, block_desc[block_type].order);
> > > > > +               }
> > > > > +               if (reclaimed != 0)
> > > > > +                       return reclaimed;
> > > > > +               return -EAGAIN;
> > > > > +       }
> > > > > +       return -EINVAL;
> > > > > +}
> > > > > +
> > > > > +
> > > > > +/**
> > > > > + * zblock_map() - maps the allocation associated with the given handle
> > > > > + * @pool:      pool in which the allocation resides
> > > > > + * @handle:    handle associated with the allocation to be mapped
> > > > > + *
> > > > > + *
> > > > > + * Returns: a pointer to the mapped allocation
> > > > > + */
> > > > > +static void *zblock_map(struct zblock_pool *pool, unsigned long handle)
> > > > > +{
> > > > > +       unsigned int block_type, slot;
> > > > > +       struct zblock_block *block;
> > > > > +
> > > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > > +       spin_lock(&block->lock);
> > > > > +       block->slot_info[slot] = SLOT_MAPPED;
> > > > > +       spin_unlock(&block->lock);
> > > > > +       return (void *)(block + 1) + slot * block_desc[block_type].slot_size;
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * zblock_unmap() - unmaps the allocation associated with the given handle
> > > > > + * @pool:      pool in which the allocation resides
> > > > > + * @handle:    handle associated with the allocation to be unmapped
> > > > > + */
> > > > > +static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
> > > > > +{
> > > > > +       unsigned int block_type, slot;
> > > > > +       struct zblock_block *block;
> > > > > +
> > > > > +       block = handle_to_metadata(handle, &block_type, &slot);
> > > > > +       spin_lock(&block->lock);
> > > > > +       block->slot_info[slot] = SLOT_UNMAPPED;
> > > > > +       spin_unlock(&block->lock);
> > > > > +}
> > > > > +
> > > > > +/**
> > > > > + * zblock_get_pool_size() - gets the zblock pool size in bytes
> > > > > + * @pool:      pool whose size is being queried
> > > > > + *
> > > > > + * Returns: size in bytes of the given pool.
> > > > > + */
> > > > > +static u64 zblock_get_pool_size(struct zblock_pool *pool)
> > > > > +{
> > > > > +       u64 total_size;
> > > > > +       int i;
> > > > > +
> > > > > +       total_size = 0;
> > > > > +       for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
> > > > > +               total_size += (pool->block_lists)[i].block_count
> > > > > +                               * (PAGE_SIZE << block_desc[i].order);
> > > > > +       }
> > > > > +       return total_size;
> > > > > +}
> > > > > +
> > > > > +/*****************
> > > > > + * zpool
> > > > > + ****************/
> > > > > +
> > > > > +static int zblock_zpool_evict(struct zblock_pool *pool, unsigned long handle)
> > > > > +{
> > > > > +       if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
> > > > > +               return pool->zpool_ops->evict(pool->zpool, handle);
> > > > > +       else
> > > > > +               return -ENOENT;
> > > > > +}
> > > > > +
> > > > > +static const struct zblock_ops zblock_zpool_ops = {
> > > > > +       .evict =        zblock_zpool_evict
> > > > > +};
> > > > > +
> > > > > +static void *zblock_zpool_create(const char *name, gfp_t gfp,
> > > > > +                                  const struct zpool_ops *zpool_ops,
> > > > > +                                  struct zpool *zpool)
> > > > > +{
> > > > > +       struct zblock_pool *pool;
> > > > > +
> > > > > +       pool = zblock_create_pool(gfp, &zblock_zpool_ops);
> > > > > +       if (pool) {
> > > > > +               pool->zpool = zpool;
> > > > > +               pool->zpool_ops = zpool_ops;
> > > > > +       }
> > > > > +       return pool;
> > > > > +}
> > > > > +
> > > > > +static void zblock_zpool_destroy(void *pool)
> > > > > +{
> > > > > +       zblock_destroy_pool(pool);
> > > > > +}
> > > > > +
> > > > > +static int zblock_zpool_malloc(void *pool, size_t size, gfp_t gfp,
> > > > > +                       unsigned long *handle)
> > > > > +{
> > > > > +       return zblock_alloc(pool, size, gfp, handle);
> > > > > +}
> > > > > +
> > > > > +static void zblock_zpool_free(void *pool, unsigned long handle)
> > > > > +{
> > > > > +       zblock_free(pool, handle);
> > > > > +}
> > > > > +
> > > > > +static int zblock_zpool_shrink(void *pool, unsigned int pages,
> > > > > +                       unsigned int *reclaimed)
> > > > > +{
> > > > > +       unsigned int total = 0;
> > > > > +       int ret = -EINVAL;
> > > > > +
> > > > > +       while (total < pages) {
> > > > > +               ret = zblock_reclaim_block(pool);
> > > > > +               if (ret < 0)
> > > > > +                       break;
> > > > > +               total += ret;
> > > > > +       }
> > > > > +       if (reclaimed)
> > > > > +               *reclaimed = total;
> > > > > +
> > > > > +       return ret;
> > > > > +}
> > > > > +
> > > > > +static void *zblock_zpool_map(void *pool, unsigned long handle,
> > > > > +                       enum zpool_mapmode mm)
> > > > > +{
> > > > > +       return zblock_map(pool, handle);
> > > > > +}
> > > > > +
> > > > > +static void zblock_zpool_unmap(void *pool, unsigned long handle)
> > > > > +{
> > > > > +       zblock_unmap(pool, handle);
> > > > > +}
> > > > > +
> > > > > +static u64 zblock_zpool_total_size(void *pool)
> > > > > +{
> > > > > +       return zblock_get_pool_size(pool);
> > > > > +}
> > > > > +
> > > > > +static struct zpool_driver zblock_zpool_driver = {
> > > > > +       .type =         "zblock",
> > > > > +       .owner =        THIS_MODULE,
> > > > > +       .create =       zblock_zpool_create,
> > > > > +       .destroy =      zblock_zpool_destroy,
> > > > > +       .malloc =       zblock_zpool_malloc,
> > > > > +       .free =         zblock_zpool_free,
> > > > > +       .shrink =       zblock_zpool_shrink,
> > > > > +       .map =          zblock_zpool_map,
> > > > > +       .unmap =        zblock_zpool_unmap,
> > > > > +       .total_size =   zblock_zpool_total_size,
> > > > > +};
> > > > > +
> > > > > +MODULE_ALIAS("zpool-zblock");
> > > > > +
> > > > > +static int __init init_zblock(void)
> > > > > +{
> > > > > +       pr_info("loaded\n");
> > > > > +       zpool_register_driver(&zblock_zpool_driver);
> > > > > +       return 0;
> > > > > +}
> > > > > +
> > > > > +static void __exit exit_zblock(void)
> > > > > +{
> > > > > +       zpool_unregister_driver(&zblock_zpool_driver);
> > > > > +       pr_info("unloaded\n");
> > > > > +}
> > > > > +
> > > > > +module_init(init_zblock);
> > > > > +module_exit(exit_zblock);
> > > > > +
> > > > > +MODULE_LICENSE("GPL");
> > > > > +MODULE_AUTHOR("Ananda Badmaev <a.badmaev@clicknet.pro>");
> > > > > +MODULE_DESCRIPTION("Block allocator for compressed pages");
> > > > > --
> > > > > 2.34.1
> > > > >
> > > > >



end of thread

Thread overview: 10+ messages
2022-09-28  8:01 [PATCH v5] mm: add zblock - new allocator for use via zpool API ananda
2022-09-28 17:29 ` Matthew Wilcox
2022-09-29  5:01   ` Ananda Badmaev
2022-09-28 18:37 ` Yosry Ahmed
2022-09-29  5:24   ` Ananda Badmaev
2022-09-29  6:55     ` Vitaly Wool
2022-09-29  6:55   ` Vitaly Wool
2022-09-29  7:58     ` Yosry Ahmed
2022-09-29  8:53       ` Vitaly Wool
2022-09-29 18:28         ` Yosry Ahmed
