* [PATCHv3 0/3] z3fold: background page compaction
@ 2016-10-27 11:06 Vitaly Wool
  2016-10-27 11:08 ` [PATCHv3 1/3] z3fold: make counters atomic Vitaly Wool
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Vitaly Wool @ 2016-10-27 11:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

This patchset is another take on the z3fold page layout
optimization problem. The previous solution [1] used a shrinker
to address in-page space fragmentation, but after some discussion
the decision was made to reimplement background page layout
optimization on top of good old work queues.

The patchset thus implements an in-page compaction worker for
z3fold, preceded by some code optimizations and preparations
which, again, deserved to be separate patches.

Changes compared to v2:
- more accurate accounting of unbuddied_nr, per Dan's
  comments
- various cleanups.

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

[1] https://lkml.org/lkml/2016/10/15/31


* [PATCHv3 1/3] z3fold: make counters atomic
  2016-10-27 11:06 [PATCHv3 0/3] z3fold: background page compaction Vitaly Wool
@ 2016-10-27 11:08 ` Vitaly Wool
  2016-11-01 20:03   ` Dan Streetman
  2016-10-27 11:12 ` [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock Vitaly Wool
  2016-10-27 11:13 ` [PATCHv3 3/3] z3fold: add compaction worker Vitaly Wool
  2 siblings, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2016-10-27 11:08 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

This patch converts the pages_nr per-pool counter to atomic64_t.
It also introduces a new atomic64_t counter, unbuddied_nr, to
track the number of unbuddied (compactable) z3fold pages.
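
For reference, these are plain atomic64 operations, so the counters
can be read (e.g. for pool size reporting) without taking the pool
lock. A condensed view of the pattern the diff below introduces:

	atomic64_set(&pool->pages_nr, 0);	/* at pool creation */
	atomic64_inc(&pool->pages_nr);		/* new z3fold page allocated */
	atomic64_dec(&pool->pages_nr);		/* z3fold page freed */
	nr_pages = atomic64_read(&pool->pages_nr);	/* lockless read */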

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8f9e89c..5ac325a 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -69,6 +69,7 @@ struct z3fold_ops {
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of z3fold pages in the pool.
+ * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
  *
@@ -80,7 +81,8 @@ struct z3fold_pool {
 	struct list_head unbuddied[NCHUNKS];
 	struct list_head buddied;
 	struct list_head lru;
-	u64 pages_nr;
+	atomic64_t pages_nr;
+	atomic64_t unbuddied_nr;
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
@@ -234,7 +236,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
-	pool->pages_nr = 0;
+	atomic64_set(&pool->pages_nr, 0);
+	atomic64_set(&pool->unbuddied_nr, 0);
 	pool->ops = ops;
 	return pool;
 }
@@ -334,6 +337,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 					continue;
 				}
 				list_del(&zhdr->buddy);
+				atomic64_dec(&pool->unbuddied_nr);
 				goto found;
 			}
 		}
@@ -346,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	if (!page)
 		return -ENOMEM;
 	spin_lock(&pool->lock);
-	pool->pages_nr++;
+	atomic64_inc(&pool->pages_nr);
 	zhdr = init_z3fold_page(page);
 
 	if (bud == HEADLESS) {
@@ -369,6 +373,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		atomic64_inc(&pool->unbuddied_nr);
 	} else {
 		/* Add to buddied list */
 		list_add(&zhdr->buddy, &pool->buddied);
@@ -412,6 +417,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		/* HEADLESS page stored */
 		bud = HEADLESS;
 	} else {
+		bool is_unbuddied = zhdr->first_chunks == 0 ||
+				zhdr->middle_chunks == 0 ||
+				zhdr->last_chunks == 0;
+
 		bud = handle_to_buddy(handle);
 
 		switch (bud) {
@@ -431,6 +440,8 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 			spin_unlock(&pool->lock);
 			return;
 		}
+		if (is_unbuddied)
+			atomic64_dec(&pool->unbuddied_nr);
 	}
 
 	if (test_bit(UNDER_RECLAIM, &page->private)) {
@@ -451,12 +462,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		list_del(&page->lru);
 		clear_bit(PAGE_HEADLESS, &page->private);
 		free_z3fold_page(zhdr);
-		pool->pages_nr--;
+		atomic64_dec(&pool->pages_nr);
 	} else {
 		z3fold_compact_page(zhdr);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		atomic64_inc(&pool->unbuddied_nr);
 	}
 
 	spin_unlock(&pool->lock);
@@ -520,6 +532,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			list_del(&zhdr->buddy);
+			if (zhdr->first_chunks == 0 ||
+			    zhdr->middle_chunks == 0 ||
+			    zhdr->last_chunks == 0)
+				atomic64_dec(&pool->unbuddied_nr);
+
 			/*
 			 * We need encode the handles before unlocking, since
 			 * we can race with free that will set
@@ -569,7 +586,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			 */
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
-			pool->pages_nr--;
+			atomic64_dec(&pool->pages_nr);
 			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
@@ -584,6 +601,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
 					 &pool->unbuddied[freechunks]);
+				atomic64_inc(&pool->unbuddied_nr);
 			}
 		}
 
@@ -672,12 +690,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
  * z3fold_get_pool_size() - gets the z3fold pool size in pages
  * @pool:	pool whose size is being queried
  *
- * Returns: size in pages of the given pool.  The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
  */
 static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 {
-	return pool->pages_nr;
+	return atomic64_read(&pool->pages_nr);
 }
 
 /*****************
-- 
2.4.2


* [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock
  2016-10-27 11:06 [PATCHv3 0/3] z3fold: background page compaction Vitaly Wool
  2016-10-27 11:08 ` [PATCHv3 1/3] z3fold: make counters atomic Vitaly Wool
@ 2016-10-27 11:12 ` Vitaly Wool
  2016-11-01 20:16   ` Dan Streetman
  2016-10-27 11:13 ` [PATCHv3 3/3] z3fold: add compaction worker Vitaly Wool
  2 siblings, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2016-10-27 11:12 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

Mapping/unmapping involves no actual modifications, so it makes
sense to take only a read lock in the map/unmap functions.
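
In other words, the paths that modify the pool lists keep taking the
lock exclusively, while map/unmap become shared readers. A minimal
sketch of the resulting pattern (mirroring the hunks below):

	/* pool->lock becomes an rwlock */
	rwlock_init(&pool->lock);

	/* alloc/free: pool lists are modified, take the lock exclusively */
	write_lock(&pool->lock);
	/* ... list_add()/list_del() on the unbuddied/buddied/lru lists ... */
	write_unlock(&pool->lock);

	/* map/unmap: no list modifications, readers may run concurrently */
	read_lock(&pool->lock);
	/* ... translate the handle into a mapped address ... */
	read_unlock(&pool->lock);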

This change gives up to 10% performance gain and lower latencies
in fio sequential read/write tests, e.g. before:

Run status group 0 (all jobs):
  WRITE: io=2700.0GB, aggrb=3234.8MB/s, minb=276031KB/s, maxb=277391KB/s, mint=850530msec, maxt=854720msec

Run status group 1 (all jobs):
   READ: io=2700.0GB, aggrb=4838.6MB/s, minb=412888KB/s, maxb=424969KB/s, mint=555168msec, maxt=571412msec

after:
Run status group 0 (all jobs):
  WRITE: io=2700.0GB, aggrb=3284.2MB/s, minb=280249KB/s, maxb=281130KB/s, mint=839218msec, maxt=841856msec

Run status group 1 (all jobs):
   READ: io=2700.0GB, aggrb=5210.7MB/s, minb=444640KB/s, maxb=447791KB/s, mint=526874msec, maxt=530607msec

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 5ac325a..014d84f 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -77,7 +77,7 @@ struct z3fold_ops {
  * pertaining to a particular z3fold pool.
  */
 struct z3fold_pool {
-	spinlock_t lock;
+	rwlock_t lock;
 	struct list_head unbuddied[NCHUNKS];
 	struct list_head buddied;
 	struct list_head lru;
@@ -231,7 +231,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
 	if (!pool)
 		return NULL;
-	spin_lock_init(&pool->lock);
+	rwlock_init(&pool->lock);
 	for_each_unbuddied_list(i, 0)
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
@@ -312,7 +312,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		bud = HEADLESS;
 	else {
 		chunks = size_to_chunks(size);
-		spin_lock(&pool->lock);
+		write_lock(&pool->lock);
 
 		/* First, try to find an unbuddied z3fold page. */
 		zhdr = NULL;
@@ -342,14 +342,14 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			}
 		}
 		bud = FIRST;
-		spin_unlock(&pool->lock);
+		write_unlock(&pool->lock);
 	}
 
 	/* Couldn't find unbuddied z3fold page, create new one */
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock(&pool->lock);
+	write_lock(&pool->lock);
 	atomic64_inc(&pool->pages_nr);
 	zhdr = init_z3fold_page(page);
 
@@ -387,7 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	list_add(&page->lru, &pool->lru);
 
 	*handle = encode_handle(zhdr, bud);
-	spin_unlock(&pool->lock);
+	write_unlock(&pool->lock);
 
 	return 0;
 }
@@ -409,7 +409,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy bud;
 
-	spin_lock(&pool->lock);
+	write_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
@@ -437,7 +437,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		default:
 			pr_err("%s: unknown bud %d\n", __func__, bud);
 			WARN_ON(1);
-			spin_unlock(&pool->lock);
+			write_unlock(&pool->lock);
 			return;
 		}
 		if (is_unbuddied)
@@ -446,7 +446,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 
 	if (test_bit(UNDER_RECLAIM, &page->private)) {
 		/* z3fold page is under reclaim, reclaim will free */
-		spin_unlock(&pool->lock);
+		write_unlock(&pool->lock);
 		return;
 	}
 
@@ -471,7 +471,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		atomic64_inc(&pool->unbuddied_nr);
 	}
 
-	spin_unlock(&pool->lock);
+	write_unlock(&pool->lock);
 }
 
 /**
@@ -517,10 +517,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 	struct page *page;
 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
-	spin_lock(&pool->lock);
+	read_lock(&pool->lock);
 	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
 			retries == 0) {
-		spin_unlock(&pool->lock);
+		read_unlock(&pool->lock);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
@@ -556,7 +556,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			last_handle = middle_handle = 0;
 		}
 
-		spin_unlock(&pool->lock);
+		read_unlock(&pool->lock);
 
 		/* Issue the eviction callback(s) */
 		if (middle_handle) {
@@ -575,7 +575,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock(&pool->lock);
+		write_lock(&pool->lock);
 		clear_bit(UNDER_RECLAIM, &page->private);
 		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
 		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
@@ -587,7 +587,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
 			atomic64_dec(&pool->pages_nr);
-			spin_unlock(&pool->lock);
+			write_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			if (zhdr->first_chunks != 0 &&
@@ -607,8 +607,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 
 		/* add to beginning of LRU */
 		list_add(&page->lru, &pool->lru);
+		write_unlock(&pool->lock);
+		read_lock(&pool->lock);
 	}
-	spin_unlock(&pool->lock);
+	read_unlock(&pool->lock);
 	return -EAGAIN;
 }
 
@@ -629,7 +631,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	void *addr;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
+	read_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	addr = zhdr;
 	page = virt_to_page(zhdr);
@@ -656,7 +658,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		break;
 	}
 out:
-	spin_unlock(&pool->lock);
+	read_unlock(&pool->lock);
 	return addr;
 }
 
@@ -671,19 +673,19 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
+	read_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
 	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		spin_unlock(&pool->lock);
+		read_unlock(&pool->lock);
 		return;
 	}
 
 	buddy = handle_to_buddy(handle);
 	if (buddy == MIDDLE)
 		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
-	spin_unlock(&pool->lock);
+	read_unlock(&pool->lock);
 }
 
 /**
-- 
2.4.2


* [PATCHv3 3/3] z3fold: add compaction worker
  2016-10-27 11:06 [PATCHv3 0/3] z3fold: background page compaction Vitaly Wool
  2016-10-27 11:08 ` [PATCHv3 1/3] z3fold: make counters atomic Vitaly Wool
  2016-10-27 11:12 ` [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock Vitaly Wool
@ 2016-10-27 11:13 ` Vitaly Wool
  2016-11-01 22:13   ` Dan Streetman
  2 siblings, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2016-10-27 11:13 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

This patch implements a compaction worker thread for z3fold. This
worker does not free up any pages directly, but it allows for
denser placement of compressed objects, which results in fewer
pages actually consumed and therefore a higher compression ratio.

This patch has been checked against the latest Linus tree.
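
The mechanism itself is a standard delayed workqueue: the free path
marks a page for deferred compaction and (re)queues the work item,
and the worker then compacts a batch of unbuddied pages in the
background. A condensed sketch of that pattern (names here are
illustrative; the actual code is in the diff below):

	static void compact_work_fn(struct work_struct *w)
	{
		/* walk the unbuddied lists and compact a batch of pages */
	}

	/* at pool creation */
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	INIT_DELAYED_WORK(&pool->work, compact_work_fn);

	/* in the free path, defer compaction instead of doing it inline */
	if (!delayed_work_pending(&pool->work))
		queue_delayed_work(pool->compact_wq, &pool->work, HZ);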

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 140 insertions(+), 26 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 014d84f..cc26ff5 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/preempt.h>
+#include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zpool.h>
@@ -59,6 +60,7 @@ struct z3fold_ops {
 
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
+ * @name:	pool name
  * @lock:	protects all pool fields and first|last_chunk fields of any
  *		z3fold page in the pool
  * @unbuddied:	array of lists tracking z3fold pages that contain 2- buddies;
@@ -72,11 +74,14 @@ struct z3fold_ops {
  * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
+ * @compact_wq:	workqueue for page layout background optimization
+ * @work:	compaction work item
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
  */
 struct z3fold_pool {
+	const char *name;
 	rwlock_t lock;
 	struct list_head unbuddied[NCHUNKS];
 	struct list_head buddied;
@@ -86,6 +91,8 @@ struct z3fold_pool {
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
+	struct workqueue_struct *compact_wq;
+	struct delayed_work work;
 };
 
 enum buddy {
@@ -121,6 +128,7 @@ enum z3fold_page_flags {
 	UNDER_RECLAIM = 0,
 	PAGE_HEADLESS,
 	MIDDLE_CHUNK_MAPPED,
+	COMPACTION_DEFERRED,
 };
 
 /*****************
@@ -136,6 +144,9 @@ static int size_to_chunks(size_t size)
 #define for_each_unbuddied_list(_iter, _begin) \
 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
 
+#define for_each_unbuddied_list_reverse(_iter, _end) \
+	for ((_iter) = (_end); (_iter) > 0; (_iter)--)
+
 /* Initializes the z3fold header of a newly allocated z3fold page */
 static struct z3fold_header *init_z3fold_page(struct page *page)
 {
@@ -145,6 +156,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	clear_bit(UNDER_RECLAIM, &page->private);
 	clear_bit(PAGE_HEADLESS, &page->private);
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
+	clear_bit(COMPACTION_DEFERRED, &page->private);
 
 	zhdr->first_chunks = 0;
 	zhdr->middle_chunks = 0;
@@ -211,6 +223,116 @@ static int num_free_chunks(struct z3fold_header *zhdr)
 	return nfree;
 }
 
+static inline void *mchunk_memmove(struct z3fold_header *zhdr,
+				unsigned short dst_chunk)
+{
+	void *beg = zhdr;
+	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
+		       beg + (zhdr->start_middle << CHUNK_SHIFT),
+		       zhdr->middle_chunks << CHUNK_SHIFT);
+}
+
+static int z3fold_compact_page(struct z3fold_header *zhdr, bool sync)
+{
+	struct page *page = virt_to_page(zhdr);
+	int ret = 0;
+
+	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) {
+		set_bit(COMPACTION_DEFERRED, &page->private);
+		ret = -1;
+		goto out;
+	}
+
+	clear_bit(COMPACTION_DEFERRED, &page->private);
+	if (zhdr->middle_chunks != 0) {
+		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+			/*
+			 * If we are here, no one can access this page
+			 * except for z3fold_map or z3fold_free. Both
+			 * will wait for page_lock to become free.
+			 */
+			mchunk_memmove(zhdr, 1); /* move to the beginning */
+			zhdr->first_chunks = zhdr->middle_chunks;
+			zhdr->middle_chunks = 0;
+			zhdr->start_middle = 0;
+			zhdr->first_num++;
+			ret = 1;
+			goto out;
+		}
+		if (sync)
+			goto out;
+
+		/*
+		 * These are more complicated cases: first or last object
+		 * may be mapped. Luckily we don't touch these anyway.
+		 *
+		 * NB: moving data is expensive, so let's only do that if
+		 * there's substantial gain (2+ chunks)
+		 */
+		if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+		    zhdr->start_middle > zhdr->first_chunks + 2) {
+			mchunk_memmove(zhdr, zhdr->first_chunks + 1);
+			zhdr->start_middle = zhdr->first_chunks + 1;
+			ret = 1;
+			goto out;
+		}
+		if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+		    zhdr->middle_chunks + zhdr->last_chunks <=
+		    NCHUNKS - zhdr->start_middle - 2) {
+			unsigned short new_start = NCHUNKS - zhdr->last_chunks -
+				zhdr->middle_chunks;
+			mchunk_memmove(zhdr, new_start);
+			zhdr->start_middle = new_start;
+			ret = 1;
+			goto out;
+		}
+	}
+out:
+	return ret;
+}
+
+#define COMPACTION_BATCH	(NCHUNKS/2)
+static void z3fold_compact_work(struct work_struct *w)
+{
+	struct z3fold_pool *pool = container_of(to_delayed_work(w),
+						struct z3fold_pool, work);
+	struct z3fold_header *zhdr;
+	struct page *page;
+	int i, ret, compacted = 0;
+	bool requeue = false;
+
+	write_lock(&pool->lock);
+	for_each_unbuddied_list_reverse(i, NCHUNKS - 3) {
+		zhdr = list_empty(&pool->unbuddied[i]) ?
+				NULL : list_last_entry(&pool->unbuddied[i],
+				struct z3fold_header, buddy);
+		if (!zhdr)
+			continue;
+		page = virt_to_page(zhdr);
+		if (likely(!test_bit(COMPACTION_DEFERRED, &page->private)))
+			continue;
+		list_del(&zhdr->buddy);
+		ret = z3fold_compact_page(zhdr, false);
+		if (ret < 0)
+			requeue = true;
+		else
+			compacted += ret;
+		write_unlock(&pool->lock);
+		cond_resched();
+		write_lock(&pool->lock);
+		list_add(&zhdr->buddy,
+			&pool->unbuddied[num_free_chunks(zhdr)]);
+		if (compacted >= COMPACTION_BATCH) {
+			requeue = true;
+			break;
+		}
+	}
+	write_unlock(&pool->lock);
+	if (requeue && !delayed_work_pending(&pool->work))
+		queue_delayed_work(pool->compact_wq, &pool->work, HZ);
+}
+
+
 /*****************
  * API Functions
 *****************/
@@ -230,7 +352,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 
 	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
 	if (!pool)
-		return NULL;
+		goto out;
 	rwlock_init(&pool->lock);
 	for_each_unbuddied_list(i, 0)
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
@@ -238,8 +360,17 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 	INIT_LIST_HEAD(&pool->lru);
 	atomic64_set(&pool->pages_nr, 0);
 	atomic64_set(&pool->unbuddied_nr, 0);
+	pool->compact_wq = create_singlethread_workqueue(pool->name);
+	if (!pool->compact_wq)
+		goto out;
+	INIT_DELAYED_WORK(&pool->work, z3fold_compact_work);
 	pool->ops = ops;
 	return pool;
+
+out:
+	if (pool)
+		kfree(pool);
+	return NULL;
 }
 
 /**
@@ -250,31 +381,11 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
  */
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
+	if (pool->compact_wq)
+		destroy_workqueue(pool->compact_wq);
 	kfree(pool);
 }
 
-/* Has to be called with lock held */
-static int z3fold_compact_page(struct z3fold_header *zhdr)
-{
-	struct page *page = virt_to_page(zhdr);
-	void *beg = zhdr;
-
-
-	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
-	    zhdr->middle_chunks != 0 &&
-	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
-		memmove(beg + ZHDR_SIZE_ALIGNED,
-			beg + (zhdr->start_middle << CHUNK_SHIFT),
-			zhdr->middle_chunks << CHUNK_SHIFT);
-		zhdr->first_chunks = zhdr->middle_chunks;
-		zhdr->middle_chunks = 0;
-		zhdr->start_middle = 0;
-		zhdr->first_num++;
-		return 1;
-	}
-	return 0;
-}
-
 /**
  * z3fold_alloc() - allocates a region of a given size
  * @pool:	z3fold pool from which to allocate
@@ -464,11 +575,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		free_z3fold_page(zhdr);
 		atomic64_dec(&pool->pages_nr);
 	} else {
-		z3fold_compact_page(zhdr);
+		set_bit(COMPACTION_DEFERRED, &page->private);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
-		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		list_add_tail(&zhdr->buddy, &pool->unbuddied[freechunks]);
 		atomic64_inc(&pool->unbuddied_nr);
+		if (!delayed_work_pending(&pool->work))
+			queue_delayed_work(pool->compact_wq, &pool->work, HZ);
 	}
 
 	write_unlock(&pool->lock);
@@ -596,7 +709,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				/* Full, add to buddied list */
 				list_add(&zhdr->buddy, &pool->buddied);
 			} else {
-				z3fold_compact_page(zhdr);
+				z3fold_compact_page(zhdr, true);
 				/* add to unbuddied list */
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
@@ -725,6 +838,7 @@ static void *z3fold_zpool_create(const char *name, gfp_t gfp,
 	if (pool) {
 		pool->zpool = zpool;
 		pool->zpool_ops = zpool_ops;
+		pool->name = name;
 	}
 	return pool;
 }
-- 
2.4.2


* Re: [PATCHv3 1/3] z3fold: make counters atomic
  2016-10-27 11:08 ` [PATCHv3 1/3] z3fold: make counters atomic Vitaly Wool
@ 2016-11-01 20:03   ` Dan Streetman
  2016-11-01 21:03     ` Vitaly Wool
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Streetman @ 2016-11-01 20:03 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Thu, Oct 27, 2016 at 7:08 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> This patch converts the pages_nr per-pool counter to atomic64_t.
> It also introduces a new atomic64_t counter, unbuddied_nr, to
> track the number of unbuddied (compactable) z3fold pages.

so, with the use of workqueues, do we still need the unbuddied_nr
counter?  It doesn't seem to be used in later patches?

changing the pages_nr to atomic is a good idea though, so we can
safely read it without needing the pool lock.
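
E.g. z3fold_get_pool_size() then becomes a plain lockless read, as
the last hunk of the patch does:

	static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
	{
		return atomic64_read(&pool->pages_nr);
	}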

>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 33 +++++++++++++++++++++++++--------
>  1 file changed, 25 insertions(+), 8 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 8f9e89c..5ac325a 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -69,6 +69,7 @@ struct z3fold_ops {
>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>   *             added buddy.
>   * @pages_nr:  number of z3fold pages in the pool.
> + * @unbuddied_nr:      number of unbuddied z3fold pages in the pool.
>   * @ops:       pointer to a structure of user defined operations specified at
>   *             pool creation time.
>   *
> @@ -80,7 +81,8 @@ struct z3fold_pool {
>         struct list_head unbuddied[NCHUNKS];
>         struct list_head buddied;
>         struct list_head lru;
> -       u64 pages_nr;
> +       atomic64_t pages_nr;
> +       atomic64_t unbuddied_nr;
>         const struct z3fold_ops *ops;
>         struct zpool *zpool;
>         const struct zpool_ops *zpool_ops;
> @@ -234,7 +236,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>         INIT_LIST_HEAD(&pool->buddied);
>         INIT_LIST_HEAD(&pool->lru);
> -       pool->pages_nr = 0;
> +       atomic64_set(&pool->pages_nr, 0);
> +       atomic64_set(&pool->unbuddied_nr, 0);
>         pool->ops = ops;
>         return pool;
>  }
> @@ -334,6 +337,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                                         continue;
>                                 }
>                                 list_del(&zhdr->buddy);
> +                               atomic64_dec(&pool->unbuddied_nr);
>                                 goto found;
>                         }
>                 }
> @@ -346,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>         if (!page)
>                 return -ENOMEM;
>         spin_lock(&pool->lock);
> -       pool->pages_nr++;
> +       atomic64_inc(&pool->pages_nr);
>         zhdr = init_z3fold_page(page);
>
>         if (bud == HEADLESS) {
> @@ -369,6 +373,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 /* Add to unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               atomic64_inc(&pool->unbuddied_nr);
>         } else {
>                 /* Add to buddied list */
>                 list_add(&zhdr->buddy, &pool->buddied);
> @@ -412,6 +417,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 /* HEADLESS page stored */
>                 bud = HEADLESS;
>         } else {
> +               bool is_unbuddied = zhdr->first_chunks == 0 ||
> +                               zhdr->middle_chunks == 0 ||
> +                               zhdr->last_chunks == 0;
> +
>                 bud = handle_to_buddy(handle);
>
>                 switch (bud) {
> @@ -431,6 +440,8 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                         spin_unlock(&pool->lock);
>                         return;
>                 }
> +               if (is_unbuddied)
> +                       atomic64_dec(&pool->unbuddied_nr);
>         }
>
>         if (test_bit(UNDER_RECLAIM, &page->private)) {
> @@ -451,12 +462,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 list_del(&page->lru);
>                 clear_bit(PAGE_HEADLESS, &page->private);
>                 free_z3fold_page(zhdr);
> -               pool->pages_nr--;
> +               atomic64_dec(&pool->pages_nr);
>         } else {
>                 z3fold_compact_page(zhdr);
>                 /* Add to the unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               atomic64_inc(&pool->unbuddied_nr);
>         }
>
>         spin_unlock(&pool->lock);
> @@ -520,6 +532,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                 zhdr = page_address(page);
>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>                         list_del(&zhdr->buddy);
> +                       if (zhdr->first_chunks == 0 ||
> +                           zhdr->middle_chunks == 0 ||
> +                           zhdr->last_chunks == 0)
> +                               atomic64_dec(&pool->unbuddied_nr);
> +
>                         /*
>                          * We need encode the handles before unlocking, since
>                          * we can race with free that will set
> @@ -569,7 +586,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                          */
>                         clear_bit(PAGE_HEADLESS, &page->private);
>                         free_z3fold_page(zhdr);
> -                       pool->pages_nr--;
> +                       atomic64_dec(&pool->pages_nr);
>                         spin_unlock(&pool->lock);
>                         return 0;
>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
> @@ -584,6 +601,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 freechunks = num_free_chunks(zhdr);
>                                 list_add(&zhdr->buddy,
>                                          &pool->unbuddied[freechunks]);
> +                               atomic64_inc(&pool->unbuddied_nr);
>                         }
>                 }
>
> @@ -672,12 +690,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>   * z3fold_get_pool_size() - gets the z3fold pool size in pages
>   * @pool:      pool whose size is being queried
>   *
> - * Returns: size in pages of the given pool.  The pool lock need not be
> - * taken to access pages_nr.
> + * Returns: size in pages of the given pool.
>   */
>  static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>  {
> -       return pool->pages_nr;
> +       return atomic64_read(&pool->pages_nr);
>  }
>
>  /*****************
> --
> 2.4.2


* Re: [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock
  2016-10-27 11:12 ` [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock Vitaly Wool
@ 2016-11-01 20:16   ` Dan Streetman
  2016-11-01 21:05     ` Vitaly Wool
  0 siblings, 1 reply; 10+ messages in thread
From: Dan Streetman @ 2016-11-01 20:16 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Thu, Oct 27, 2016 at 7:12 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> Mapping/unmapping involves no actual modifications, so it makes
> sense to take only a read lock in the map/unmap functions.
>
> This change gives up to 10% performance gain and lower latencies
> in fio sequential read/write tests, e.g. before:
>
> Run status group 0 (all jobs):
>   WRITE: io=2700.0GB, aggrb=3234.8MB/s, minb=276031KB/s, maxb=277391KB/s, mint=850530msec, maxt=854720msec
>
> Run status group 1 (all jobs):
>    READ: io=2700.0GB, aggrb=4838.6MB/s, minb=412888KB/s, maxb=424969KB/s, mint=555168msec, maxt=571412msec
>
> after:
> Run status group 0 (all jobs):
>   WRITE: io=2700.0GB, aggrb=3284.2MB/s, minb=280249KB/s, maxb=281130KB/s, mint=839218msec, maxt=841856msec
>
> Run status group 1 (all jobs):
>    READ: io=2700.0GB, aggrb=5210.7MB/s, minb=444640KB/s, maxb=447791KB/s, mint=526874msec, maxt=530607msec
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 44 +++++++++++++++++++++++---------------------
>  1 file changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 5ac325a..014d84f 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -77,7 +77,7 @@ struct z3fold_ops {
>   * pertaining to a particular z3fold pool.
>   */
>  struct z3fold_pool {
> -       spinlock_t lock;
> +       rwlock_t lock;
>         struct list_head unbuddied[NCHUNKS];
>         struct list_head buddied;
>         struct list_head lru;
> @@ -231,7 +231,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>         pool = kzalloc(sizeof(struct z3fold_pool), gfp);
>         if (!pool)
>                 return NULL;
> -       spin_lock_init(&pool->lock);
> +       rwlock_init(&pool->lock);
>         for_each_unbuddied_list(i, 0)
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>         INIT_LIST_HEAD(&pool->buddied);
> @@ -312,7 +312,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 bud = HEADLESS;
>         else {
>                 chunks = size_to_chunks(size);
> -               spin_lock(&pool->lock);
> +               write_lock(&pool->lock);
>
>                 /* First, try to find an unbuddied z3fold page. */
>                 zhdr = NULL;
> @@ -342,14 +342,14 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                         }
>                 }
>                 bud = FIRST;
> -               spin_unlock(&pool->lock);
> +               write_unlock(&pool->lock);
>         }
>
>         /* Couldn't find unbuddied z3fold page, create new one */
>         page = alloc_page(gfp);
>         if (!page)
>                 return -ENOMEM;
> -       spin_lock(&pool->lock);
> +       write_lock(&pool->lock);
>         atomic64_inc(&pool->pages_nr);
>         zhdr = init_z3fold_page(page);
>
> @@ -387,7 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>         list_add(&page->lru, &pool->lru);
>
>         *handle = encode_handle(zhdr, bud);
> -       spin_unlock(&pool->lock);
> +       write_unlock(&pool->lock);
>
>         return 0;
>  }
> @@ -409,7 +409,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy bud;
>
> -       spin_lock(&pool->lock);
> +       write_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
> @@ -437,7 +437,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 default:
>                         pr_err("%s: unknown bud %d\n", __func__, bud);
>                         WARN_ON(1);
> -                       spin_unlock(&pool->lock);
> +                       write_unlock(&pool->lock);
>                         return;
>                 }
>                 if (is_unbuddied)
> @@ -446,7 +446,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>
>         if (test_bit(UNDER_RECLAIM, &page->private)) {
>                 /* z3fold page is under reclaim, reclaim will free */
> -               spin_unlock(&pool->lock);
> +               write_unlock(&pool->lock);
>                 return;
>         }
>
> @@ -471,7 +471,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 atomic64_inc(&pool->unbuddied_nr);
>         }
>
> -       spin_unlock(&pool->lock);
> +       write_unlock(&pool->lock);
>  }
>
>  /**
> @@ -517,10 +517,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>         struct page *page;
>         unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
>
> -       spin_lock(&pool->lock);
> +       read_lock(&pool->lock);

this needs the write lock, as we actually change the pool's lists
inside the for loop below, e.g.:

                page = list_last_entry(&pool->lru, struct page, lru);
                list_del(&page->lru);

as well as:

                        list_del(&zhdr->buddy);

>         if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
>                         retries == 0) {
> -               spin_unlock(&pool->lock);
> +               read_unlock(&pool->lock);
>                 return -EINVAL;
>         }
>         for (i = 0; i < retries; i++) {
> @@ -556,7 +556,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                         last_handle = middle_handle = 0;
>                 }
>
> -               spin_unlock(&pool->lock);
> +               read_unlock(&pool->lock);
>
>                 /* Issue the eviction callback(s) */
>                 if (middle_handle) {
> @@ -575,7 +575,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 goto next;
>                 }
>  next:
> -               spin_lock(&pool->lock);
> +               write_lock(&pool->lock);
>                 clear_bit(UNDER_RECLAIM, &page->private);
>                 if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>                     (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
> @@ -587,7 +587,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                         clear_bit(PAGE_HEADLESS, &page->private);
>                         free_z3fold_page(zhdr);
>                         atomic64_dec(&pool->pages_nr);
> -                       spin_unlock(&pool->lock);
> +                       write_unlock(&pool->lock);
>                         return 0;
>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>                         if (zhdr->first_chunks != 0 &&
> @@ -607,8 +607,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>
>                 /* add to beginning of LRU */
>                 list_add(&page->lru, &pool->lru);
> +               write_unlock(&pool->lock);
> +               read_lock(&pool->lock);
>         }
> -       spin_unlock(&pool->lock);
> +       read_unlock(&pool->lock);
>         return -EAGAIN;
>  }
>
> @@ -629,7 +631,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>         void *addr;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
> +       read_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         addr = zhdr;
>         page = virt_to_page(zhdr);
> @@ -656,7 +658,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>                 break;
>         }
>  out:
> -       spin_unlock(&pool->lock);
> +       read_unlock(&pool->lock);
>         return addr;
>  }
>
> @@ -671,19 +673,19 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
> +       read_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
>         if (test_bit(PAGE_HEADLESS, &page->private)) {
> -               spin_unlock(&pool->lock);
> +               read_unlock(&pool->lock);
>                 return;
>         }
>
>         buddy = handle_to_buddy(handle);
>         if (buddy == MIDDLE)
>                 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> -       spin_unlock(&pool->lock);
> +       read_unlock(&pool->lock);
>  }
>
>  /**
> --
> 2.4.2


* Re: [PATCHv3 1/3] z3fold: make counters atomic
  2016-11-01 20:03   ` Dan Streetman
@ 2016-11-01 21:03     ` Vitaly Wool
  2016-11-01 22:16       ` Dan Streetman
  0 siblings, 1 reply; 10+ messages in thread
From: Vitaly Wool @ 2016-11-01 21:03 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Tue, Nov 1, 2016 at 9:03 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Thu, Oct 27, 2016 at 7:08 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> This patch converts the pages_nr per-pool counter to atomic64_t.
>> It also introduces a new atomic64_t counter, unbuddied_nr, to
>> track the number of unbuddied (compactable) z3fold pages.
>
> so, with the use of workqueues, do we still need the unbuddied_nr
> counter?  It doesn't seem to be used in later patches?

I'm going to add sysfs/debugfs accounting a bit later, so it won't remain unused.

Also, with a per-page lock (if I come up with something reasonable) it could
still be worth going back to the shrinker.

> changing the pages_nr to atomic is a good idea though, so we can
> safely read it without needing the pool lock.
>
>>
>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>> ---
>>  mm/z3fold.c | 33 +++++++++++++++++++++++++--------
>>  1 file changed, 25 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index 8f9e89c..5ac325a 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -69,6 +69,7 @@ struct z3fold_ops {
>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>   *             added buddy.
>>   * @pages_nr:  number of z3fold pages in the pool.
>> + * @unbuddied_nr:      number of unbuddied z3fold pages in the pool.
>>   * @ops:       pointer to a structure of user defined operations specified at
>>   *             pool creation time.
>>   *
>> @@ -80,7 +81,8 @@ struct z3fold_pool {
>>         struct list_head unbuddied[NCHUNKS];
>>         struct list_head buddied;
>>         struct list_head lru;
>> -       u64 pages_nr;
>> +       atomic64_t pages_nr;
>> +       atomic64_t unbuddied_nr;
>>         const struct z3fold_ops *ops;
>>         struct zpool *zpool;
>>         const struct zpool_ops *zpool_ops;
>> @@ -234,7 +236,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>         INIT_LIST_HEAD(&pool->buddied);
>>         INIT_LIST_HEAD(&pool->lru);
>> -       pool->pages_nr = 0;
>> +       atomic64_set(&pool->pages_nr, 0);
>> +       atomic64_set(&pool->unbuddied_nr, 0);
>>         pool->ops = ops;
>>         return pool;
>>  }
>> @@ -334,6 +337,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                                         continue;
>>                                 }
>>                                 list_del(&zhdr->buddy);
>> +                               atomic64_dec(&pool->unbuddied_nr);
>>                                 goto found;
>>                         }
>>                 }
>> @@ -346,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>         if (!page)
>>                 return -ENOMEM;
>>         spin_lock(&pool->lock);
>> -       pool->pages_nr++;
>> +       atomic64_inc(&pool->pages_nr);
>>         zhdr = init_z3fold_page(page);
>>
>>         if (bud == HEADLESS) {
>> @@ -369,6 +373,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                 /* Add to unbuddied list */
>>                 freechunks = num_free_chunks(zhdr);
>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> +               atomic64_inc(&pool->unbuddied_nr);
>>         } else {
>>                 /* Add to buddied list */
>>                 list_add(&zhdr->buddy, &pool->buddied);
>> @@ -412,6 +417,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                 /* HEADLESS page stored */
>>                 bud = HEADLESS;
>>         } else {
>> +               bool is_unbuddied = zhdr->first_chunks == 0 ||
>> +                               zhdr->middle_chunks == 0 ||
>> +                               zhdr->last_chunks == 0;
>> +
>>                 bud = handle_to_buddy(handle);
>>
>>                 switch (bud) {
>> @@ -431,6 +440,8 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                         spin_unlock(&pool->lock);
>>                         return;
>>                 }
>> +               if (is_unbuddied)
>> +                       atomic64_dec(&pool->unbuddied_nr);
>>         }
>>
>>         if (test_bit(UNDER_RECLAIM, &page->private)) {
>> @@ -451,12 +462,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                 list_del(&page->lru);
>>                 clear_bit(PAGE_HEADLESS, &page->private);
>>                 free_z3fold_page(zhdr);
>> -               pool->pages_nr--;
>> +               atomic64_dec(&pool->pages_nr);
>>         } else {
>>                 z3fold_compact_page(zhdr);
>>                 /* Add to the unbuddied list */
>>                 freechunks = num_free_chunks(zhdr);
>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> +               atomic64_inc(&pool->unbuddied_nr);
>>         }
>>
>>         spin_unlock(&pool->lock);
>> @@ -520,6 +532,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                 zhdr = page_address(page);
>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>                         list_del(&zhdr->buddy);
>> +                       if (zhdr->first_chunks == 0 ||
>> +                           zhdr->middle_chunks == 0 ||
>> +                           zhdr->last_chunks == 0)
>> +                               atomic64_dec(&pool->unbuddied_nr);
>> +
>>                         /*
>>                          * We need encode the handles before unlocking, since
>>                          * we can race with free that will set
>> @@ -569,7 +586,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                          */
>>                         clear_bit(PAGE_HEADLESS, &page->private);
>>                         free_z3fold_page(zhdr);
>> -                       pool->pages_nr--;
>> +                       atomic64_dec(&pool->pages_nr);
>>                         spin_unlock(&pool->lock);
>>                         return 0;
>>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>> @@ -584,6 +601,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                                 freechunks = num_free_chunks(zhdr);
>>                                 list_add(&zhdr->buddy,
>>                                          &pool->unbuddied[freechunks]);
>> +                               atomic64_inc(&pool->unbuddied_nr);
>>                         }
>>                 }
>>
>> @@ -672,12 +690,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>>   * z3fold_get_pool_size() - gets the z3fold pool size in pages
>>   * @pool:      pool whose size is being queried
>>   *
>> - * Returns: size in pages of the given pool.  The pool lock need not be
>> - * taken to access pages_nr.
>> + * Returns: size in pages of the given pool.
>>   */
>>  static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>>  {
>> -       return pool->pages_nr;
>> +       return atomic64_read(&pool->pages_nr);
>>  }
>>
>>  /*****************
>> --
>> 2.4.2


* Re: [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock
  2016-11-01 20:16   ` Dan Streetman
@ 2016-11-01 21:05     ` Vitaly Wool
  0 siblings, 0 replies; 10+ messages in thread
From: Vitaly Wool @ 2016-11-01 21:05 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Tue, Nov 1, 2016 at 9:16 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Thu, Oct 27, 2016 at 7:12 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> Mapping/unmapping involves no actual modifications, so it makes
>> sense to take only a read lock in the map/unmap functions.
>>
>> This change gives up to 10% performance gain and lower latencies
>> in fio sequential read/write tests, e.g. before:
>>
>> Run status group 0 (all jobs):
>>   WRITE: io=2700.0GB, aggrb=3234.8MB/s, minb=276031KB/s, maxb=277391KB/s, mint=850530msec, maxt=854720msec
>>
>> Run status group 1 (all jobs):
>>    READ: io=2700.0GB, aggrb=4838.6MB/s, minb=412888KB/s, maxb=424969KB/s, mint=555168msec, maxt=571412msec
>>
>> after:
>> Run status group 0 (all jobs):
>>   WRITE: io=2700.0GB, aggrb=3284.2MB/s, minb=280249KB/s, maxb=281130KB/s, mint=839218msec, maxt=841856msec
>>
>> Run status group 1 (all jobs):
>>    READ: io=2700.0GB, aggrb=5210.7MB/s, minb=444640KB/s, maxb=447791KB/s, mint=526874msec, maxt=530607msec
>>
>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>> ---
>>  mm/z3fold.c | 44 +++++++++++++++++++++++---------------------
>>  1 file changed, 23 insertions(+), 21 deletions(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index 5ac325a..014d84f 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -77,7 +77,7 @@ struct z3fold_ops {
>>   * pertaining to a particular z3fold pool.
>>   */
>>  struct z3fold_pool {
>> -       spinlock_t lock;
>> +       rwlock_t lock;
>>         struct list_head unbuddied[NCHUNKS];
>>         struct list_head buddied;
>>         struct list_head lru;
>> @@ -231,7 +231,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>         pool = kzalloc(sizeof(struct z3fold_pool), gfp);
>>         if (!pool)
>>                 return NULL;
>> -       spin_lock_init(&pool->lock);
>> +       rwlock_init(&pool->lock);
>>         for_each_unbuddied_list(i, 0)
>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>         INIT_LIST_HEAD(&pool->buddied);
>> @@ -312,7 +312,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                 bud = HEADLESS;
>>         else {
>>                 chunks = size_to_chunks(size);
>> -               spin_lock(&pool->lock);
>> +               write_lock(&pool->lock);
>>
>>                 /* First, try to find an unbuddied z3fold page. */
>>                 zhdr = NULL;
>> @@ -342,14 +342,14 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                         }
>>                 }
>>                 bud = FIRST;
>> -               spin_unlock(&pool->lock);
>> +               write_unlock(&pool->lock);
>>         }
>>
>>         /* Couldn't find unbuddied z3fold page, create new one */
>>         page = alloc_page(gfp);
>>         if (!page)
>>                 return -ENOMEM;
>> -       spin_lock(&pool->lock);
>> +       write_lock(&pool->lock);
>>         atomic64_inc(&pool->pages_nr);
>>         zhdr = init_z3fold_page(page);
>>
>> @@ -387,7 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>         list_add(&page->lru, &pool->lru);
>>
>>         *handle = encode_handle(zhdr, bud);
>> -       spin_unlock(&pool->lock);
>> +       write_unlock(&pool->lock);
>>
>>         return 0;
>>  }
>> @@ -409,7 +409,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>         struct page *page;
>>         enum buddy bud;
>>
>> -       spin_lock(&pool->lock);
>> +       write_lock(&pool->lock);
>>         zhdr = handle_to_z3fold_header(handle);
>>         page = virt_to_page(zhdr);
>>
>> @@ -437,7 +437,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                 default:
>>                         pr_err("%s: unknown bud %d\n", __func__, bud);
>>                         WARN_ON(1);
>> -                       spin_unlock(&pool->lock);
>> +                       write_unlock(&pool->lock);
>>                         return;
>>                 }
>>                 if (is_unbuddied)
>> @@ -446,7 +446,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>
>>         if (test_bit(UNDER_RECLAIM, &page->private)) {
>>                 /* z3fold page is under reclaim, reclaim will free */
>> -               spin_unlock(&pool->lock);
>> +               write_unlock(&pool->lock);
>>                 return;
>>         }
>>
>> @@ -471,7 +471,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                 atomic64_inc(&pool->unbuddied_nr);
>>         }
>>
>> -       spin_unlock(&pool->lock);
>> +       write_unlock(&pool->lock);
>>  }
>>
>>  /**
>> @@ -517,10 +517,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>         struct page *page;
>>         unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
>>
>> -       spin_lock(&pool->lock);
>> +       read_lock(&pool->lock);
>
> this needs the write lock, as we actually change the pool's lists
> inside the for loop below, e.g.:
>
>                 page = list_last_entry(&pool->lru, struct page, lru);
>                 list_del(&page->lru);
>
> as well as:
>
>                         list_del(&zhdr->buddy);
>

Oh right, thanks. FWIW changing the lock there doesn't seem to have
any impact on fio results anyway.

>>         if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
>>                         retries == 0) {
>> -               spin_unlock(&pool->lock);
>> +               read_unlock(&pool->lock);
>>                 return -EINVAL;
>>         }
>>         for (i = 0; i < retries; i++) {
>> @@ -556,7 +556,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                         last_handle = middle_handle = 0;
>>                 }
>>
>> -               spin_unlock(&pool->lock);
>> +               read_unlock(&pool->lock);
>>
>>                 /* Issue the eviction callback(s) */
>>                 if (middle_handle) {
>> @@ -575,7 +575,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                                 goto next;
>>                 }
>>  next:
>> -               spin_lock(&pool->lock);
>> +               write_lock(&pool->lock);
>>                 clear_bit(UNDER_RECLAIM, &page->private);
>>                 if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>>                     (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>> @@ -587,7 +587,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                         clear_bit(PAGE_HEADLESS, &page->private);
>>                         free_z3fold_page(zhdr);
>>                         atomic64_dec(&pool->pages_nr);
>> -                       spin_unlock(&pool->lock);
>> +                       write_unlock(&pool->lock);
>>                         return 0;
>>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>                         if (zhdr->first_chunks != 0 &&
>> @@ -607,8 +607,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>
>>                 /* add to beginning of LRU */
>>                 list_add(&page->lru, &pool->lru);
>> +               write_unlock(&pool->lock);
>> +               read_lock(&pool->lock);
>>         }
>> -       spin_unlock(&pool->lock);
>> +       read_unlock(&pool->lock);
>>         return -EAGAIN;
>>  }
>>
>> @@ -629,7 +631,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>>         void *addr;
>>         enum buddy buddy;
>>
>> -       spin_lock(&pool->lock);
>> +       read_lock(&pool->lock);
>>         zhdr = handle_to_z3fold_header(handle);
>>         addr = zhdr;
>>         page = virt_to_page(zhdr);
>> @@ -656,7 +658,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>>                 break;
>>         }
>>  out:
>> -       spin_unlock(&pool->lock);
>> +       read_unlock(&pool->lock);
>>         return addr;
>>  }
>>
>> @@ -671,19 +673,19 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>>         struct page *page;
>>         enum buddy buddy;
>>
>> -       spin_lock(&pool->lock);
>> +       read_lock(&pool->lock);
>>         zhdr = handle_to_z3fold_header(handle);
>>         page = virt_to_page(zhdr);
>>
>>         if (test_bit(PAGE_HEADLESS, &page->private)) {
>> -               spin_unlock(&pool->lock);
>> +               read_unlock(&pool->lock);
>>                 return;
>>         }
>>
>>         buddy = handle_to_buddy(handle);
>>         if (buddy == MIDDLE)
>>                 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>> -       spin_unlock(&pool->lock);
>> +       read_unlock(&pool->lock);
>>  }
>>
>>  /**
>> --
>> 2.4.2


* Re: [PATCHv3 3/3] z3fold: add compaction worker
  2016-10-27 11:13 ` [PATCHv3 3/3] z3fold: add compaction worker Vitaly Wool
@ 2016-11-01 22:13   ` Dan Streetman
  0 siblings, 0 replies; 10+ messages in thread
From: Dan Streetman @ 2016-11-01 22:13 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Thu, Oct 27, 2016 at 7:13 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> This patch implements a compaction worker thread for z3fold. This
> worker does not free up any pages directly, but it allows for
> denser placement of compressed objects, which results in fewer
> pages actually consumed and therefore a higher compression ratio.
>
> This patch has been checked against the latest Linus tree.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 140 insertions(+), 26 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 014d84f..cc26ff5 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -27,6 +27,7 @@
>  #include <linux/mm.h>
>  #include <linux/module.h>
>  #include <linux/preempt.h>
> +#include <linux/workqueue.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
>  #include <linux/zpool.h>
> @@ -59,6 +60,7 @@ struct z3fold_ops {
>
>  /**
>   * struct z3fold_pool - stores metadata for each z3fold pool
> + * @name:      pool name
>   * @lock:      protects all pool fields and first|last_chunk fields of any
>   *             z3fold page in the pool
>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
> @@ -72,11 +74,14 @@ struct z3fold_ops {
>   * @unbuddied_nr:      number of unbuddied z3fold pages in the pool.
>   * @ops:       pointer to a structure of user defined operations specified at
>   *             pool creation time.
> + * @compact_wq:        workqueue for page layout background optimization
> + * @work:      compaction work item
>   *
>   * This structure is allocated at pool creation time and maintains metadata
>   * pertaining to a particular z3fold pool.
>   */
>  struct z3fold_pool {
> +       const char *name;
>         rwlock_t lock;
>         struct list_head unbuddied[NCHUNKS];
>         struct list_head buddied;
> @@ -86,6 +91,8 @@ struct z3fold_pool {
>         const struct z3fold_ops *ops;
>         struct zpool *zpool;
>         const struct zpool_ops *zpool_ops;
> +       struct workqueue_struct *compact_wq;
> +       struct delayed_work work;
>  };
>
>  enum buddy {
> @@ -121,6 +128,7 @@ enum z3fold_page_flags {
>         UNDER_RECLAIM = 0,
>         PAGE_HEADLESS,
>         MIDDLE_CHUNK_MAPPED,
> +       COMPACTION_DEFERRED,
>  };
>
>  /*****************
> @@ -136,6 +144,9 @@ static int size_to_chunks(size_t size)
>  #define for_each_unbuddied_list(_iter, _begin) \
>         for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
>
> +#define for_each_unbuddied_list_reverse(_iter, _end) \
> +       for ((_iter) = (_end); (_iter) > 0; (_iter)--)
> +
>  /* Initializes the z3fold header of a newly allocated z3fold page */
>  static struct z3fold_header *init_z3fold_page(struct page *page)
>  {
> @@ -145,6 +156,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>         clear_bit(UNDER_RECLAIM, &page->private);
>         clear_bit(PAGE_HEADLESS, &page->private);
>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> +       clear_bit(COMPACTION_DEFERRED, &page->private);
>
>         zhdr->first_chunks = 0;
>         zhdr->middle_chunks = 0;
> @@ -211,6 +223,116 @@ static int num_free_chunks(struct z3fold_header *zhdr)
>         return nfree;
>  }
>
> +static inline void *mchunk_memmove(struct z3fold_header *zhdr,
> +                               unsigned short dst_chunk)
> +{
> +       void *beg = zhdr;
> +       return memmove(beg + (dst_chunk << CHUNK_SHIFT),
> +                      beg + (zhdr->start_middle << CHUNK_SHIFT),
> +                      zhdr->middle_chunks << CHUNK_SHIFT);
> +}
> +
> +static int z3fold_compact_page(struct z3fold_header *zhdr, bool sync)
> +{
> +       struct page *page = virt_to_page(zhdr);
> +       int ret = 0;
> +
> +       if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) {
> +               set_bit(COMPACTION_DEFERRED, &page->private);
> +               ret = -1;
> +               goto out;
> +       }
> +
> +       clear_bit(COMPACTION_DEFERRED, &page->private);
> +       if (zhdr->middle_chunks != 0) {
> +               if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> +                       /*
> +                        * If we are here, no one can access this page
> +                        * except for z3fold_map or z3fold_free. Both
> +                        * will wait for page_lock to become free.
> +                        */
> +                       mchunk_memmove(zhdr, 1); /* move to the beginning */
> +                       zhdr->first_chunks = zhdr->middle_chunks;
> +                       zhdr->middle_chunks = 0;
> +                       zhdr->start_middle = 0;
> +                       zhdr->first_num++;
> +                       ret = 1;
> +                       goto out;
> +               }
> +               if (sync)
> +                       goto out;
> +
> +               /*
> +                * These are more complicated cases: first or last object
> +                * may be mapped. Luckily we don't touch these anyway.
> +                *
> +                * NB: moving data is expensive, so let's only do that if
> +                * there's substantial gain (2+ chunks)
> +                */
> +               if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
> +                   zhdr->start_middle > zhdr->first_chunks + 2) {
> +                       mchunk_memmove(zhdr, zhdr->first_chunks + 1);
> +                       zhdr->start_middle = zhdr->first_chunks + 1;
> +                       ret = 1;
> +                       goto out;
> +               }
> +               if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
> +                   zhdr->middle_chunks + zhdr->last_chunks <=
> +                   NCHUNKS - zhdr->start_middle - 2) {
> +                       unsigned short new_start = NCHUNKS - zhdr->last_chunks -
> +                               zhdr->middle_chunks;
> +                       mchunk_memmove(zhdr, new_start);
> +                       zhdr->start_middle = new_start;
> +                       ret = 1;
> +                       goto out;
> +               }
> +       }
> +out:
> +       return ret;
> +}
> +
> +#define COMPACTION_BATCH       (NCHUNKS/2)
> +static void z3fold_compact_work(struct work_struct *w)
> +{
> +       struct z3fold_pool *pool = container_of(to_delayed_work(w),
> +                                               struct z3fold_pool, work);
> +       struct z3fold_header *zhdr;
> +       struct page *page;
> +       int i, ret, compacted = 0;
> +       bool requeue = false;
> +
> +       write_lock(&pool->lock);
> +       for_each_unbuddied_list_reverse(i, NCHUNKS - 3) {

do we still need to scan each unbuddied list?  This is a lot of
overhead when we already know exactly which page(s) we marked as
compaction deferred.

also, this only scans the last entry in each unbuddied list - but
that possibly misses a whole lot of entries, assuming the unbuddied
lists are larger than one entry each.

Can't we add one more list_head element to the zhdr, and add a
compaction_needed list to the pool?  Then the workqueue work can just
quickly process all the entries in that list.
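
e.g., a minimal sketch of that alternative - the compact list_head, the
compact_list field and the helper name below are hypothetical, not part
of this patch:

struct z3fold_header {
        ...
        struct list_head buddy;         /* unbuddied/buddied list entry */
        struct list_head compact;       /* entry in pool->compact_list */
        ...
};

struct z3fold_pool {
        ...
        struct list_head compact_list;  /* pages waiting for compaction */
        ...
};

/* called instead of set_bit(COMPACTION_DEFERRED, ...), with pool->lock
 * held for writing; assumes zhdr->compact is set up with INIT_LIST_HEAD()
 * in init_z3fold_page() and removed with list_del_init() */
static inline void z3fold_mark_for_compaction(struct z3fold_pool *pool,
                                              struct z3fold_header *zhdr)
{
        if (list_empty(&zhdr->compact))         /* not queued yet */
                list_add_tail(&zhdr->compact, &pool->compact_list);
}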

> +               zhdr = list_empty(&pool->unbuddied[i]) ?
> +                               NULL : list_last_entry(&pool->unbuddied[i],
> +                               struct z3fold_header, buddy);
> +               if (!zhdr)
> +                       continue;
> +               page = virt_to_page(zhdr);
> +               if (likely(!test_bit(COMPACTION_DEFERRED, &page->private)))
> +                       continue;
> +               list_del(&zhdr->buddy);
> +               ret = z3fold_compact_page(zhdr, false);
> +               if (ret < 0)
> +                       requeue = true;
> +               else
> +                       compacted += ret;
> +               write_unlock(&pool->lock);
> +               cond_resched();
> +               write_lock(&pool->lock);
> +               list_add(&zhdr->buddy,
> +                       &pool->unbuddied[num_free_chunks(zhdr)]);
> +               if (compacted >= COMPACTION_BATCH) {

instead of doing batches, if we have a dedicated list of entries that
we know need compaction, just process the entire list.  call
cond_resched() between compacting entries.
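
a rough sketch of that loop, reusing the hypothetical compact list from
the previous sketch (again, not part of this patch):

static void z3fold_compact_work(struct work_struct *w)
{
        struct z3fold_pool *pool = container_of(to_delayed_work(w),
                                                struct z3fold_pool, work);
        struct z3fold_header *zhdr;

        write_lock(&pool->lock);
        while (!list_empty(&pool->compact_list)) {
                zhdr = list_first_entry(&pool->compact_list,
                                        struct z3fold_header, compact);
                list_del_init(&zhdr->compact);
                list_del(&zhdr->buddy);
                /* a complete version would re-queue pages that are still
                 * MIDDLE_CHUNK_MAPPED (return value < 0) */
                z3fold_compact_page(zhdr, false);
                list_add(&zhdr->buddy,
                         &pool->unbuddied[num_free_chunks(zhdr)]);
                /* let readers/writers in between entries */
                write_unlock(&pool->lock);
                cond_resched();
                write_lock(&pool->lock);
        }
        write_unlock(&pool->lock);
}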

> +                       requeue = true;
> +                       break;
> +               }
> +       }
> +       write_unlock(&pool->lock);
> +       if (requeue && !delayed_work_pending(&pool->work))
> +               queue_delayed_work(pool->compact_wq, &pool->work, HZ);
> +}
> +
> +
>  /*****************
>   * API Functions
>  *****************/
> @@ -230,7 +352,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>
>         pool = kzalloc(sizeof(struct z3fold_pool), gfp);
>         if (!pool)
> -               return NULL;
> +               goto out;
>         rwlock_init(&pool->lock);
>         for_each_unbuddied_list(i, 0)
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
> @@ -238,8 +360,17 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>         INIT_LIST_HEAD(&pool->lru);
>         atomic64_set(&pool->pages_nr, 0);
>         atomic64_set(&pool->unbuddied_nr, 0);
> +       pool->compact_wq = create_singlethread_workqueue(pool->name);
> +       if (!pool->compact_wq)
> +               goto out;
> +       INIT_DELAYED_WORK(&pool->work, z3fold_compact_work);
>         pool->ops = ops;
>         return pool;
> +
> +out:
> +       if (pool)
> +               kfree(pool);
> +       return NULL;
>  }
>
>  /**
> @@ -250,31 +381,11 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>   */
>  static void z3fold_destroy_pool(struct z3fold_pool *pool)
>  {
> +       if (pool->compact_wq)
> +               destroy_workqueue(pool->compact_wq);
>         kfree(pool);
>  }
>
> -/* Has to be called with lock held */
> -static int z3fold_compact_page(struct z3fold_header *zhdr)
> -{
> -       struct page *page = virt_to_page(zhdr);
> -       void *beg = zhdr;
> -
> -
> -       if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
> -           zhdr->middle_chunks != 0 &&
> -           zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> -               memmove(beg + ZHDR_SIZE_ALIGNED,
> -                       beg + (zhdr->start_middle << CHUNK_SHIFT),
> -                       zhdr->middle_chunks << CHUNK_SHIFT);
> -               zhdr->first_chunks = zhdr->middle_chunks;
> -               zhdr->middle_chunks = 0;
> -               zhdr->start_middle = 0;
> -               zhdr->first_num++;
> -               return 1;
> -       }
> -       return 0;
> -}
> -
>  /**
>   * z3fold_alloc() - allocates a region of a given size
>   * @pool:      z3fold pool from which to allocate
> @@ -464,11 +575,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 free_z3fold_page(zhdr);
>                 atomic64_dec(&pool->pages_nr);
>         } else {
> -               z3fold_compact_page(zhdr);
> +               set_bit(COMPACTION_DEFERRED, &page->private);

can't we at least try to compact inline?  at minimum, we should throw
this to the compact function to mark for compaction, if needed - we
can certainly at least do the zhdr placement calculations to see if
compaction is needed, even if we don't want to actually do the
compaction right now.
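
e.g., a helper along these lines could do the placement calculation at
free time and only defer pages where compaction would actually gain
something - the helper name is hypothetical, the thresholds mirror the
ones in z3fold_compact_page() above:

/* true if z3fold_compact_page() would move anything for this page */
static inline bool z3fold_compaction_worthwhile(struct z3fold_header *zhdr)
{
        if (zhdr->middle_chunks == 0)
                return false;           /* nothing to move */
        if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)
                return true;            /* middle can become first */
        if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
            zhdr->start_middle > zhdr->first_chunks + 2)
                return true;            /* 2+ chunks gained moving down */
        if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
            zhdr->middle_chunks + zhdr->last_chunks <=
            NCHUNKS - zhdr->start_middle - 2)
                return true;            /* 2+ chunks gained moving up */
        return false;
}

/* then, in z3fold_free(): */
        if (z3fold_compaction_worthwhile(zhdr))
                set_bit(COMPACTION_DEFERRED, &page->private);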

>                 /* Add to the unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               list_add_tail(&zhdr->buddy, &pool->unbuddied[freechunks]);
>                 atomic64_inc(&pool->unbuddied_nr);
> +               if (!delayed_work_pending(&pool->work))
> +                       queue_delayed_work(pool->compact_wq, &pool->work, HZ);
>         }
>
>         write_unlock(&pool->lock);
> @@ -596,7 +709,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 /* Full, add to buddied list */
>                                 list_add(&zhdr->buddy, &pool->buddied);
>                         } else {
> -                               z3fold_compact_page(zhdr);
> +                               z3fold_compact_page(zhdr, true);
>                                 /* add to unbuddied list */
>                                 freechunks = num_free_chunks(zhdr);
>                                 list_add(&zhdr->buddy,
> @@ -725,6 +838,7 @@ static void *z3fold_zpool_create(const char *name, gfp_t gfp,
>         if (pool) {
>                 pool->zpool = zpool;
>                 pool->zpool_ops = zpool_ops;
> +               pool->name = name;
>         }
>         return pool;
>  }
> --
> 2.4.2

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCHv3 1/3] z3fold: make counters atomic
  2016-11-01 21:03     ` Vitaly Wool
@ 2016-11-01 22:16       ` Dan Streetman
  0 siblings, 0 replies; 10+ messages in thread
From: Dan Streetman @ 2016-11-01 22:16 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Tue, Nov 1, 2016 at 5:03 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> On Tue, Nov 1, 2016 at 9:03 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>> On Thu, Oct 27, 2016 at 7:08 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>>> This patch converts pages_nr per-pool counter to atomic64_t.
>>> It also introduces a new counter, unbuddied_nr, which is
>>> atomic64_t, too, to track the number of unbuddied (compactable)
>>> z3fold pages.
>>
>> so, with the use of workqueues, do we still need the unbuddied_nr
>> counter?  It doesn't seem to be used in later patches?
>
> I'm going to add sysfs/debugfs accounting a bit later, so it won't remain unused.
>
> Also, with a per-page lock (if I come up with something reasonable) it could
> still be worth going back to the shrinker.

let's wait to add it once the code that uses it is ready, yeah?
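
(For reference, one way such per-pool accounting could eventually be
exposed - a sketch only; the debugfs_dir field and the file layout are
hypothetical and not part of any patch in this thread:)

#include <linux/debugfs.h>

static int unbuddied_nr_get(void *data, u64 *val)
{
        struct z3fold_pool *pool = data;

        *val = atomic64_read(&pool->unbuddied_nr);
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(unbuddied_nr_fops, unbuddied_nr_get, NULL, "%llu\n");

/* at pool creation time: */
        pool->debugfs_dir = debugfs_create_dir(pool->name, NULL);
        if (!IS_ERR_OR_NULL(pool->debugfs_dir))
                debugfs_create_file("unbuddied_nr", 0444, pool->debugfs_dir,
                                    pool, &unbuddied_nr_fops);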

>
>> changing the pages_nr to atomic is a good idea though, so we can
>> safely read it without needing the pool lock.
>>
>>>
>>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>>> ---
>>>  mm/z3fold.c | 33 +++++++++++++++++++++++++--------
>>>  1 file changed, 25 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>> index 8f9e89c..5ac325a 100644
>>> --- a/mm/z3fold.c
>>> +++ b/mm/z3fold.c
>>> @@ -69,6 +69,7 @@ struct z3fold_ops {
>>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>>   *             added buddy.
>>>   * @pages_nr:  number of z3fold pages in the pool.
>>> + * @unbuddied_nr:      number of unbuddied z3fold pages in the pool.
>>>   * @ops:       pointer to a structure of user defined operations specified at
>>>   *             pool creation time.
>>>   *
>>> @@ -80,7 +81,8 @@ struct z3fold_pool {
>>>         struct list_head unbuddied[NCHUNKS];
>>>         struct list_head buddied;
>>>         struct list_head lru;
>>> -       u64 pages_nr;
>>> +       atomic64_t pages_nr;
>>> +       atomic64_t unbuddied_nr;
>>>         const struct z3fold_ops *ops;
>>>         struct zpool *zpool;
>>>         const struct zpool_ops *zpool_ops;
>>> @@ -234,7 +236,8 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>>         INIT_LIST_HEAD(&pool->buddied);
>>>         INIT_LIST_HEAD(&pool->lru);
>>> -       pool->pages_nr = 0;
>>> +       atomic64_set(&pool->pages_nr, 0);
>>> +       atomic64_set(&pool->unbuddied_nr, 0);
>>>         pool->ops = ops;
>>>         return pool;
>>>  }
>>> @@ -334,6 +337,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>                                         continue;
>>>                                 }
>>>                                 list_del(&zhdr->buddy);
>>> +                               atomic64_dec(&pool->unbuddied_nr);
>>>                                 goto found;
>>>                         }
>>>                 }
>>> @@ -346,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>         if (!page)
>>>                 return -ENOMEM;
>>>         spin_lock(&pool->lock);
>>> -       pool->pages_nr++;
>>> +       atomic64_inc(&pool->pages_nr);
>>>         zhdr = init_z3fold_page(page);
>>>
>>>         if (bud == HEADLESS) {
>>> @@ -369,6 +373,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>                 /* Add to unbuddied list */
>>>                 freechunks = num_free_chunks(zhdr);
>>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> +               atomic64_inc(&pool->unbuddied_nr);
>>>         } else {
>>>                 /* Add to buddied list */
>>>                 list_add(&zhdr->buddy, &pool->buddied);
>>> @@ -412,6 +417,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>                 /* HEADLESS page stored */
>>>                 bud = HEADLESS;
>>>         } else {
>>> +               bool is_unbuddied = zhdr->first_chunks == 0 ||
>>> +                               zhdr->middle_chunks == 0 ||
>>> +                               zhdr->last_chunks == 0;
>>> +
>>>                 bud = handle_to_buddy(handle);
>>>
>>>                 switch (bud) {
>>> @@ -431,6 +440,8 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>                         spin_unlock(&pool->lock);
>>>                         return;
>>>                 }
>>> +               if (is_unbuddied)
>>> +                       atomic64_dec(&pool->unbuddied_nr);
>>>         }
>>>
>>>         if (test_bit(UNDER_RECLAIM, &page->private)) {
>>> @@ -451,12 +462,13 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>                 list_del(&page->lru);
>>>                 clear_bit(PAGE_HEADLESS, &page->private);
>>>                 free_z3fold_page(zhdr);
>>> -               pool->pages_nr--;
>>> +               atomic64_dec(&pool->pages_nr);
>>>         } else {
>>>                 z3fold_compact_page(zhdr);
>>>                 /* Add to the unbuddied list */
>>>                 freechunks = num_free_chunks(zhdr);
>>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> +               atomic64_inc(&pool->unbuddied_nr);
>>>         }
>>>
>>>         spin_unlock(&pool->lock);
>>> @@ -520,6 +532,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                 zhdr = page_address(page);
>>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>>                         list_del(&zhdr->buddy);
>>> +                       if (zhdr->first_chunks == 0 ||
>>> +                           zhdr->middle_chunks == 0 ||
>>> +                           zhdr->last_chunks == 0)
>>> +                               atomic64_dec(&pool->unbuddied_nr);
>>> +
>>>                         /*
>>>                          * We need encode the handles before unlocking, since
>>>                          * we can race with free that will set
>>> @@ -569,7 +586,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                          */
>>>                         clear_bit(PAGE_HEADLESS, &page->private);
>>>                         free_z3fold_page(zhdr);
>>> -                       pool->pages_nr--;
>>> +                       atomic64_dec(&pool->pages_nr);
>>>                         spin_unlock(&pool->lock);
>>>                         return 0;
>>>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>> @@ -584,6 +601,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                                 freechunks = num_free_chunks(zhdr);
>>>                                 list_add(&zhdr->buddy,
>>>                                          &pool->unbuddied[freechunks]);
>>> +                               atomic64_inc(&pool->unbuddied_nr);
>>>                         }
>>>                 }
>>>
>>> @@ -672,12 +690,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>>>   * z3fold_get_pool_size() - gets the z3fold pool size in pages
>>>   * @pool:      pool whose size is being queried
>>>   *
>>> - * Returns: size in pages of the given pool.  The pool lock need not be
>>> - * taken to access pages_nr.
>>> + * Returns: size in pages of the given pool.
>>>   */
>>>  static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>>>  {
>>> -       return pool->pages_nr;
>>> +       return atomic64_read(&pool->pages_nr);
>>>  }
>>>
>>>  /*****************
>>> --
>>> 2.4.2

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-11-01 22:16 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-27 11:06 [PATCHv3 0/3] z3fold: background page compaction Vitaly Wool
2016-10-27 11:08 ` [PATCHv3 1/3] z3fold: make counters atomic Vitaly Wool
2016-11-01 20:03   ` Dan Streetman
2016-11-01 21:03     ` Vitaly Wool
2016-11-01 22:16       ` Dan Streetman
2016-10-27 11:12 ` [PATCHv3 2/3] z3fold: change per-pool spinlock to rwlock Vitaly Wool
2016-11-01 20:16   ` Dan Streetman
2016-11-01 21:05     ` Vitaly Wool
2016-10-27 11:13 ` [PATCHv3 3/3] z3fold: add compaction worker Vitaly Wool
2016-11-01 22:13   ` Dan Streetman
