* [PATCH/RESEND v2 0/5] z3fold optimizations and fixes
@ 2017-01-11 14:59 Vitaly Wool
  2017-01-11 15:06 ` [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic Vitaly Wool
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 14:59 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

This is a consolidation of z3fold optimizations and fixes done so far, revised after comments from Dan ([1], [2], [3], [4]).
The patches in this series are to be applied on top of the following commit:

Author: zhong jiang <zhongjiang@huawei.com>
Date:   Tue Dec 20 11:53:40 2016 +1100

    mm/z3fold.c: limit first_num to the actual range of possible buddy indexes

All z3fold patches newer than this one are considered obsolete and should be replaced by this patch series. The patches in this series have been verified against the linux-next tree.

[1] https://lkml.org/lkml/2016/11/29/969
[2] https://lkml.org/lkml/2017/1/4/586
[3] https://lkml.org/lkml/2017/1/4/604
[4] https://lkml.org/lkml/2017/1/4/737


* [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic
  2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
@ 2017-01-11 15:06 ` Vitaly Wool
  2017-01-11 16:09   ` Dan Streetman
  2017-01-11 15:06 ` [PATCH/RESEND v2 2/5] z3fold: fix header size related issues Vitaly Wool
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 15:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

This patch converts the per-pool pages_nr counter to atomic64_t.
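
As a minimal illustration (not part of the patch itself), the before and
after patterns for the counter look roughly like this; the surrounding
code is simplified:

    /* before: counter protected by the per-pool spinlock */
    spin_lock(&pool->lock);
    pool->pages_nr++;
    spin_unlock(&pool->lock);

    /* after: atomic64_t, no pool lock needed for the counter itself */
    atomic64_inc(&pool->pages_nr);
    ...
    return atomic64_read(&pool->pages_nr);   /* lock-free read */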

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 207e5dd..2273789 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -80,7 +80,7 @@ struct z3fold_pool {
 	struct list_head unbuddied[NCHUNKS];
 	struct list_head buddied;
 	struct list_head lru;
-	u64 pages_nr;
+	atomic64_t pages_nr;
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
@@ -238,7 +238,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
-	pool->pages_nr = 0;
+	atomic64_set(&pool->pages_nr, 0);
 	pool->ops = ops;
 	return pool;
 }
@@ -350,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	if (!page)
 		return -ENOMEM;
 	spin_lock(&pool->lock);
-	pool->pages_nr++;
+	atomic64_inc(&pool->pages_nr);
 	zhdr = init_z3fold_page(page);
 
 	if (bud == HEADLESS) {
@@ -443,10 +443,9 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		return;
 	}
 
-	if (bud != HEADLESS) {
-		/* Remove from existing buddy list */
+	/* Remove from existing buddy list */
+	if (bud != HEADLESS)
 		list_del(&zhdr->buddy);
-	}
 
 	if (bud == HEADLESS ||
 	    (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
@@ -455,7 +454,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		list_del(&page->lru);
 		clear_bit(PAGE_HEADLESS, &page->private);
 		free_z3fold_page(zhdr);
-		pool->pages_nr--;
+		atomic64_dec(&pool->pages_nr);
 	} else {
 		z3fold_compact_page(zhdr);
 		/* Add to the unbuddied list */
@@ -573,7 +572,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			 */
 			clear_bit(PAGE_HEADLESS, &page->private);
 			free_z3fold_page(zhdr);
-			pool->pages_nr--;
+			atomic64_dec(&pool->pages_nr);
 			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
@@ -676,12 +675,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
  * z3fold_get_pool_size() - gets the z3fold pool size in pages
  * @pool:	pool whose size is being queried
  *
- * Returns: size in pages of the given pool.  The pool lock need not be
- * taken to access pages_nr.
+ * Returns: size in pages of the given pool.
  */
 static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
 {
-	return pool->pages_nr;
+	return atomic64_read(&pool->pages_nr);
 }
 
 /*****************
-- 
2.4.2


* [PATCH/RESEND v2 2/5] z3fold: fix header size related issues
  2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
  2017-01-11 15:06 ` [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic Vitaly Wool
@ 2017-01-11 15:06 ` Vitaly Wool
  2017-01-11 16:24   ` Dan Streetman
  2017-01-11 15:06 ` [PATCH/RESEND v2 3/5] z3fold: extend compaction function Vitaly Wool
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 15:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

Currently the whole kernel build will be stopped if the size of struct
z3fold_header is greater than the size of one chunk, which is 64 bytes by
default. This patch instead defines the offset for z3fold objects as the
size of the z3fold header in chunks.

Also fixed are the calculation in num_free_chunks() and the address the
middle chunk is moved to during in-page compaction in
z3fold_compact_page().
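
As a rough worked example (assuming PAGE_SIZE = 4096 and the default
NCHUNKS_ORDER = 6; the exact header size is configuration dependent):

    CHUNK_SHIFT       = PAGE_SHIFT - NCHUNKS_ORDER = 12 - 6 = 6
    CHUNK_SIZE        = 1 << CHUNK_SHIFT           = 64 bytes
    ZHDR_SIZE_ALIGNED = round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
                      = 64 if the header fits in one chunk -> ZHDR_CHUNKS = 1
                        (128 for a bigger, e.g. debug, header -> ZHDR_CHUNKS = 2)
    TOTAL_CHUNKS      = PAGE_SIZE >> CHUNK_SHIFT   = 64
    NCHUNKS           = (PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT
                      = 63 (or 62, respectively)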

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 114 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 64 insertions(+), 50 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 2273789..98ab01f 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -34,29 +34,58 @@
 /*****************
  * Structures
 *****************/
+struct z3fold_pool;
+struct z3fold_ops {
+	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
+};
+
+enum buddy {
+	HEADLESS = 0,
+	FIRST,
+	MIDDLE,
+	LAST,
+	BUDDIES_MAX
+};
+
+/*
+ * struct z3fold_header - z3fold page metadata occupying the first chunk of each
+ *			z3fold page, except for HEADLESS pages
+ * @buddy:	links the z3fold page into the relevant list in the pool
+ * @first_chunks:	the size of the first buddy in chunks, 0 if free
+ * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
+ * @last_chunks:	the size of the last buddy in chunks, 0 if free
+ * @first_num:		the starting number (for the first handle)
+ */
+struct z3fold_header {
+	struct list_head buddy;
+	unsigned short first_chunks;
+	unsigned short middle_chunks;
+	unsigned short last_chunks;
+	unsigned short start_middle;
+	unsigned short first_num:2;
+};
+
 /*
  * NCHUNKS_ORDER determines the internal allocation granularity, effectively
  * adjusting internal fragmentation.  It also determines the number of
  * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
- * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
- * in allocated page is occupied by z3fold header, NCHUNKS will be calculated
- * to 63 which shows the max number of free chunks in z3fold page, also there
- * will be 63 freelists per pool.
+ * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
+ * in the beginning of an allocated page are occupied by z3fold header, so
+ * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
+ * which shows the max number of free chunks in z3fold page, also there will
+ * be 63, or 62, respectively, freelists per pool.
  */
 #define NCHUNKS_ORDER	6
 
 #define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
 #define CHUNK_SIZE	(1 << CHUNK_SHIFT)
-#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
+#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
+#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
 #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
 
 #define BUDDY_MASK	(0x3)
 
-struct z3fold_pool;
-struct z3fold_ops {
-	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
-};
-
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
  * @lock:	protects all pool fields and first|last_chunk fields of any
@@ -86,32 +115,6 @@ struct z3fold_pool {
 	const struct zpool_ops *zpool_ops;
 };
 
-enum buddy {
-	HEADLESS = 0,
-	FIRST,
-	MIDDLE,
-	LAST,
-	BUDDIES_MAX
-};
-
-/*
- * struct z3fold_header - z3fold page metadata occupying the first chunk of each
- *			z3fold page, except for HEADLESS pages
- * @buddy:	links the z3fold page into the relevant list in the pool
- * @first_chunks:	the size of the first buddy in chunks, 0 if free
- * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
- * @last_chunks:	the size of the last buddy in chunks, 0 if free
- * @first_num:		the starting number (for the first handle)
- */
-struct z3fold_header {
-	struct list_head buddy;
-	unsigned short first_chunks;
-	unsigned short middle_chunks;
-	unsigned short last_chunks;
-	unsigned short start_middle;
-	unsigned short first_num:2;
-};
-
 /*
  * Internal z3fold page flags
  */
@@ -121,6 +124,7 @@ enum z3fold_page_flags {
 	MIDDLE_CHUNK_MAPPED,
 };
 
+
 /*****************
  * Helpers
 *****************/
@@ -204,9 +208,10 @@ static int num_free_chunks(struct z3fold_header *zhdr)
 	 */
 	if (zhdr->middle_chunks != 0) {
 		int nfree_before = zhdr->first_chunks ?
-			0 : zhdr->start_middle - 1;
+			0 : zhdr->start_middle - ZHDR_CHUNKS;
 		int nfree_after = zhdr->last_chunks ?
-			0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks;
+			0 : TOTAL_CHUNKS -
+				(zhdr->start_middle + zhdr->middle_chunks);
 		nfree = max(nfree_before, nfree_after);
 	} else
 		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
@@ -254,26 +259,35 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 	kfree(pool);
 }
 
+static inline void *mchunk_memmove(struct z3fold_header *zhdr,
+				unsigned short dst_chunk)
+{
+	void *beg = zhdr;
+	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
+		       beg + (zhdr->start_middle << CHUNK_SHIFT),
+		       zhdr->middle_chunks << CHUNK_SHIFT);
+}
+
 /* Has to be called with lock held */
 static int z3fold_compact_page(struct z3fold_header *zhdr)
 {
 	struct page *page = virt_to_page(zhdr);
-	void *beg = zhdr;
 
+	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
+		return 0; /* can't move middle chunk, it's used */
 
-	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
-	    zhdr->middle_chunks != 0 &&
-	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
-		memmove(beg + ZHDR_SIZE_ALIGNED,
-			beg + (zhdr->start_middle << CHUNK_SHIFT),
-			zhdr->middle_chunks << CHUNK_SHIFT);
+	if (zhdr->middle_chunks == 0)
+		return 0; /* nothing to compact */
+
+	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+		/* move to the beginning */
+		mchunk_memmove(zhdr, ZHDR_CHUNKS);
 		zhdr->first_chunks = zhdr->middle_chunks;
 		zhdr->middle_chunks = 0;
 		zhdr->start_middle = 0;
 		zhdr->first_num++;
-		return 1;
 	}
-	return 0;
+	return 1;
 }
 
 /**
@@ -365,7 +379,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		zhdr->last_chunks = chunks;
 	else {
 		zhdr->middle_chunks = chunks;
-		zhdr->start_middle = zhdr->first_chunks + 1;
+		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
 	}
 
 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
@@ -778,8 +792,8 @@ MODULE_ALIAS("zpool-z3fold");
 
 static int __init init_z3fold(void)
 {
-	/* Make sure the z3fold header will fit in one chunk */
-	BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
+	/* Make sure the z3fold header is not larger than the page size */
+	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
 	zpool_register_driver(&z3fold_zpool_driver);
 
 	return 0;
-- 
2.4.2


* [PATCH/RESEND v2 3/5] z3fold: extend compaction function
  2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
  2017-01-11 15:06 ` [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic Vitaly Wool
  2017-01-11 15:06 ` [PATCH/RESEND v2 2/5] z3fold: fix header size related issues Vitaly Wool
@ 2017-01-11 15:06 ` Vitaly Wool
  2017-01-11 16:28   ` Dan Streetman
  2017-01-11 15:06 ` [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock Vitaly Wool
  2017-01-11 15:06 ` [PATCH/RESEND v2 5/5] z3fold: add kref refcounting Vitaly Wool
  4 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 15:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

z3fold_compact_page() currently only handles the situation when the
middle chunk is the only one present within the z3fold page. However,
it may also be worth moving the middle chunk closer to either the first
or the last chunk, whichever is present, if the gap between them is big
enough.

This patch adds the relevant code, using the BIG_CHUNK_GAP define as a
threshold for when the middle chunk is worth moving.
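
A worked example of the threshold check, with made-up chunk counts and
assuming the header occupies a single chunk (ZHDR_CHUNKS = 1,
TOTAL_CHUNKS = 64):

    first_chunks = 4, start_middle = 10, middle_chunks = 5, last_chunks = 0

    gap = start_middle - (first_chunks + ZHDR_CHUNKS) = 10 - 5 = 5
        >= BIG_CHUNK_GAP (3)

    so the middle buddy is memmove()d down to start right after the first
    buddy, merging the 5 free chunks before it and the 49 free chunks after
    it into a single 54-chunk free region at the end of the page.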

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 98ab01f..fca3310 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
 		       zhdr->middle_chunks << CHUNK_SHIFT);
 }
 
+#define BIG_CHUNK_GAP	3
 /* Has to be called with lock held */
 static int z3fold_compact_page(struct z3fold_header *zhdr)
 {
@@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
 		zhdr->middle_chunks = 0;
 		zhdr->start_middle = 0;
 		zhdr->first_num++;
+		return 1;
 	}
-	return 1;
+
+	/*
+	 * moving data is expensive, so let's only do that if
+	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+	 */
+	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
+			BIG_CHUNK_GAP) {
+		mchunk_memmove(zhdr, zhdr->first_chunks + 1);
+		zhdr->start_middle = zhdr->first_chunks + 1;
+		return 1;
+	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
+					+ zhdr->middle_chunks) >=
+			BIG_CHUNK_GAP) {
+		unsigned short new_start = NCHUNKS - zhdr->last_chunks -
+			zhdr->middle_chunks;
+		mchunk_memmove(zhdr, new_start);
+		zhdr->start_middle = new_start;
+		return 1;
+	}
+
+	return 0;
 }
 
 /**
-- 
2.4.2


* [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock
  2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
                   ` (2 preceding siblings ...)
  2017-01-11 15:06 ` [PATCH/RESEND v2 3/5] z3fold: extend compaction function Vitaly Wool
@ 2017-01-11 15:06 ` Vitaly Wool
  2017-01-11 16:42   ` Dan Streetman
  2017-01-11 15:06 ` [PATCH/RESEND v2 5/5] z3fold: add kref refcounting Vitaly Wool
  4 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 15:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

Most z3fold operations are in-page, such as modifying the z3fold page
header or moving z3fold objects within a page.  Taking the per-pool
spinlock to protect per-page objects is therefore suboptimal, and the
idea of having a per-page spinlock (or rwlock) has been around for some
time.

This patch implements a spinlock-based per-page locking mechanism which
is lightweight enough to normally fit into the z3fold header.
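
A minimal sketch of the resulting locking pattern (using the helpers
introduced below; error handling omitted): the pool lock keeps protecting
the lists, the page lock protects the header fields, and the pool lock is
never held while a page lock is being taken:

    spin_lock(&pool->lock);
    zhdr = list_first_entry_or_null(&pool->unbuddied[i],
                                    struct z3fold_header, buddy);
    if (zhdr)
            list_del_init(&zhdr->buddy);
    spin_unlock(&pool->lock);

    if (zhdr) {
            z3fold_page_lock(zhdr);
            /* modify first/middle/last_chunks under the page lock */
            spin_lock(&pool->lock);
            list_add(&zhdr->buddy, &pool->unbuddied[num_free_chunks(zhdr)]);
            spin_unlock(&pool->lock);
            z3fold_page_unlock(zhdr);
    }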

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 148 +++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 106 insertions(+), 42 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index fca3310..4325bde 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -51,6 +51,7 @@ enum buddy {
  * struct z3fold_header - z3fold page metadata occupying the first chunk of each
  *			z3fold page, except for HEADLESS pages
  * @buddy:	links the z3fold page into the relevant list in the pool
+ * @page_lock:		per-page lock
  * @first_chunks:	the size of the first buddy in chunks, 0 if free
  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
@@ -58,6 +59,7 @@ enum buddy {
  */
 struct z3fold_header {
 	struct list_head buddy;
+	spinlock_t page_lock;
 	unsigned short first_chunks;
 	unsigned short middle_chunks;
 	unsigned short last_chunks;
@@ -148,6 +150,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	clear_bit(PAGE_HEADLESS, &page->private);
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 
+	spin_lock_init(&zhdr->page_lock);
 	zhdr->first_chunks = 0;
 	zhdr->middle_chunks = 0;
 	zhdr->last_chunks = 0;
@@ -163,6 +166,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
 	__free_page(virt_to_page(zhdr));
 }
 
+/* Lock a z3fold page */
+static inline void z3fold_page_lock(struct z3fold_header *zhdr)
+{
+	spin_lock(&zhdr->page_lock);
+}
+
+/* Unlock a z3fold page */
+static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
+{
+	spin_unlock(&zhdr->page_lock);
+}
+
+
 /*
  * Encodes the handle of a particular buddy within a z3fold page
  * Pool lock should be held as this function accesses first_num
@@ -351,50 +367,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		bud = HEADLESS;
 	else {
 		chunks = size_to_chunks(size);
-		spin_lock(&pool->lock);
 
 		/* First, try to find an unbuddied z3fold page. */
 		zhdr = NULL;
 		for_each_unbuddied_list(i, chunks) {
-			if (!list_empty(&pool->unbuddied[i])) {
-				zhdr = list_first_entry(&pool->unbuddied[i],
+			spin_lock(&pool->lock);
+			zhdr = list_first_entry_or_null(&pool->unbuddied[i],
 						struct z3fold_header, buddy);
-				page = virt_to_page(zhdr);
-				if (zhdr->first_chunks == 0) {
-					if (zhdr->middle_chunks != 0 &&
-					    chunks >= zhdr->start_middle)
-						bud = LAST;
-					else
-						bud = FIRST;
-				} else if (zhdr->last_chunks == 0)
+			if (!zhdr) {
+				spin_unlock(&pool->lock);
+				continue;
+			}
+			list_del_init(&zhdr->buddy);
+			spin_unlock(&pool->lock);
+
+			page = virt_to_page(zhdr);
+			z3fold_page_lock(zhdr);
+			if (zhdr->first_chunks == 0) {
+				if (zhdr->middle_chunks != 0 &&
+				    chunks >= zhdr->start_middle)
 					bud = LAST;
-				else if (zhdr->middle_chunks == 0)
-					bud = MIDDLE;
-				else {
-					pr_err("No free chunks in unbuddied\n");
-					WARN_ON(1);
-					continue;
-				}
-				list_del(&zhdr->buddy);
-				goto found;
+				else
+					bud = FIRST;
+			} else if (zhdr->last_chunks == 0)
+				bud = LAST;
+			else if (zhdr->middle_chunks == 0)
+				bud = MIDDLE;
+			else {
+				spin_lock(&pool->lock);
+				list_add(&zhdr->buddy, &pool->buddied);
+				spin_unlock(&pool->lock);
+				z3fold_page_unlock(zhdr);
+				pr_err("No free chunks in unbuddied\n");
+				WARN_ON(1);
+				continue;
 			}
+			goto found;
 		}
 		bud = FIRST;
-		spin_unlock(&pool->lock);
 	}
 
 	/* Couldn't find unbuddied z3fold page, create new one */
 	page = alloc_page(gfp);
 	if (!page)
 		return -ENOMEM;
-	spin_lock(&pool->lock);
+
 	atomic64_inc(&pool->pages_nr);
 	zhdr = init_z3fold_page(page);
 
 	if (bud == HEADLESS) {
 		set_bit(PAGE_HEADLESS, &page->private);
+		spin_lock(&pool->lock);
 		goto headless;
 	}
+	z3fold_page_lock(zhdr);
 
 found:
 	if (bud == FIRST)
@@ -406,6 +432,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
 	}
 
+	spin_lock(&pool->lock);
 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
 			zhdr->middle_chunks == 0) {
 		/* Add to unbuddied list */
@@ -425,6 +452,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 
 	*handle = encode_handle(zhdr, bud);
 	spin_unlock(&pool->lock);
+	if (bud != HEADLESS)
+		z3fold_page_unlock(zhdr);
 
 	return 0;
 }
@@ -446,7 +475,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy bud;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
@@ -454,6 +482,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		/* HEADLESS page stored */
 		bud = HEADLESS;
 	} else {
+		z3fold_page_lock(zhdr);
 		bud = handle_to_buddy(handle);
 
 		switch (bud) {
@@ -470,37 +499,59 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		default:
 			pr_err("%s: unknown bud %d\n", __func__, bud);
 			WARN_ON(1);
-			spin_unlock(&pool->lock);
+			z3fold_page_unlock(zhdr);
 			return;
 		}
 	}
 
 	if (test_bit(UNDER_RECLAIM, &page->private)) {
 		/* z3fold page is under reclaim, reclaim will free */
-		spin_unlock(&pool->lock);
+		if (bud != HEADLESS)
+			z3fold_page_unlock(zhdr);
 		return;
 	}
 
 	/* Remove from existing buddy list */
-	if (bud != HEADLESS)
-		list_del(&zhdr->buddy);
+	if (bud != HEADLESS) {
+		spin_lock(&pool->lock);
+		/*
+		 * this object may have been removed from its list by
+		 * z3fold_alloc(). In that case we just do nothing,
+		 * z3fold_alloc() will allocate an object and add the page
+		 * to the relevant list.
+		 */
+		if (!list_empty(&zhdr->buddy)) {
+			list_del(&zhdr->buddy);
+		} else {
+			spin_unlock(&pool->lock);
+			z3fold_page_unlock(zhdr);
+			return;
+		}
+		spin_unlock(&pool->lock);
+	}
 
 	if (bud == HEADLESS ||
 	    (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
 			zhdr->last_chunks == 0)) {
 		/* z3fold page is empty, free */
+		spin_lock(&pool->lock);
 		list_del(&page->lru);
+		spin_unlock(&pool->lock);
 		clear_bit(PAGE_HEADLESS, &page->private);
+		if (bud != HEADLESS)
+			z3fold_page_unlock(zhdr);
 		free_z3fold_page(zhdr);
 		atomic64_dec(&pool->pages_nr);
 	} else {
 		z3fold_compact_page(zhdr);
 		/* Add to the unbuddied list */
+		spin_lock(&pool->lock);
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		spin_unlock(&pool->lock);
+		z3fold_page_unlock(zhdr);
 	}
 
-	spin_unlock(&pool->lock);
 }
 
 /**
@@ -547,12 +598,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
 	spin_lock(&pool->lock);
-	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
-			retries == 0) {
+	if (!pool->ops || !pool->ops->evict || retries == 0) {
 		spin_unlock(&pool->lock);
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
+		if (list_empty(&pool->lru)) {
+			spin_unlock(&pool->lock);
+			return -EINVAL;
+		}
 		page = list_last_entry(&pool->lru, struct page, lru);
 		list_del(&page->lru);
 
@@ -561,6 +615,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			list_del(&zhdr->buddy);
+			spin_unlock(&pool->lock);
+			z3fold_page_lock(zhdr);
 			/*
 			 * We need encode the handles before unlocking, since
 			 * we can race with free that will set
@@ -575,13 +631,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				middle_handle = encode_handle(zhdr, MIDDLE);
 			if (zhdr->last_chunks)
 				last_handle = encode_handle(zhdr, LAST);
+			z3fold_page_unlock(zhdr);
 		} else {
 			first_handle = encode_handle(zhdr, HEADLESS);
 			last_handle = middle_handle = 0;
+			spin_unlock(&pool->lock);
 		}
 
-		spin_unlock(&pool->lock);
-
 		/* Issue the eviction callback(s) */
 		if (middle_handle) {
 			ret = pool->ops->evict(pool, middle_handle);
@@ -599,7 +655,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		spin_lock(&pool->lock);
+		if (!test_bit(PAGE_HEADLESS, &page->private))
+			z3fold_page_lock(zhdr);
 		clear_bit(UNDER_RECLAIM, &page->private);
 		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
 		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
@@ -608,26 +665,34 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			 * All buddies are now free, free the z3fold page and
 			 * return success.
 			 */
-			clear_bit(PAGE_HEADLESS, &page->private);
+			if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
+				z3fold_page_unlock(zhdr);
 			free_z3fold_page(zhdr);
 			atomic64_dec(&pool->pages_nr);
-			spin_unlock(&pool->lock);
 			return 0;
 		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			if (zhdr->first_chunks != 0 &&
 			    zhdr->last_chunks != 0 &&
 			    zhdr->middle_chunks != 0) {
 				/* Full, add to buddied list */
+				spin_lock(&pool->lock);
 				list_add(&zhdr->buddy, &pool->buddied);
+				spin_unlock(&pool->lock);
 			} else {
 				z3fold_compact_page(zhdr);
 				/* add to unbuddied list */
+				spin_lock(&pool->lock);
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
 					 &pool->unbuddied[freechunks]);
+				spin_unlock(&pool->lock);
 			}
 		}
 
+		if (!test_bit(PAGE_HEADLESS, &page->private))
+			z3fold_page_unlock(zhdr);
+
+		spin_lock(&pool->lock);
 		/* add to beginning of LRU */
 		list_add(&page->lru, &pool->lru);
 	}
@@ -652,7 +717,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	void *addr;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	addr = zhdr;
 	page = virt_to_page(zhdr);
@@ -660,6 +724,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 	if (test_bit(PAGE_HEADLESS, &page->private))
 		goto out;
 
+	z3fold_page_lock(zhdr);
 	buddy = handle_to_buddy(handle);
 	switch (buddy) {
 	case FIRST:
@@ -678,8 +743,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		addr = NULL;
 		break;
 	}
+
+	z3fold_page_unlock(zhdr);
 out:
-	spin_unlock(&pool->lock);
 	return addr;
 }
 
@@ -694,19 +760,17 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
 	struct page *page;
 	enum buddy buddy;
 
-	spin_lock(&pool->lock);
 	zhdr = handle_to_z3fold_header(handle);
 	page = virt_to_page(zhdr);
 
-	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		spin_unlock(&pool->lock);
+	if (test_bit(PAGE_HEADLESS, &page->private))
 		return;
-	}
 
+	z3fold_page_lock(zhdr);
 	buddy = handle_to_buddy(handle);
 	if (buddy == MIDDLE)
 		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
-	spin_unlock(&pool->lock);
+	z3fold_page_unlock(zhdr);
 }
 
 /**
-- 
2.4.2


* [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
                   ` (3 preceding siblings ...)
  2017-01-11 15:06 ` [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock Vitaly Wool
@ 2017-01-11 15:06 ` Vitaly Wool
  2017-01-11 17:08   ` Dan Streetman
  4 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 15:06 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Dan Streetman, Andrew Morton

With both the locking optimizations in this series and those already
present, introducing kref to reference-count z3fold objects is the
right thing to do. Moreover, it makes the buddied list unnecessary and
allows for simpler handling of headless pages.
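
A minimal sketch of the kref life cycle this introduces (helper names as
in the patch below):

    kref_init(&zhdr->refcount);   /* a new page starts with one reference */
    ...
    kref_get(&zhdr->refcount);    /* e.g. before taking the page off a list */
    ...
    if (kref_put(&zhdr->refcount, release_z3fold_page))
            atomic64_dec(&pool->pages_nr);  /* last reference dropped:
                                               release_z3fold_page() has
                                               unlinked and freed the page */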

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
 1 file changed, 62 insertions(+), 83 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 4325bde..59cb14f 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -52,6 +52,7 @@ enum buddy {
  *			z3fold page, except for HEADLESS pages
  * @buddy:	links the z3fold page into the relevant list in the pool
  * @page_lock:		per-page lock
+ * @refcount:		reference count for the z3fold page
  * @first_chunks:	the size of the first buddy in chunks, 0 if free
  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
@@ -60,6 +61,7 @@ enum buddy {
 struct z3fold_header {
 	struct list_head buddy;
 	spinlock_t page_lock;
+	struct kref refcount;
 	unsigned short first_chunks;
 	unsigned short middle_chunks;
 	unsigned short last_chunks;
@@ -95,8 +97,6 @@ struct z3fold_header {
  * @unbuddied:	array of lists tracking z3fold pages that contain 2- buddies;
  *		the lists each z3fold page is added to depends on the size of
  *		its free region.
- * @buddied:	list tracking the z3fold pages that contain 3 buddies;
- *		these z3fold pages are full
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of z3fold pages in the pool.
@@ -109,7 +109,6 @@ struct z3fold_header {
 struct z3fold_pool {
 	spinlock_t lock;
 	struct list_head unbuddied[NCHUNKS];
-	struct list_head buddied;
 	struct list_head lru;
 	atomic64_t pages_nr;
 	const struct z3fold_ops *ops;
@@ -121,8 +120,7 @@ struct z3fold_pool {
  * Internal z3fold page flags
  */
 enum z3fold_page_flags {
-	UNDER_RECLAIM = 0,
-	PAGE_HEADLESS,
+	PAGE_HEADLESS = 0,
 	MIDDLE_CHUNK_MAPPED,
 };
 
@@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	struct z3fold_header *zhdr = page_address(page);
 
 	INIT_LIST_HEAD(&page->lru);
-	clear_bit(UNDER_RECLAIM, &page->private);
 	clear_bit(PAGE_HEADLESS, &page->private);
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 
 	spin_lock_init(&zhdr->page_lock);
+	kref_init(&zhdr->refcount);
 	zhdr->first_chunks = 0;
 	zhdr->middle_chunks = 0;
 	zhdr->last_chunks = 0;
@@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 }
 
 /* Resets the struct page fields and frees the page */
-static void free_z3fold_page(struct z3fold_header *zhdr)
+static void free_z3fold_page(struct page *page)
 {
-	__free_page(virt_to_page(zhdr));
+	__free_page(page);
+}
+
+static void release_z3fold_page(struct kref *ref)
+{
+	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+						refcount);
+	struct page *page = virt_to_page(zhdr);
+	if (!list_empty(&zhdr->buddy))
+		list_del(&zhdr->buddy);
+	if (!list_empty(&page->lru))
+		list_del(&page->lru);
+	free_z3fold_page(page);
 }
 
 /* Lock a z3fold page */
@@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 	spin_lock_init(&pool->lock);
 	for_each_unbuddied_list(i, 0)
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
-	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
 	atomic64_set(&pool->pages_nr, 0);
 	pool->ops = ops;
@@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 				spin_unlock(&pool->lock);
 				continue;
 			}
+			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			spin_unlock(&pool->lock);
 
@@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			else if (zhdr->middle_chunks == 0)
 				bud = MIDDLE;
 			else {
+				z3fold_page_unlock(zhdr);
 				spin_lock(&pool->lock);
-				list_add(&zhdr->buddy, &pool->buddied);
+				if (kref_put(&zhdr->refcount,
+					     release_z3fold_page))
+					atomic64_dec(&pool->pages_nr);
 				spin_unlock(&pool->lock);
-				z3fold_page_unlock(zhdr);
 				pr_err("No free chunks in unbuddied\n");
 				WARN_ON(1);
 				continue;
@@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
-	} else {
-		/* Add to buddied list */
-		list_add(&zhdr->buddy, &pool->buddied);
 	}
 
 headless:
@@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		}
 	}
 
-	if (test_bit(UNDER_RECLAIM, &page->private)) {
-		/* z3fold page is under reclaim, reclaim will free */
-		if (bud != HEADLESS)
-			z3fold_page_unlock(zhdr);
-		return;
-	}
-
-	/* Remove from existing buddy list */
-	if (bud != HEADLESS) {
-		spin_lock(&pool->lock);
-		/*
-		 * this object may have been removed from its list by
-		 * z3fold_alloc(). In that case we just do nothing,
-		 * z3fold_alloc() will allocate an object and add the page
-		 * to the relevant list.
-		 */
-		if (!list_empty(&zhdr->buddy)) {
-			list_del(&zhdr->buddy);
-		} else {
-			spin_unlock(&pool->lock);
-			z3fold_page_unlock(zhdr);
-			return;
-		}
-		spin_unlock(&pool->lock);
-	}
-
-	if (bud == HEADLESS ||
-	    (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
-			zhdr->last_chunks == 0)) {
-		/* z3fold page is empty, free */
+	if (bud == HEADLESS) {
 		spin_lock(&pool->lock);
 		list_del(&page->lru);
 		spin_unlock(&pool->lock);
-		clear_bit(PAGE_HEADLESS, &page->private);
-		if (bud != HEADLESS)
-			z3fold_page_unlock(zhdr);
-		free_z3fold_page(zhdr);
+		free_z3fold_page(page);
 		atomic64_dec(&pool->pages_nr);
 	} else {
-		z3fold_compact_page(zhdr);
-		/* Add to the unbuddied list */
+		if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
+		    zhdr->last_chunks != 0) {
+			z3fold_compact_page(zhdr);
+			/* Add to the unbuddied list */
+			spin_lock(&pool->lock);
+			if (!list_empty(&zhdr->buddy))
+				list_del(&zhdr->buddy);
+			freechunks = num_free_chunks(zhdr);
+			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+			spin_unlock(&pool->lock);
+		}
+		z3fold_page_unlock(zhdr);
 		spin_lock(&pool->lock);
-		freechunks = num_free_chunks(zhdr);
-		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		if (kref_put(&zhdr->refcount, release_z3fold_page))
+			atomic64_dec(&pool->pages_nr);
 		spin_unlock(&pool->lock);
-		z3fold_page_unlock(zhdr);
 	}
 
 }
@@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			return -EINVAL;
 		}
 		page = list_last_entry(&pool->lru, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 
-		/* Protect z3fold page against free */
-		set_bit(UNDER_RECLAIM, &page->private);
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
-			list_del(&zhdr->buddy);
+			if (!list_empty(&zhdr->buddy))
+				list_del_init(&zhdr->buddy);
+			kref_get(&zhdr->refcount);
 			spin_unlock(&pool->lock);
 			z3fold_page_lock(zhdr);
 			/*
@@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		if (!test_bit(PAGE_HEADLESS, &page->private))
+		if (test_bit(PAGE_HEADLESS, &page->private)) {
+			if (ret == 0) {
+				free_z3fold_page(page);
+				return 0;
+			}
+		} else {
+			int freed;
 			z3fold_page_lock(zhdr);
-		clear_bit(UNDER_RECLAIM, &page->private);
-		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
-		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
-		     zhdr->middle_chunks == 0)) {
-			/*
-			 * All buddies are now free, free the z3fold page and
-			 * return success.
-			 */
-			if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
-				z3fold_page_unlock(zhdr);
-			free_z3fold_page(zhdr);
-			atomic64_dec(&pool->pages_nr);
-			return 0;
-		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
-			if (zhdr->first_chunks != 0 &&
-			    zhdr->last_chunks != 0 &&
-			    zhdr->middle_chunks != 0) {
-				/* Full, add to buddied list */
-				spin_lock(&pool->lock);
-				list_add(&zhdr->buddy, &pool->buddied);
-				spin_unlock(&pool->lock);
-			} else {
+			if ((zhdr->first_chunks || zhdr->last_chunks ||
+			     zhdr->middle_chunks) &&
+			    !(zhdr->first_chunks && zhdr->last_chunks &&
+			      zhdr->middle_chunks)) {
 				z3fold_compact_page(zhdr);
 				/* add to unbuddied list */
 				spin_lock(&pool->lock);
@@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 					 &pool->unbuddied[freechunks]);
 				spin_unlock(&pool->lock);
 			}
-		}
-
-		if (!test_bit(PAGE_HEADLESS, &page->private))
 			z3fold_page_unlock(zhdr);
+			spin_lock(&pool->lock);
+			freed = kref_put(&zhdr->refcount, release_z3fold_page);
+			spin_unlock(&pool->lock);
+			if (freed) {
+				atomic64_dec(&pool->pages_nr);
+				return 0;
+			}
+		}
 
 		spin_lock(&pool->lock);
 		/* add to beginning of LRU */
-- 
2.4.2


* Re: [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic
  2017-01-11 15:06 ` [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic Vitaly Wool
@ 2017-01-11 16:09   ` Dan Streetman
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 16:09 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> This patch converts the per-pool pages_nr counter to atomic64_t.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

Acked-by: Dan Streetman <ddstreet@ieee.org>

> ---
>  mm/z3fold.c | 20 +++++++++-----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 207e5dd..2273789 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -80,7 +80,7 @@ struct z3fold_pool {
>         struct list_head unbuddied[NCHUNKS];
>         struct list_head buddied;
>         struct list_head lru;
> -       u64 pages_nr;
> +       atomic64_t pages_nr;
>         const struct z3fold_ops *ops;
>         struct zpool *zpool;
>         const struct zpool_ops *zpool_ops;
> @@ -238,7 +238,7 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>         INIT_LIST_HEAD(&pool->buddied);
>         INIT_LIST_HEAD(&pool->lru);
> -       pool->pages_nr = 0;
> +       atomic64_set(&pool->pages_nr, 0);
>         pool->ops = ops;
>         return pool;
>  }
> @@ -350,7 +350,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>         if (!page)
>                 return -ENOMEM;
>         spin_lock(&pool->lock);
> -       pool->pages_nr++;
> +       atomic64_inc(&pool->pages_nr);
>         zhdr = init_z3fold_page(page);
>
>         if (bud == HEADLESS) {
> @@ -443,10 +443,9 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 return;
>         }
>
> -       if (bud != HEADLESS) {
> -               /* Remove from existing buddy list */
> +       /* Remove from existing buddy list */
> +       if (bud != HEADLESS)
>                 list_del(&zhdr->buddy);
> -       }
>
>         if (bud == HEADLESS ||
>             (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
> @@ -455,7 +454,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 list_del(&page->lru);
>                 clear_bit(PAGE_HEADLESS, &page->private);
>                 free_z3fold_page(zhdr);
> -               pool->pages_nr--;
> +               atomic64_dec(&pool->pages_nr);
>         } else {
>                 z3fold_compact_page(zhdr);
>                 /* Add to the unbuddied list */
> @@ -573,7 +572,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                          */
>                         clear_bit(PAGE_HEADLESS, &page->private);
>                         free_z3fold_page(zhdr);
> -                       pool->pages_nr--;
> +                       atomic64_dec(&pool->pages_nr);
>                         spin_unlock(&pool->lock);
>                         return 0;
>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
> @@ -676,12 +675,11 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>   * z3fold_get_pool_size() - gets the z3fold pool size in pages
>   * @pool:      pool whose size is being queried
>   *
> - * Returns: size in pages of the given pool.  The pool lock need not be
> - * taken to access pages_nr.
> + * Returns: size in pages of the given pool.
>   */
>  static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
>  {
> -       return pool->pages_nr;
> +       return atomic64_read(&pool->pages_nr);
>  }
>
>  /*****************
> --
> 2.4.2


* Re: [PATCH/RESEND v2 2/5] z3fold: fix header size related issues
  2017-01-11 15:06 ` [PATCH/RESEND v2 2/5] z3fold: fix header size related issues Vitaly Wool
@ 2017-01-11 16:24   ` Dan Streetman
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 16:24 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> Currently the whole kernel build will be stopped if the size of struct
> z3fold_header is greater than the size of one chunk, which is 64 bytes by
> default. This patch instead defines the offset for z3fold objects as the
> size of the z3fold header in chunks.
>
> Also fixed are the calculation in num_free_chunks() and the address the
> middle chunk is moved to during in-page compaction in
> z3fold_compact_page().
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

Acked-by: Dan Streetman <ddstreet@ieee.org>

> ---
>  mm/z3fold.c | 114 ++++++++++++++++++++++++++++++++++--------------------------
>  1 file changed, 64 insertions(+), 50 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 2273789..98ab01f 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -34,29 +34,58 @@
>  /*****************
>   * Structures
>  *****************/
> +struct z3fold_pool;
> +struct z3fold_ops {
> +       int (*evict)(struct z3fold_pool *pool, unsigned long handle);
> +};
> +
> +enum buddy {
> +       HEADLESS = 0,
> +       FIRST,
> +       MIDDLE,
> +       LAST,
> +       BUDDIES_MAX
> +};
> +
> +/*
> + * struct z3fold_header - z3fold page metadata occupying the first chunk of each
> + *                     z3fold page, except for HEADLESS pages
> + * @buddy:     links the z3fold page into the relevant list in the pool
> + * @first_chunks:      the size of the first buddy in chunks, 0 if free
> + * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
> + * @last_chunks:       the size of the last buddy in chunks, 0 if free
> + * @first_num:         the starting number (for the first handle)
> + */
> +struct z3fold_header {
> +       struct list_head buddy;
> +       unsigned short first_chunks;
> +       unsigned short middle_chunks;
> +       unsigned short last_chunks;
> +       unsigned short start_middle;
> +       unsigned short first_num:2;
> +};
> +
>  /*
>   * NCHUNKS_ORDER determines the internal allocation granularity, effectively
>   * adjusting internal fragmentation.  It also determines the number of
>   * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
> - * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
> - * in allocated page is occupied by z3fold header, NCHUNKS will be calculated
> - * to 63 which shows the max number of free chunks in z3fold page, also there
> - * will be 63 freelists per pool.
> + * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
> + * in the beginning of an allocated page are occupied by z3fold header, so
> + * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
> + * which shows the max number of free chunks in z3fold page, also there will
> + * be 63, or 62, respectively, freelists per pool.
>   */
>  #define NCHUNKS_ORDER  6
>
>  #define CHUNK_SHIFT    (PAGE_SHIFT - NCHUNKS_ORDER)
>  #define CHUNK_SIZE     (1 << CHUNK_SHIFT)
> -#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
> +#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
> +#define ZHDR_CHUNKS    (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
> +#define TOTAL_CHUNKS   (PAGE_SIZE >> CHUNK_SHIFT)
>  #define NCHUNKS                ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
>
>  #define BUDDY_MASK     (0x3)
>
> -struct z3fold_pool;
> -struct z3fold_ops {
> -       int (*evict)(struct z3fold_pool *pool, unsigned long handle);
> -};
> -
>  /**
>   * struct z3fold_pool - stores metadata for each z3fold pool
>   * @lock:      protects all pool fields and first|last_chunk fields of any
> @@ -86,32 +115,6 @@ struct z3fold_pool {
>         const struct zpool_ops *zpool_ops;
>  };
>
> -enum buddy {
> -       HEADLESS = 0,
> -       FIRST,
> -       MIDDLE,
> -       LAST,
> -       BUDDIES_MAX
> -};
> -
> -/*
> - * struct z3fold_header - z3fold page metadata occupying the first chunk of each
> - *                     z3fold page, except for HEADLESS pages
> - * @buddy:     links the z3fold page into the relevant list in the pool
> - * @first_chunks:      the size of the first buddy in chunks, 0 if free
> - * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
> - * @last_chunks:       the size of the last buddy in chunks, 0 if free
> - * @first_num:         the starting number (for the first handle)
> - */
> -struct z3fold_header {
> -       struct list_head buddy;
> -       unsigned short first_chunks;
> -       unsigned short middle_chunks;
> -       unsigned short last_chunks;
> -       unsigned short start_middle;
> -       unsigned short first_num:2;
> -};
> -
>  /*
>   * Internal z3fold page flags
>   */
> @@ -121,6 +124,7 @@ enum z3fold_page_flags {
>         MIDDLE_CHUNK_MAPPED,
>  };
>
> +
>  /*****************
>   * Helpers
>  *****************/
> @@ -204,9 +208,10 @@ static int num_free_chunks(struct z3fold_header *zhdr)
>          */
>         if (zhdr->middle_chunks != 0) {
>                 int nfree_before = zhdr->first_chunks ?
> -                       0 : zhdr->start_middle - 1;
> +                       0 : zhdr->start_middle - ZHDR_CHUNKS;
>                 int nfree_after = zhdr->last_chunks ?
> -                       0 : NCHUNKS - zhdr->start_middle - zhdr->middle_chunks;
> +                       0 : TOTAL_CHUNKS -
> +                               (zhdr->start_middle + zhdr->middle_chunks);
>                 nfree = max(nfree_before, nfree_after);
>         } else
>                 nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
> @@ -254,26 +259,35 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
>         kfree(pool);
>  }
>
> +static inline void *mchunk_memmove(struct z3fold_header *zhdr,
> +                               unsigned short dst_chunk)
> +{
> +       void *beg = zhdr;
> +       return memmove(beg + (dst_chunk << CHUNK_SHIFT),
> +                      beg + (zhdr->start_middle << CHUNK_SHIFT),
> +                      zhdr->middle_chunks << CHUNK_SHIFT);
> +}
> +
>  /* Has to be called with lock held */
>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>  {
>         struct page *page = virt_to_page(zhdr);
> -       void *beg = zhdr;
>
> +       if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
> +               return 0; /* can't move middle chunk, it's used */
>
> -       if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
> -           zhdr->middle_chunks != 0 &&
> -           zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> -               memmove(beg + ZHDR_SIZE_ALIGNED,
> -                       beg + (zhdr->start_middle << CHUNK_SHIFT),
> -                       zhdr->middle_chunks << CHUNK_SHIFT);
> +       if (zhdr->middle_chunks == 0)
> +               return 0; /* nothing to compact */
> +
> +       if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
> +               /* move to the beginning */
> +               mchunk_memmove(zhdr, ZHDR_CHUNKS);
>                 zhdr->first_chunks = zhdr->middle_chunks;
>                 zhdr->middle_chunks = 0;
>                 zhdr->start_middle = 0;
>                 zhdr->first_num++;
> -               return 1;
>         }
> -       return 0;
> +       return 1;
>  }
>
>  /**
> @@ -365,7 +379,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 zhdr->last_chunks = chunks;
>         else {
>                 zhdr->middle_chunks = chunks;
> -               zhdr->start_middle = zhdr->first_chunks + 1;
> +               zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
>         }
>
>         if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
> @@ -778,8 +792,8 @@ MODULE_ALIAS("zpool-z3fold");
>
>  static int __init init_z3fold(void)
>  {
> -       /* Make sure the z3fold header will fit in one chunk */
> -       BUILD_BUG_ON(sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED);
> +       /* Make sure the z3fold header is not larger than the page size */
> +       BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
>         zpool_register_driver(&z3fold_zpool_driver);
>
>         return 0;
> --
> 2.4.2


* Re: [PATCH/RESEND v2 3/5] z3fold: extend compaction function
  2017-01-11 15:06 ` [PATCH/RESEND v2 3/5] z3fold: extend compaction function Vitaly Wool
@ 2017-01-11 16:28   ` Dan Streetman
  2017-01-11 16:43     ` Vitaly Wool
  0 siblings, 1 reply; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 16:28 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> z3fold_compact_page() currently only handles the situation when the
> middle chunk is the only one present within the z3fold page. However,
> it may also be worth moving the middle chunk closer to either the first
> or the last chunk, whichever is present, if the gap between them is big
> enough.
>
> This patch adds the relevant code, using the BIG_CHUNK_GAP define as a
> threshold for when the middle chunk is worth moving.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 98ab01f..fca3310 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
>                        zhdr->middle_chunks << CHUNK_SHIFT);
>  }
>
> +#define BIG_CHUNK_GAP  3
>  /* Has to be called with lock held */
>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>  {
> @@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>                 zhdr->middle_chunks = 0;
>                 zhdr->start_middle = 0;
>                 zhdr->first_num++;
> +               return 1;
>         }
> -       return 1;
> +
> +       /*
> +        * moving data is expensive, so let's only do that if
> +        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
> +        */
> +       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
> +           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
> +                       BIG_CHUNK_GAP) {
> +               mchunk_memmove(zhdr, zhdr->first_chunks + 1);
> +               zhdr->start_middle = zhdr->first_chunks + 1;

this should be first_chunks + ZHDR_CHUNKS, not + 1.

> +               return 1;
> +       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
> +                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
> +                                       + zhdr->middle_chunks) >=
> +                       BIG_CHUNK_GAP) {
> +               unsigned short new_start = NCHUNKS - zhdr->last_chunks -

this should be TOTAL_CHUNKS, not NCHUNKS.

> +                       zhdr->middle_chunks;
> +               mchunk_memmove(zhdr, new_start);
> +               zhdr->start_middle = new_start;
> +               return 1;
> +       }
> +
> +       return 0;
>  }
>
>  /**
> --
> 2.4.2


* Re: [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock
  2017-01-11 15:06 ` [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock Vitaly Wool
@ 2017-01-11 16:42   ` Dan Streetman
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 16:42 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> Most z3fold operations are in-page, such as modifying the z3fold page
> header or moving z3fold objects within a page.  Taking the per-pool
> spinlock to protect per-page objects is therefore suboptimal, and the
> idea of having a per-page spinlock (or rwlock) has been around for some
> time.
>
> This patch implements a spinlock-based per-page locking mechanism which
> is lightweight enough to normally fit into the z3fold header.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

I still have a bit of concern on the coordination between the page and
pool locks...but I don't see anything specifically wrong so

Reviewed-by: Dan Streetman <ddstreet@ieee.org>


> ---
>  mm/z3fold.c | 148 +++++++++++++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 106 insertions(+), 42 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index fca3310..4325bde 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -51,6 +51,7 @@ enum buddy {
>   * struct z3fold_header - z3fold page metadata occupying the first chunk of each
>   *                     z3fold page, except for HEADLESS pages
>   * @buddy:     links the z3fold page into the relevant list in the pool
> + * @page_lock:         per-page lock
>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
> @@ -58,6 +59,7 @@ enum buddy {
>   */
>  struct z3fold_header {
>         struct list_head buddy;
> +       spinlock_t page_lock;
>         unsigned short first_chunks;
>         unsigned short middle_chunks;
>         unsigned short last_chunks;
> @@ -148,6 +150,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>         clear_bit(PAGE_HEADLESS, &page->private);
>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>
> +       spin_lock_init(&zhdr->page_lock);
>         zhdr->first_chunks = 0;
>         zhdr->middle_chunks = 0;
>         zhdr->last_chunks = 0;
> @@ -163,6 +166,19 @@ static void free_z3fold_page(struct z3fold_header *zhdr)
>         __free_page(virt_to_page(zhdr));
>  }
>
> +/* Lock a z3fold page */
> +static inline void z3fold_page_lock(struct z3fold_header *zhdr)
> +{
> +       spin_lock(&zhdr->page_lock);
> +}
> +
> +/* Unlock a z3fold page */
> +static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
> +{
> +       spin_unlock(&zhdr->page_lock);
> +}
> +
> +
>  /*
>   * Encodes the handle of a particular buddy within a z3fold page
>   * Pool lock should be held as this function accesses first_num
> @@ -351,50 +367,60 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 bud = HEADLESS;
>         else {
>                 chunks = size_to_chunks(size);
> -               spin_lock(&pool->lock);
>
>                 /* First, try to find an unbuddied z3fold page. */
>                 zhdr = NULL;
>                 for_each_unbuddied_list(i, chunks) {
> -                       if (!list_empty(&pool->unbuddied[i])) {
> -                               zhdr = list_first_entry(&pool->unbuddied[i],
> +                       spin_lock(&pool->lock);
> +                       zhdr = list_first_entry_or_null(&pool->unbuddied[i],
>                                                 struct z3fold_header, buddy);
> -                               page = virt_to_page(zhdr);
> -                               if (zhdr->first_chunks == 0) {
> -                                       if (zhdr->middle_chunks != 0 &&
> -                                           chunks >= zhdr->start_middle)
> -                                               bud = LAST;
> -                                       else
> -                                               bud = FIRST;
> -                               } else if (zhdr->last_chunks == 0)
> +                       if (!zhdr) {
> +                               spin_unlock(&pool->lock);
> +                               continue;
> +                       }
> +                       list_del_init(&zhdr->buddy);
> +                       spin_unlock(&pool->lock);
> +
> +                       page = virt_to_page(zhdr);
> +                       z3fold_page_lock(zhdr);
> +                       if (zhdr->first_chunks == 0) {
> +                               if (zhdr->middle_chunks != 0 &&
> +                                   chunks >= zhdr->start_middle)
>                                         bud = LAST;
> -                               else if (zhdr->middle_chunks == 0)
> -                                       bud = MIDDLE;
> -                               else {
> -                                       pr_err("No free chunks in unbuddied\n");
> -                                       WARN_ON(1);
> -                                       continue;
> -                               }
> -                               list_del(&zhdr->buddy);
> -                               goto found;
> +                               else
> +                                       bud = FIRST;
> +                       } else if (zhdr->last_chunks == 0)
> +                               bud = LAST;
> +                       else if (zhdr->middle_chunks == 0)
> +                               bud = MIDDLE;
> +                       else {
> +                               spin_lock(&pool->lock);
> +                               list_add(&zhdr->buddy, &pool->buddied);
> +                               spin_unlock(&pool->lock);
> +                               z3fold_page_unlock(zhdr);
> +                               pr_err("No free chunks in unbuddied\n");
> +                               WARN_ON(1);
> +                               continue;
>                         }
> +                       goto found;
>                 }
>                 bud = FIRST;
> -               spin_unlock(&pool->lock);
>         }
>
>         /* Couldn't find unbuddied z3fold page, create new one */
>         page = alloc_page(gfp);
>         if (!page)
>                 return -ENOMEM;
> -       spin_lock(&pool->lock);
> +
>         atomic64_inc(&pool->pages_nr);
>         zhdr = init_z3fold_page(page);
>
>         if (bud == HEADLESS) {
>                 set_bit(PAGE_HEADLESS, &page->private);
> +               spin_lock(&pool->lock);
>                 goto headless;
>         }
> +       z3fold_page_lock(zhdr);
>
>  found:
>         if (bud == FIRST)
> @@ -406,6 +432,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
>         }
>
> +       spin_lock(&pool->lock);
>         if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
>                         zhdr->middle_chunks == 0) {
>                 /* Add to unbuddied list */
> @@ -425,6 +452,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>
>         *handle = encode_handle(zhdr, bud);
>         spin_unlock(&pool->lock);
> +       if (bud != HEADLESS)
> +               z3fold_page_unlock(zhdr);
>
>         return 0;
>  }
> @@ -446,7 +475,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy bud;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
> @@ -454,6 +482,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 /* HEADLESS page stored */
>                 bud = HEADLESS;
>         } else {
> +               z3fold_page_lock(zhdr);
>                 bud = handle_to_buddy(handle);
>
>                 switch (bud) {
> @@ -470,37 +499,59 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 default:
>                         pr_err("%s: unknown bud %d\n", __func__, bud);
>                         WARN_ON(1);
> -                       spin_unlock(&pool->lock);
> +                       z3fold_page_unlock(zhdr);
>                         return;
>                 }
>         }
>
>         if (test_bit(UNDER_RECLAIM, &page->private)) {
>                 /* z3fold page is under reclaim, reclaim will free */
> -               spin_unlock(&pool->lock);
> +               if (bud != HEADLESS)
> +                       z3fold_page_unlock(zhdr);
>                 return;
>         }
>
>         /* Remove from existing buddy list */
> -       if (bud != HEADLESS)
> -               list_del(&zhdr->buddy);
> +       if (bud != HEADLESS) {
> +               spin_lock(&pool->lock);
> +               /*
> +                * this object may have been removed from its list by
> +                * z3fold_alloc(). In that case we just do nothing,
> +                * z3fold_alloc() will allocate an object and add the page
> +                * to the relevant list.
> +                */
> +               if (!list_empty(&zhdr->buddy)) {
> +                       list_del(&zhdr->buddy);
> +               } else {
> +                       spin_unlock(&pool->lock);
> +                       z3fold_page_unlock(zhdr);
> +                       return;
> +               }
> +               spin_unlock(&pool->lock);
> +       }
>
>         if (bud == HEADLESS ||
>             (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>                         zhdr->last_chunks == 0)) {
>                 /* z3fold page is empty, free */
> +               spin_lock(&pool->lock);
>                 list_del(&page->lru);
> +               spin_unlock(&pool->lock);
>                 clear_bit(PAGE_HEADLESS, &page->private);
> +               if (bud != HEADLESS)
> +                       z3fold_page_unlock(zhdr);
>                 free_z3fold_page(zhdr);
>                 atomic64_dec(&pool->pages_nr);
>         } else {
>                 z3fold_compact_page(zhdr);
>                 /* Add to the unbuddied list */
> +               spin_lock(&pool->lock);
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               spin_unlock(&pool->lock);
> +               z3fold_page_unlock(zhdr);
>         }
>
> -       spin_unlock(&pool->lock);
>  }
>
>  /**
> @@ -547,12 +598,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>         unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
>
>         spin_lock(&pool->lock);
> -       if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
> -                       retries == 0) {
> +       if (!pool->ops || !pool->ops->evict || retries == 0) {
>                 spin_unlock(&pool->lock);
>                 return -EINVAL;
>         }
>         for (i = 0; i < retries; i++) {
> +               if (list_empty(&pool->lru)) {
> +                       spin_unlock(&pool->lock);
> +                       return -EINVAL;
> +               }
>                 page = list_last_entry(&pool->lru, struct page, lru);
>                 list_del(&page->lru);
>
> @@ -561,6 +615,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                 zhdr = page_address(page);
>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>                         list_del(&zhdr->buddy);
> +                       spin_unlock(&pool->lock);
> +                       z3fold_page_lock(zhdr);
>                         /*
>                          * We need encode the handles before unlocking, since
>                          * we can race with free that will set
> @@ -575,13 +631,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 middle_handle = encode_handle(zhdr, MIDDLE);
>                         if (zhdr->last_chunks)
>                                 last_handle = encode_handle(zhdr, LAST);
> +                       z3fold_page_unlock(zhdr);
>                 } else {
>                         first_handle = encode_handle(zhdr, HEADLESS);
>                         last_handle = middle_handle = 0;
> +                       spin_unlock(&pool->lock);
>                 }
>
> -               spin_unlock(&pool->lock);
> -
>                 /* Issue the eviction callback(s) */
>                 if (middle_handle) {
>                         ret = pool->ops->evict(pool, middle_handle);
> @@ -599,7 +655,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 goto next;
>                 }
>  next:
> -               spin_lock(&pool->lock);
> +               if (!test_bit(PAGE_HEADLESS, &page->private))
> +                       z3fold_page_lock(zhdr);
>                 clear_bit(UNDER_RECLAIM, &page->private);
>                 if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>                     (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
> @@ -608,26 +665,34 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                          * All buddies are now free, free the z3fold page and
>                          * return success.
>                          */
> -                       clear_bit(PAGE_HEADLESS, &page->private);
> +                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
> +                               z3fold_page_unlock(zhdr);
>                         free_z3fold_page(zhdr);
>                         atomic64_dec(&pool->pages_nr);
> -                       spin_unlock(&pool->lock);
>                         return 0;
>                 }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>                         if (zhdr->first_chunks != 0 &&
>                             zhdr->last_chunks != 0 &&
>                             zhdr->middle_chunks != 0) {
>                                 /* Full, add to buddied list */
> +                               spin_lock(&pool->lock);
>                                 list_add(&zhdr->buddy, &pool->buddied);
> +                               spin_unlock(&pool->lock);
>                         } else {
>                                 z3fold_compact_page(zhdr);
>                                 /* add to unbuddied list */
> +                               spin_lock(&pool->lock);
>                                 freechunks = num_free_chunks(zhdr);
>                                 list_add(&zhdr->buddy,
>                                          &pool->unbuddied[freechunks]);
> +                               spin_unlock(&pool->lock);
>                         }
>                 }
>
> +               if (!test_bit(PAGE_HEADLESS, &page->private))
> +                       z3fold_page_unlock(zhdr);
> +
> +               spin_lock(&pool->lock);
>                 /* add to beginning of LRU */
>                 list_add(&page->lru, &pool->lru);
>         }
> @@ -652,7 +717,6 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>         void *addr;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         addr = zhdr;
>         page = virt_to_page(zhdr);
> @@ -660,6 +724,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>         if (test_bit(PAGE_HEADLESS, &page->private))
>                 goto out;
>
> +       z3fold_page_lock(zhdr);
>         buddy = handle_to_buddy(handle);
>         switch (buddy) {
>         case FIRST:
> @@ -678,8 +743,9 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
>                 addr = NULL;
>                 break;
>         }
> +
> +       z3fold_page_unlock(zhdr);
>  out:
> -       spin_unlock(&pool->lock);
>         return addr;
>  }
>
> @@ -694,19 +760,17 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
>         struct page *page;
>         enum buddy buddy;
>
> -       spin_lock(&pool->lock);
>         zhdr = handle_to_z3fold_header(handle);
>         page = virt_to_page(zhdr);
>
> -       if (test_bit(PAGE_HEADLESS, &page->private)) {
> -               spin_unlock(&pool->lock);
> +       if (test_bit(PAGE_HEADLESS, &page->private))
>                 return;
> -       }
>
> +       z3fold_page_lock(zhdr);
>         buddy = handle_to_buddy(handle);
>         if (buddy == MIDDLE)
>                 clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
> -       spin_unlock(&pool->lock);
> +       z3fold_page_unlock(zhdr);
>  }
>
>  /**
> --
> 2.4.2

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 3/5] z3fold: extend compaction function
  2017-01-11 16:28   ` Dan Streetman
@ 2017-01-11 16:43     ` Vitaly Wool
  2017-01-11 20:52       ` Vitaly Wool
  0 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 16:43 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 5:28 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> z3fold_compact_page() currently only handles the situation when
>> there's a single middle chunk within the z3fold page. However it
>> may be worth it to move middle chunk closer to either first or
>> last chunk, whichever is there, if the gap between them is big
>> enough.
>>
>> This patch adds the relevant code, using BIG_CHUNK_GAP define as
>> a threshold for middle chunk to be worth moving.
>>
>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>> ---
>>  mm/z3fold.c | 26 +++++++++++++++++++++++++-
>>  1 file changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index 98ab01f..fca3310 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
>>                        zhdr->middle_chunks << CHUNK_SHIFT);
>>  }
>>
>> +#define BIG_CHUNK_GAP  3
>>  /* Has to be called with lock held */
>>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>>  {
>> @@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>>                 zhdr->middle_chunks = 0;
>>                 zhdr->start_middle = 0;
>>                 zhdr->first_num++;
>> +               return 1;
>>         }
>> -       return 1;
>> +
>> +       /*
>> +        * moving data is expensive, so let's only do that if
>> +        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
>> +        */
>> +       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
>> +           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
>> +                       BIG_CHUNK_GAP) {
>> +               mchunk_memmove(zhdr, zhdr->first_chunks + 1);
>> +               zhdr->start_middle = zhdr->first_chunks + 1;
>
> this should be first_chunks + ZHDR_CHUNKS, not + 1.
>
>> +               return 1;
>> +       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
>> +                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
>> +                                       + zhdr->middle_chunks) >=
>> +                       BIG_CHUNK_GAP) {
>> +               unsigned short new_start = NCHUNKS - zhdr->last_chunks -
>
> this should be TOTAL_CHUNKS, not NCHUNKS.

Right :/ (corrected sketch below, after the quoted patch)

>> +                       zhdr->middle_chunks;
>> +               mchunk_memmove(zhdr, new_start);
>> +               zhdr->start_middle = new_start;
>> +               return 1;
>> +       }
>> +
>> +       return 0;
>>  }
>>
>>  /**
>> --
>> 2.4.2
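
For the record, here is how those two branches would read with the
corrections above applied -- just a sketch against the quoted patch,
reusing the existing ZHDR_CHUNKS/TOTAL_CHUNKS/BIG_CHUNK_GAP
definitions, not retested yet:

	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		/* move middle chunks down, right after the first buddy */
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		/* move middle chunks up, right before the last buddy */
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;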

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 15:06 ` [PATCH/RESEND v2 5/5] z3fold: add kref refcounting Vitaly Wool
@ 2017-01-11 17:08   ` Dan Streetman
  2017-01-11 17:27     ` Vitaly Wool
  0 siblings, 1 reply; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 17:08 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> With both coming and already present locking optimizations,
> introducing kref to reference-count z3fold objects is the right
> thing to do. Moreover, it makes buddied list no longer necessary,
> and allows for a simpler handling of headless pages.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
>  1 file changed, 62 insertions(+), 83 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 4325bde..59cb14f 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -52,6 +52,7 @@ enum buddy {
>   *                     z3fold page, except for HEADLESS pages
>   * @buddy:     links the z3fold page into the relevant list in the pool
>   * @page_lock:         per-page lock
> + * @refcount:          reference count for the z3fold page
>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
> @@ -60,6 +61,7 @@ enum buddy {
>  struct z3fold_header {
>         struct list_head buddy;
>         spinlock_t page_lock;
> +       struct kref refcount;
>         unsigned short first_chunks;
>         unsigned short middle_chunks;
>         unsigned short last_chunks;
> @@ -95,8 +97,6 @@ struct z3fold_header {
>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>   *             the lists each z3fold page is added to depends on the size of
>   *             its free region.
> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
> - *             these z3fold pages are full
>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>   *             added buddy.
>   * @pages_nr:  number of z3fold pages in the pool.
> @@ -109,7 +109,6 @@ struct z3fold_header {
>  struct z3fold_pool {
>         spinlock_t lock;
>         struct list_head unbuddied[NCHUNKS];
> -       struct list_head buddied;
>         struct list_head lru;
>         atomic64_t pages_nr;
>         const struct z3fold_ops *ops;
> @@ -121,8 +120,7 @@ struct z3fold_pool {
>   * Internal z3fold page flags
>   */
>  enum z3fold_page_flags {
> -       UNDER_RECLAIM = 0,
> -       PAGE_HEADLESS,
> +       PAGE_HEADLESS = 0,
>         MIDDLE_CHUNK_MAPPED,
>  };
>
> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>         struct z3fold_header *zhdr = page_address(page);
>
>         INIT_LIST_HEAD(&page->lru);
> -       clear_bit(UNDER_RECLAIM, &page->private);
>         clear_bit(PAGE_HEADLESS, &page->private);
>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>
>         spin_lock_init(&zhdr->page_lock);
> +       kref_init(&zhdr->refcount);
>         zhdr->first_chunks = 0;
>         zhdr->middle_chunks = 0;
>         zhdr->last_chunks = 0;
> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>  }
>
>  /* Resets the struct page fields and frees the page */
> -static void free_z3fold_page(struct z3fold_header *zhdr)
> +static void free_z3fold_page(struct page *page)
>  {
> -       __free_page(virt_to_page(zhdr));
> +       __free_page(page);
> +}
> +
> +static void release_z3fold_page(struct kref *ref)
> +{
> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
> +                                               refcount);
> +       struct page *page = virt_to_page(zhdr);
> +       if (!list_empty(&zhdr->buddy))
> +               list_del(&zhdr->buddy);
> +       if (!list_empty(&page->lru))
> +               list_del(&page->lru);
> +       free_z3fold_page(page);
>  }
>
>  /* Lock a z3fold page */
> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>         spin_lock_init(&pool->lock);
>         for_each_unbuddied_list(i, 0)
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
> -       INIT_LIST_HEAD(&pool->buddied);
>         INIT_LIST_HEAD(&pool->lru);
>         atomic64_set(&pool->pages_nr, 0);
>         pool->ops = ops;
> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                                 spin_unlock(&pool->lock);
>                                 continue;
>                         }
> +                       kref_get(&zhdr->refcount);
>                         list_del_init(&zhdr->buddy);
>                         spin_unlock(&pool->lock);
>
> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                         else if (zhdr->middle_chunks == 0)
>                                 bud = MIDDLE;
>                         else {
> +                               z3fold_page_unlock(zhdr);
>                                 spin_lock(&pool->lock);
> -                               list_add(&zhdr->buddy, &pool->buddied);
> +                               if (kref_put(&zhdr->refcount,
> +                                            release_z3fold_page))
> +                                       atomic64_dec(&pool->pages_nr);
>                                 spin_unlock(&pool->lock);
> -                               z3fold_page_unlock(zhdr);
>                                 pr_err("No free chunks in unbuddied\n");
>                                 WARN_ON(1);
>                                 continue;
> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 /* Add to unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> -       } else {
> -               /* Add to buddied list */
> -               list_add(&zhdr->buddy, &pool->buddied);
>         }
>
>  headless:
> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 }
>         }
>
> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
> -               /* z3fold page is under reclaim, reclaim will free */
> -               if (bud != HEADLESS)
> -                       z3fold_page_unlock(zhdr);
> -               return;
> -       }
> -
> -       /* Remove from existing buddy list */
> -       if (bud != HEADLESS) {
> -               spin_lock(&pool->lock);
> -               /*
> -                * this object may have been removed from its list by
> -                * z3fold_alloc(). In that case we just do nothing,
> -                * z3fold_alloc() will allocate an object and add the page
> -                * to the relevant list.
> -                */
> -               if (!list_empty(&zhdr->buddy)) {
> -                       list_del(&zhdr->buddy);
> -               } else {
> -                       spin_unlock(&pool->lock);
> -                       z3fold_page_unlock(zhdr);
> -                       return;
> -               }
> -               spin_unlock(&pool->lock);
> -       }
> -
> -       if (bud == HEADLESS ||
> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
> -                       zhdr->last_chunks == 0)) {
> -               /* z3fold page is empty, free */
> +       if (bud == HEADLESS) {
>                 spin_lock(&pool->lock);
>                 list_del(&page->lru);
>                 spin_unlock(&pool->lock);
> -               clear_bit(PAGE_HEADLESS, &page->private);
> -               if (bud != HEADLESS)
> -                       z3fold_page_unlock(zhdr);
> -               free_z3fold_page(zhdr);
> +               free_z3fold_page(page);
>                 atomic64_dec(&pool->pages_nr);
>         } else {
> -               z3fold_compact_page(zhdr);
> -               /* Add to the unbuddied list */
> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
> +                   zhdr->last_chunks != 0) {
> +                       z3fold_compact_page(zhdr);
> +                       /* Add to the unbuddied list */
> +                       spin_lock(&pool->lock);
> +                       if (!list_empty(&zhdr->buddy))
> +                               list_del(&zhdr->buddy);
> +                       freechunks = num_free_chunks(zhdr);
> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +                       spin_unlock(&pool->lock);
> +               }
> +               z3fold_page_unlock(zhdr);
>                 spin_lock(&pool->lock);
> -               freechunks = num_free_chunks(zhdr);
> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
> +                       atomic64_dec(&pool->pages_nr);
>                 spin_unlock(&pool->lock);
> -               z3fold_page_unlock(zhdr);
>         }
>
>  }
> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                         return -EINVAL;
>                 }
>                 page = list_last_entry(&pool->lru, struct page, lru);
> -               list_del(&page->lru);
> +               list_del_init(&page->lru);
>
> -               /* Protect z3fold page against free */
> -               set_bit(UNDER_RECLAIM, &page->private);
>                 zhdr = page_address(page);
>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
> -                       list_del(&zhdr->buddy);
> +                       if (!list_empty(&zhdr->buddy))
> +                               list_del_init(&zhdr->buddy);
> +                       kref_get(&zhdr->refcount);
>                         spin_unlock(&pool->lock);
>                         z3fold_page_lock(zhdr);
>                         /*
> @@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 goto next;
>                 }
>  next:
> -               if (!test_bit(PAGE_HEADLESS, &page->private))
> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
> +                       if (ret == 0) {
> +                               free_z3fold_page(page);
> +                               return 0;
> +                       }
> +               } else {
> +                       int freed;
>                         z3fold_page_lock(zhdr);
> -               clear_bit(UNDER_RECLAIM, &page->private);
> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
> -                    zhdr->middle_chunks == 0)) {
> -                       /*
> -                        * All buddies are now free, free the z3fold page and
> -                        * return success.
> -                        */
> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
> -                               z3fold_page_unlock(zhdr);
> -                       free_z3fold_page(zhdr);
> -                       atomic64_dec(&pool->pages_nr);
> -                       return 0;
> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
> -                       if (zhdr->first_chunks != 0 &&
> -                           zhdr->last_chunks != 0 &&
> -                           zhdr->middle_chunks != 0) {
> -                               /* Full, add to buddied list */
> -                               spin_lock(&pool->lock);
> -                               list_add(&zhdr->buddy, &pool->buddied);
> -                               spin_unlock(&pool->lock);
> -                       } else {
> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
> +                            zhdr->middle_chunks) &&
> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
> +                             zhdr->middle_chunks)) {
>                                 z3fold_compact_page(zhdr);
>                                 /* add to unbuddied list */
>                                 spin_lock(&pool->lock);
> @@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                          &pool->unbuddied[freechunks]);
>                                 spin_unlock(&pool->lock);
>                         }
> -               }
> -
> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>                         z3fold_page_unlock(zhdr);
> +                       spin_lock(&pool->lock);
> +                       freed = kref_put(&zhdr->refcount, release_z3fold_page);
> +                       spin_unlock(&pool->lock);
> +                       if (freed) {
> +                               atomic64_dec(&pool->pages_nr);
> +                               return 0;
> +                       }

you still can't do this - put the kref and then add it back to the lru
later.  freed here is only useful for knowing that it was freed - you
can't assume that !freed means it's still valid for you to use.
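
To spell the hazard out, a hypothetical interleaving (not code from
the patch, just an illustration) would be:

	freed = kref_put(&zhdr->refcount, release_z3fold_page); /* 0 */
	spin_unlock(&pool->lock);
	/* another thread drops the last reference here and
	 * release_z3fold_page() frees the page */
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);	/* use-after-free */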

> +               }
>
>                 spin_lock(&pool->lock);
>                 /* add to beginning of LRU */
> --
> 2.4.2

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 17:08   ` Dan Streetman
@ 2017-01-11 17:27     ` Vitaly Wool
  2017-01-11 17:39       ` Dan Streetman
  0 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 17:27 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 6:08 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> With both coming and already present locking optimizations,
>> introducing kref to reference-count z3fold objects is the right
>> thing to do. Moreover, it makes buddied list no longer necessary,
>> and allows for a simpler handling of headless pages.
>>
>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>> ---
>>  mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
>>  1 file changed, 62 insertions(+), 83 deletions(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index 4325bde..59cb14f 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -52,6 +52,7 @@ enum buddy {
>>   *                     z3fold page, except for HEADLESS pages
>>   * @buddy:     links the z3fold page into the relevant list in the pool
>>   * @page_lock:         per-page lock
>> + * @refcount:          reference count for the z3fold page
>>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
>> @@ -60,6 +61,7 @@ enum buddy {
>>  struct z3fold_header {
>>         struct list_head buddy;
>>         spinlock_t page_lock;
>> +       struct kref refcount;
>>         unsigned short first_chunks;
>>         unsigned short middle_chunks;
>>         unsigned short last_chunks;
>> @@ -95,8 +97,6 @@ struct z3fold_header {
>>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>>   *             the lists each z3fold page is added to depends on the size of
>>   *             its free region.
>> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
>> - *             these z3fold pages are full
>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>   *             added buddy.
>>   * @pages_nr:  number of z3fold pages in the pool.
>> @@ -109,7 +109,6 @@ struct z3fold_header {
>>  struct z3fold_pool {
>>         spinlock_t lock;
>>         struct list_head unbuddied[NCHUNKS];
>> -       struct list_head buddied;
>>         struct list_head lru;
>>         atomic64_t pages_nr;
>>         const struct z3fold_ops *ops;
>> @@ -121,8 +120,7 @@ struct z3fold_pool {
>>   * Internal z3fold page flags
>>   */
>>  enum z3fold_page_flags {
>> -       UNDER_RECLAIM = 0,
>> -       PAGE_HEADLESS,
>> +       PAGE_HEADLESS = 0,
>>         MIDDLE_CHUNK_MAPPED,
>>  };
>>
>> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>         struct z3fold_header *zhdr = page_address(page);
>>
>>         INIT_LIST_HEAD(&page->lru);
>> -       clear_bit(UNDER_RECLAIM, &page->private);
>>         clear_bit(PAGE_HEADLESS, &page->private);
>>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>>
>>         spin_lock_init(&zhdr->page_lock);
>> +       kref_init(&zhdr->refcount);
>>         zhdr->first_chunks = 0;
>>         zhdr->middle_chunks = 0;
>>         zhdr->last_chunks = 0;
>> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>  }
>>
>>  /* Resets the struct page fields and frees the page */
>> -static void free_z3fold_page(struct z3fold_header *zhdr)
>> +static void free_z3fold_page(struct page *page)
>>  {
>> -       __free_page(virt_to_page(zhdr));
>> +       __free_page(page);
>> +}
>> +
>> +static void release_z3fold_page(struct kref *ref)
>> +{
>> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
>> +                                               refcount);
>> +       struct page *page = virt_to_page(zhdr);
>> +       if (!list_empty(&zhdr->buddy))
>> +               list_del(&zhdr->buddy);
>> +       if (!list_empty(&page->lru))
>> +               list_del(&page->lru);
>> +       free_z3fold_page(page);
>>  }
>>
>>  /* Lock a z3fold page */
>> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>         spin_lock_init(&pool->lock);
>>         for_each_unbuddied_list(i, 0)
>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>> -       INIT_LIST_HEAD(&pool->buddied);
>>         INIT_LIST_HEAD(&pool->lru);
>>         atomic64_set(&pool->pages_nr, 0);
>>         pool->ops = ops;
>> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                                 spin_unlock(&pool->lock);
>>                                 continue;
>>                         }
>> +                       kref_get(&zhdr->refcount);
>>                         list_del_init(&zhdr->buddy);
>>                         spin_unlock(&pool->lock);
>>
>> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                         else if (zhdr->middle_chunks == 0)
>>                                 bud = MIDDLE;
>>                         else {
>> +                               z3fold_page_unlock(zhdr);
>>                                 spin_lock(&pool->lock);
>> -                               list_add(&zhdr->buddy, &pool->buddied);
>> +                               if (kref_put(&zhdr->refcount,
>> +                                            release_z3fold_page))
>> +                                       atomic64_dec(&pool->pages_nr);
>>                                 spin_unlock(&pool->lock);
>> -                               z3fold_page_unlock(zhdr);
>>                                 pr_err("No free chunks in unbuddied\n");
>>                                 WARN_ON(1);
>>                                 continue;
>> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>                 /* Add to unbuddied list */
>>                 freechunks = num_free_chunks(zhdr);
>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> -       } else {
>> -               /* Add to buddied list */
>> -               list_add(&zhdr->buddy, &pool->buddied);
>>         }
>>
>>  headless:
>> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>                 }
>>         }
>>
>> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
>> -               /* z3fold page is under reclaim, reclaim will free */
>> -               if (bud != HEADLESS)
>> -                       z3fold_page_unlock(zhdr);
>> -               return;
>> -       }
>> -
>> -       /* Remove from existing buddy list */
>> -       if (bud != HEADLESS) {
>> -               spin_lock(&pool->lock);
>> -               /*
>> -                * this object may have been removed from its list by
>> -                * z3fold_alloc(). In that case we just do nothing,
>> -                * z3fold_alloc() will allocate an object and add the page
>> -                * to the relevant list.
>> -                */
>> -               if (!list_empty(&zhdr->buddy)) {
>> -                       list_del(&zhdr->buddy);
>> -               } else {
>> -                       spin_unlock(&pool->lock);
>> -                       z3fold_page_unlock(zhdr);
>> -                       return;
>> -               }
>> -               spin_unlock(&pool->lock);
>> -       }
>> -
>> -       if (bud == HEADLESS ||
>> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>> -                       zhdr->last_chunks == 0)) {
>> -               /* z3fold page is empty, free */
>> +       if (bud == HEADLESS) {
>>                 spin_lock(&pool->lock);
>>                 list_del(&page->lru);
>>                 spin_unlock(&pool->lock);
>> -               clear_bit(PAGE_HEADLESS, &page->private);
>> -               if (bud != HEADLESS)
>> -                       z3fold_page_unlock(zhdr);
>> -               free_z3fold_page(zhdr);
>> +               free_z3fold_page(page);
>>                 atomic64_dec(&pool->pages_nr);
>>         } else {
>> -               z3fold_compact_page(zhdr);
>> -               /* Add to the unbuddied list */
>> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
>> +                   zhdr->last_chunks != 0) {
>> +                       z3fold_compact_page(zhdr);
>> +                       /* Add to the unbuddied list */
>> +                       spin_lock(&pool->lock);
>> +                       if (!list_empty(&zhdr->buddy))
>> +                               list_del(&zhdr->buddy);
>> +                       freechunks = num_free_chunks(zhdr);
>> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> +                       spin_unlock(&pool->lock);
>> +               }
>> +               z3fold_page_unlock(zhdr);
>>                 spin_lock(&pool->lock);
>> -               freechunks = num_free_chunks(zhdr);
>> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
>> +                       atomic64_dec(&pool->pages_nr);
>>                 spin_unlock(&pool->lock);
>> -               z3fold_page_unlock(zhdr);
>>         }
>>
>>  }
>> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                         return -EINVAL;
>>                 }
>>                 page = list_last_entry(&pool->lru, struct page, lru);
>> -               list_del(&page->lru);
>> +               list_del_init(&page->lru);
>>
>> -               /* Protect z3fold page against free */
>> -               set_bit(UNDER_RECLAIM, &page->private);
>>                 zhdr = page_address(page);
>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>> -                       list_del(&zhdr->buddy);
>> +                       if (!list_empty(&zhdr->buddy))
>> +                               list_del_init(&zhdr->buddy);
>> +                       kref_get(&zhdr->refcount);
>>                         spin_unlock(&pool->lock);
>>                         z3fold_page_lock(zhdr);
>>                         /*
>> @@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                                 goto next;
>>                 }
>>  next:
>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
>> +                       if (ret == 0) {
>> +                               free_z3fold_page(page);
>> +                               return 0;
>> +                       }
>> +               } else {
>> +                       int freed;
>>                         z3fold_page_lock(zhdr);
>> -               clear_bit(UNDER_RECLAIM, &page->private);
>> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>> -                    zhdr->middle_chunks == 0)) {
>> -                       /*
>> -                        * All buddies are now free, free the z3fold page and
>> -                        * return success.
>> -                        */
>> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
>> -                               z3fold_page_unlock(zhdr);
>> -                       free_z3fold_page(zhdr);
>> -                       atomic64_dec(&pool->pages_nr);
>> -                       return 0;
>> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>> -                       if (zhdr->first_chunks != 0 &&
>> -                           zhdr->last_chunks != 0 &&
>> -                           zhdr->middle_chunks != 0) {
>> -                               /* Full, add to buddied list */
>> -                               spin_lock(&pool->lock);
>> -                               list_add(&zhdr->buddy, &pool->buddied);
>> -                               spin_unlock(&pool->lock);
>> -                       } else {
>> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
>> +                            zhdr->middle_chunks) &&
>> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
>> +                             zhdr->middle_chunks)) {
>>                                 z3fold_compact_page(zhdr);
>>                                 /* add to unbuddied list */
>>                                 spin_lock(&pool->lock);
>> @@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>                                          &pool->unbuddied[freechunks]);
>>                                 spin_unlock(&pool->lock);
>>                         }
>> -               }
>> -
>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>                         z3fold_page_unlock(zhdr);
>> +                       spin_lock(&pool->lock);
>> +                       freed = kref_put(&zhdr->refcount, release_z3fold_page);
>> +                       spin_unlock(&pool->lock);
>> +                       if (freed) {
>> +                               atomic64_dec(&pool->pages_nr);
>> +                               return 0;
>> +                       }
>
> you still can't do this - put the kref and then add it back to the lru
> later.  freed here is only useful for knowing that it was freed - you
> can't assume that !freed means it's still valid for you to use.

Oh right, thanks. I can however do something like
...
                        spin_lock(&pool->lock);
                        if (kref_put(&zhdr->refcount, release_z3fold_page)) {
                                atomic64_dec(&pool->pages_nr);
                                spin_unlock(&pool->lock);
                                return 0;
                        }
                }

                /* add to beginning of LRU */
                list_add(&page->lru, &pool->lru);
        }
...

provided that I take the lock for the headless case above. That will
work won't it?
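
I.e. roughly the following -- just a sketch of what I mean, with the
headless branch also taking the pool lock before falling through to
the LRU add (untested):

 next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page);
				return 0;
			}
			spin_lock(&pool->lock);
		} else {
			z3fold_page_lock(zhdr);
			/* compaction / unbuddied re-add as in the patch */
			z3fold_page_unlock(zhdr);
			spin_lock(&pool->lock);
			if (kref_put(&zhdr->refcount, release_z3fold_page)) {
				atomic64_dec(&pool->pages_nr);
				spin_unlock(&pool->lock);
				return 0;
			}
		}

		/* add to beginning of LRU, pool lock still held here */
		list_add(&page->lru, &pool->lru);

That way the LRU add always runs under pool->lock, in both the
headless and the non-headless path.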

~vitaly

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 17:27     ` Vitaly Wool
@ 2017-01-11 17:39       ` Dan Streetman
  2017-01-11 17:51         ` Vitaly Wool
  0 siblings, 1 reply; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 17:39 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 12:27 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> On Wed, Jan 11, 2017 at 6:08 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>> On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>>> With both coming and already present locking optimizations,
>>> introducing kref to reference-count z3fold objects is the right
>>> thing to do. Moreover, it makes buddied list no longer necessary,
>>> and allows for a simpler handling of headless pages.
>>>
>>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>>> ---
>>>  mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
>>>  1 file changed, 62 insertions(+), 83 deletions(-)
>>>
>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>> index 4325bde..59cb14f 100644
>>> --- a/mm/z3fold.c
>>> +++ b/mm/z3fold.c
>>> @@ -52,6 +52,7 @@ enum buddy {
>>>   *                     z3fold page, except for HEADLESS pages
>>>   * @buddy:     links the z3fold page into the relevant list in the pool
>>>   * @page_lock:         per-page lock
>>> + * @refcount:          reference count for the z3fold page
>>>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>>>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>>>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
>>> @@ -60,6 +61,7 @@ enum buddy {
>>>  struct z3fold_header {
>>>         struct list_head buddy;
>>>         spinlock_t page_lock;
>>> +       struct kref refcount;
>>>         unsigned short first_chunks;
>>>         unsigned short middle_chunks;
>>>         unsigned short last_chunks;
>>> @@ -95,8 +97,6 @@ struct z3fold_header {
>>>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>>>   *             the lists each z3fold page is added to depends on the size of
>>>   *             its free region.
>>> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
>>> - *             these z3fold pages are full
>>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>>   *             added buddy.
>>>   * @pages_nr:  number of z3fold pages in the pool.
>>> @@ -109,7 +109,6 @@ struct z3fold_header {
>>>  struct z3fold_pool {
>>>         spinlock_t lock;
>>>         struct list_head unbuddied[NCHUNKS];
>>> -       struct list_head buddied;
>>>         struct list_head lru;
>>>         atomic64_t pages_nr;
>>>         const struct z3fold_ops *ops;
>>> @@ -121,8 +120,7 @@ struct z3fold_pool {
>>>   * Internal z3fold page flags
>>>   */
>>>  enum z3fold_page_flags {
>>> -       UNDER_RECLAIM = 0,
>>> -       PAGE_HEADLESS,
>>> +       PAGE_HEADLESS = 0,
>>>         MIDDLE_CHUNK_MAPPED,
>>>  };
>>>
>>> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>         struct z3fold_header *zhdr = page_address(page);
>>>
>>>         INIT_LIST_HEAD(&page->lru);
>>> -       clear_bit(UNDER_RECLAIM, &page->private);
>>>         clear_bit(PAGE_HEADLESS, &page->private);
>>>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>>>
>>>         spin_lock_init(&zhdr->page_lock);
>>> +       kref_init(&zhdr->refcount);
>>>         zhdr->first_chunks = 0;
>>>         zhdr->middle_chunks = 0;
>>>         zhdr->last_chunks = 0;
>>> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>  }
>>>
>>>  /* Resets the struct page fields and frees the page */
>>> -static void free_z3fold_page(struct z3fold_header *zhdr)
>>> +static void free_z3fold_page(struct page *page)
>>>  {
>>> -       __free_page(virt_to_page(zhdr));
>>> +       __free_page(page);
>>> +}
>>> +
>>> +static void release_z3fold_page(struct kref *ref)
>>> +{
>>> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
>>> +                                               refcount);
>>> +       struct page *page = virt_to_page(zhdr);
>>> +       if (!list_empty(&zhdr->buddy))
>>> +               list_del(&zhdr->buddy);
>>> +       if (!list_empty(&page->lru))
>>> +               list_del(&page->lru);
>>> +       free_z3fold_page(page);
>>>  }
>>>
>>>  /* Lock a z3fold page */
>>> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>>         spin_lock_init(&pool->lock);
>>>         for_each_unbuddied_list(i, 0)
>>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>> -       INIT_LIST_HEAD(&pool->buddied);
>>>         INIT_LIST_HEAD(&pool->lru);
>>>         atomic64_set(&pool->pages_nr, 0);
>>>         pool->ops = ops;
>>> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>                                 spin_unlock(&pool->lock);
>>>                                 continue;
>>>                         }
>>> +                       kref_get(&zhdr->refcount);
>>>                         list_del_init(&zhdr->buddy);
>>>                         spin_unlock(&pool->lock);
>>>
>>> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>                         else if (zhdr->middle_chunks == 0)
>>>                                 bud = MIDDLE;
>>>                         else {
>>> +                               z3fold_page_unlock(zhdr);
>>>                                 spin_lock(&pool->lock);
>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>> +                               if (kref_put(&zhdr->refcount,
>>> +                                            release_z3fold_page))
>>> +                                       atomic64_dec(&pool->pages_nr);
>>>                                 spin_unlock(&pool->lock);
>>> -                               z3fold_page_unlock(zhdr);
>>>                                 pr_err("No free chunks in unbuddied\n");
>>>                                 WARN_ON(1);
>>>                                 continue;
>>> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>                 /* Add to unbuddied list */
>>>                 freechunks = num_free_chunks(zhdr);
>>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> -       } else {
>>> -               /* Add to buddied list */
>>> -               list_add(&zhdr->buddy, &pool->buddied);
>>>         }
>>>
>>>  headless:
>>> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>                 }
>>>         }
>>>
>>> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
>>> -               /* z3fold page is under reclaim, reclaim will free */
>>> -               if (bud != HEADLESS)
>>> -                       z3fold_page_unlock(zhdr);
>>> -               return;
>>> -       }
>>> -
>>> -       /* Remove from existing buddy list */
>>> -       if (bud != HEADLESS) {
>>> -               spin_lock(&pool->lock);
>>> -               /*
>>> -                * this object may have been removed from its list by
>>> -                * z3fold_alloc(). In that case we just do nothing,
>>> -                * z3fold_alloc() will allocate an object and add the page
>>> -                * to the relevant list.
>>> -                */
>>> -               if (!list_empty(&zhdr->buddy)) {
>>> -                       list_del(&zhdr->buddy);
>>> -               } else {
>>> -                       spin_unlock(&pool->lock);
>>> -                       z3fold_page_unlock(zhdr);
>>> -                       return;
>>> -               }
>>> -               spin_unlock(&pool->lock);
>>> -       }
>>> -
>>> -       if (bud == HEADLESS ||
>>> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>>> -                       zhdr->last_chunks == 0)) {
>>> -               /* z3fold page is empty, free */
>>> +       if (bud == HEADLESS) {
>>>                 spin_lock(&pool->lock);
>>>                 list_del(&page->lru);
>>>                 spin_unlock(&pool->lock);
>>> -               clear_bit(PAGE_HEADLESS, &page->private);
>>> -               if (bud != HEADLESS)
>>> -                       z3fold_page_unlock(zhdr);
>>> -               free_z3fold_page(zhdr);
>>> +               free_z3fold_page(page);
>>>                 atomic64_dec(&pool->pages_nr);
>>>         } else {
>>> -               z3fold_compact_page(zhdr);
>>> -               /* Add to the unbuddied list */
>>> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
>>> +                   zhdr->last_chunks != 0) {
>>> +                       z3fold_compact_page(zhdr);
>>> +                       /* Add to the unbuddied list */
>>> +                       spin_lock(&pool->lock);
>>> +                       if (!list_empty(&zhdr->buddy))
>>> +                               list_del(&zhdr->buddy);
>>> +                       freechunks = num_free_chunks(zhdr);
>>> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> +                       spin_unlock(&pool->lock);
>>> +               }
>>> +               z3fold_page_unlock(zhdr);
>>>                 spin_lock(&pool->lock);
>>> -               freechunks = num_free_chunks(zhdr);
>>> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
>>> +                       atomic64_dec(&pool->pages_nr);
>>>                 spin_unlock(&pool->lock);
>>> -               z3fold_page_unlock(zhdr);
>>>         }
>>>
>>>  }
>>> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                         return -EINVAL;
>>>                 }
>>>                 page = list_last_entry(&pool->lru, struct page, lru);
>>> -               list_del(&page->lru);
>>> +               list_del_init(&page->lru);
>>>
>>> -               /* Protect z3fold page against free */
>>> -               set_bit(UNDER_RECLAIM, &page->private);
>>>                 zhdr = page_address(page);
>>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>> -                       list_del(&zhdr->buddy);
>>> +                       if (!list_empty(&zhdr->buddy))
>>> +                               list_del_init(&zhdr->buddy);
>>> +                       kref_get(&zhdr->refcount);
>>>                         spin_unlock(&pool->lock);
>>>                         z3fold_page_lock(zhdr);
>>>                         /*
>>> @@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                                 goto next;
>>>                 }
>>>  next:
>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
>>> +                       if (ret == 0) {
>>> +                               free_z3fold_page(page);
>>> +                               return 0;
>>> +                       }
>>> +               } else {
>>> +                       int freed;
>>>                         z3fold_page_lock(zhdr);
>>> -               clear_bit(UNDER_RECLAIM, &page->private);
>>> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>>> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>>> -                    zhdr->middle_chunks == 0)) {
>>> -                       /*
>>> -                        * All buddies are now free, free the z3fold page and
>>> -                        * return success.
>>> -                        */
>>> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
>>> -                               z3fold_page_unlock(zhdr);
>>> -                       free_z3fold_page(zhdr);
>>> -                       atomic64_dec(&pool->pages_nr);
>>> -                       return 0;
>>> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>> -                       if (zhdr->first_chunks != 0 &&
>>> -                           zhdr->last_chunks != 0 &&
>>> -                           zhdr->middle_chunks != 0) {
>>> -                               /* Full, add to buddied list */
>>> -                               spin_lock(&pool->lock);
>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>> -                               spin_unlock(&pool->lock);
>>> -                       } else {
>>> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
>>> +                            zhdr->middle_chunks) &&
>>> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
>>> +                             zhdr->middle_chunks)) {
>>>                                 z3fold_compact_page(zhdr);
>>>                                 /* add to unbuddied list */
>>>                                 spin_lock(&pool->lock);
>>> @@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>                                          &pool->unbuddied[freechunks]);
>>>                                 spin_unlock(&pool->lock);
>>>                         }
>>> -               }
>>> -
>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>>                         z3fold_page_unlock(zhdr);
>>> +                       spin_lock(&pool->lock);
>>> +                       freed = kref_put(&zhdr->refcount, release_z3fold_page);
>>> +                       spin_unlock(&pool->lock);
>>> +                       if (freed) {
>>> +                               atomic64_dec(&pool->pages_nr);
>>> +                               return 0;
>>> +                       }
>>
>> you still can't do this - put the kref and then add it back to the lru
>> later.  freed here is only useful for knowing that it was freed - you
>> can't assume that !freed means it's still valid for you to use.
>
> Oh right, thanks. I can however do something like
> ...
>                         spin_lock(&pool->lock);
>                         if (kref_put(&zhdr->refcount, release_z3fold_page)) {
>                                 atomic64_dec(&pool->pages_nr);
>                                 spin_unlock(&pool->lock);
>                                 return 0;
>                         }
>                 }
>
>                 /* add to beginning of LRU */
>                 list_add(&page->lru, &pool->lru);
>         }
> ...
>
> provided that I take the lock for the headless case above. That will
> work won't it?

in this specific case - since every single kref_put in the driver is
protected by the pool lock - yeah, you can do that, since you know
that specific kref_put didn't free the page and no other kref_put
could happen since you're holding the pool lock.

but that specific requirement isn't made obvious by the code, and I
can see how a future patch could release the pool lock between the
kref_put and lru list add, without realizing that introduces a bug.
isn't it better to just add it to the lru list before you put the
kref?  just a suggestion.
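
A minimal sketch of that ordering, using the same zhdr/page/pool names as
in the patch (untested, illustration only): the page goes back on the LRU
while the reclaim path still holds its reference, so nothing after the
kref_put touches the page.

	/* re-add under the pool lock, then drop our reference; if this
	 * was the last reference, release_z3fold_page() takes the page
	 * off the LRU again before freeing it
	 */
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page))
		atomic64_dec(&pool->pages_nr);
	spin_unlock(&pool->lock);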

i'd also suggest the pages_nr dec go into the page release function,
instead of checking every kref_put return value; that's easier and
less prone to forgetting to check one of the kref_puts.

>
> ~vitaly

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 17:39       ` Dan Streetman
@ 2017-01-11 17:51         ` Vitaly Wool
  2017-01-11 18:03           ` Dan Streetman
  0 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 17:51 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 6:39 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Wed, Jan 11, 2017 at 12:27 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> On Wed, Jan 11, 2017 at 6:08 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>>> On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>>>> With both coming and already present locking optimizations,
>>>> introducing kref to reference-count z3fold objects is the right
>>>> thing to do. Moreover, it makes buddied list no longer necessary,
>>>> and allows for a simpler handling of headless pages.
>>>>
>>>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>>>> ---
>>>>  mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
>>>>  1 file changed, 62 insertions(+), 83 deletions(-)
>>>>
>>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>>> index 4325bde..59cb14f 100644
>>>> --- a/mm/z3fold.c
>>>> +++ b/mm/z3fold.c
>>>> @@ -52,6 +52,7 @@ enum buddy {
>>>>   *                     z3fold page, except for HEADLESS pages
>>>>   * @buddy:     links the z3fold page into the relevant list in the pool
>>>>   * @page_lock:         per-page lock
>>>> + * @refcount:          reference cound for the z3fold page
>>>>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>>>>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>>>>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
>>>> @@ -60,6 +61,7 @@ enum buddy {
>>>>  struct z3fold_header {
>>>>         struct list_head buddy;
>>>>         spinlock_t page_lock;
>>>> +       struct kref refcount;
>>>>         unsigned short first_chunks;
>>>>         unsigned short middle_chunks;
>>>>         unsigned short last_chunks;
>>>> @@ -95,8 +97,6 @@ struct z3fold_header {
>>>>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>>>>   *             the lists each z3fold page is added to depends on the size of
>>>>   *             its free region.
>>>> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
>>>> - *             these z3fold pages are full
>>>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>>>   *             added buddy.
>>>>   * @pages_nr:  number of z3fold pages in the pool.
>>>> @@ -109,7 +109,6 @@ struct z3fold_header {
>>>>  struct z3fold_pool {
>>>>         spinlock_t lock;
>>>>         struct list_head unbuddied[NCHUNKS];
>>>> -       struct list_head buddied;
>>>>         struct list_head lru;
>>>>         atomic64_t pages_nr;
>>>>         const struct z3fold_ops *ops;
>>>> @@ -121,8 +120,7 @@ struct z3fold_pool {
>>>>   * Internal z3fold page flags
>>>>   */
>>>>  enum z3fold_page_flags {
>>>> -       UNDER_RECLAIM = 0,
>>>> -       PAGE_HEADLESS,
>>>> +       PAGE_HEADLESS = 0,
>>>>         MIDDLE_CHUNK_MAPPED,
>>>>  };
>>>>
>>>> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>>         struct z3fold_header *zhdr = page_address(page);
>>>>
>>>>         INIT_LIST_HEAD(&page->lru);
>>>> -       clear_bit(UNDER_RECLAIM, &page->private);
>>>>         clear_bit(PAGE_HEADLESS, &page->private);
>>>>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>>>>
>>>>         spin_lock_init(&zhdr->page_lock);
>>>> +       kref_init(&zhdr->refcount);
>>>>         zhdr->first_chunks = 0;
>>>>         zhdr->middle_chunks = 0;
>>>>         zhdr->last_chunks = 0;
>>>> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>>  }
>>>>
>>>>  /* Resets the struct page fields and frees the page */
>>>> -static void free_z3fold_page(struct z3fold_header *zhdr)
>>>> +static void free_z3fold_page(struct page *page)
>>>>  {
>>>> -       __free_page(virt_to_page(zhdr));
>>>> +       __free_page(page);
>>>> +}
>>>> +
>>>> +static void release_z3fold_page(struct kref *ref)
>>>> +{
>>>> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
>>>> +                                               refcount);
>>>> +       struct page *page = virt_to_page(zhdr);
>>>> +       if (!list_empty(&zhdr->buddy))
>>>> +               list_del(&zhdr->buddy);
>>>> +       if (!list_empty(&page->lru))
>>>> +               list_del(&page->lru);
>>>> +       free_z3fold_page(page);
>>>>  }
>>>>
>>>>  /* Lock a z3fold page */
>>>> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>>>         spin_lock_init(&pool->lock);
>>>>         for_each_unbuddied_list(i, 0)
>>>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>>> -       INIT_LIST_HEAD(&pool->buddied);
>>>>         INIT_LIST_HEAD(&pool->lru);
>>>>         atomic64_set(&pool->pages_nr, 0);
>>>>         pool->ops = ops;
>>>> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>                                 spin_unlock(&pool->lock);
>>>>                                 continue;
>>>>                         }
>>>> +                       kref_get(&zhdr->refcount);
>>>>                         list_del_init(&zhdr->buddy);
>>>>                         spin_unlock(&pool->lock);
>>>>
>>>> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>                         else if (zhdr->middle_chunks == 0)
>>>>                                 bud = MIDDLE;
>>>>                         else {
>>>> +                               z3fold_page_unlock(zhdr);
>>>>                                 spin_lock(&pool->lock);
>>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>>> +                               if (kref_put(&zhdr->refcount,
>>>> +                                            release_z3fold_page))
>>>> +                                       atomic64_dec(&pool->pages_nr);
>>>>                                 spin_unlock(&pool->lock);
>>>> -                               z3fold_page_unlock(zhdr);
>>>>                                 pr_err("No free chunks in unbuddied\n");
>>>>                                 WARN_ON(1);
>>>>                                 continue;
>>>> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>                 /* Add to unbuddied list */
>>>>                 freechunks = num_free_chunks(zhdr);
>>>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>> -       } else {
>>>> -               /* Add to buddied list */
>>>> -               list_add(&zhdr->buddy, &pool->buddied);
>>>>         }
>>>>
>>>>  headless:
>>>> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>>                 }
>>>>         }
>>>>
>>>> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
>>>> -               /* z3fold page is under reclaim, reclaim will free */
>>>> -               if (bud != HEADLESS)
>>>> -                       z3fold_page_unlock(zhdr);
>>>> -               return;
>>>> -       }
>>>> -
>>>> -       /* Remove from existing buddy list */
>>>> -       if (bud != HEADLESS) {
>>>> -               spin_lock(&pool->lock);
>>>> -               /*
>>>> -                * this object may have been removed from its list by
>>>> -                * z3fold_alloc(). In that case we just do nothing,
>>>> -                * z3fold_alloc() will allocate an object and add the page
>>>> -                * to the relevant list.
>>>> -                */
>>>> -               if (!list_empty(&zhdr->buddy)) {
>>>> -                       list_del(&zhdr->buddy);
>>>> -               } else {
>>>> -                       spin_unlock(&pool->lock);
>>>> -                       z3fold_page_unlock(zhdr);
>>>> -                       return;
>>>> -               }
>>>> -               spin_unlock(&pool->lock);
>>>> -       }
>>>> -
>>>> -       if (bud == HEADLESS ||
>>>> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>>>> -                       zhdr->last_chunks == 0)) {
>>>> -               /* z3fold page is empty, free */
>>>> +       if (bud == HEADLESS) {
>>>>                 spin_lock(&pool->lock);
>>>>                 list_del(&page->lru);
>>>>                 spin_unlock(&pool->lock);
>>>> -               clear_bit(PAGE_HEADLESS, &page->private);
>>>> -               if (bud != HEADLESS)
>>>> -                       z3fold_page_unlock(zhdr);
>>>> -               free_z3fold_page(zhdr);
>>>> +               free_z3fold_page(page);
>>>>                 atomic64_dec(&pool->pages_nr);
>>>>         } else {
>>>> -               z3fold_compact_page(zhdr);
>>>> -               /* Add to the unbuddied list */
>>>> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
>>>> +                   zhdr->last_chunks != 0) {
>>>> +                       z3fold_compact_page(zhdr);
>>>> +                       /* Add to the unbuddied list */
>>>> +                       spin_lock(&pool->lock);
>>>> +                       if (!list_empty(&zhdr->buddy))
>>>> +                               list_del(&zhdr->buddy);
>>>> +                       freechunks = num_free_chunks(zhdr);
>>>> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>> +                       spin_unlock(&pool->lock);
>>>> +               }
>>>> +               z3fold_page_unlock(zhdr);
>>>>                 spin_lock(&pool->lock);
>>>> -               freechunks = num_free_chunks(zhdr);
>>>> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
>>>> +                       atomic64_dec(&pool->pages_nr);
>>>>                 spin_unlock(&pool->lock);
>>>> -               z3fold_page_unlock(zhdr);
>>>>         }
>>>>
>>>>  }
>>>> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>                         return -EINVAL;
>>>>                 }
>>>>                 page = list_last_entry(&pool->lru, struct page, lru);
>>>> -               list_del(&page->lru);
>>>> +               list_del_init(&page->lru);
>>>>
>>>> -               /* Protect z3fold page against free */
>>>> -               set_bit(UNDER_RECLAIM, &page->private);
>>>>                 zhdr = page_address(page);
>>>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>>> -                       list_del(&zhdr->buddy);
>>>> +                       if (!list_empty(&zhdr->buddy))
>>>> +                               list_del_init(&zhdr->buddy);
>>>> +                       kref_get(&zhdr->refcount);
>>>>                         spin_unlock(&pool->lock);
>>>>                         z3fold_page_lock(zhdr);
>>>>                         /*
>>>> @@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>                                 goto next;
>>>>                 }
>>>>  next:
>>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>>> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
>>>> +                       if (ret == 0) {
>>>> +                               free_z3fold_page(page);
>>>> +                               return 0;
>>>> +                       }
>>>> +               } else {
>>>> +                       int freed;
>>>>                         z3fold_page_lock(zhdr);
>>>> -               clear_bit(UNDER_RECLAIM, &page->private);
>>>> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>>>> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>>>> -                    zhdr->middle_chunks == 0)) {
>>>> -                       /*
>>>> -                        * All buddies are now free, free the z3fold page and
>>>> -                        * return success.
>>>> -                        */
>>>> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
>>>> -                               z3fold_page_unlock(zhdr);
>>>> -                       free_z3fold_page(zhdr);
>>>> -                       atomic64_dec(&pool->pages_nr);
>>>> -                       return 0;
>>>> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>>> -                       if (zhdr->first_chunks != 0 &&
>>>> -                           zhdr->last_chunks != 0 &&
>>>> -                           zhdr->middle_chunks != 0) {
>>>> -                               /* Full, add to buddied list */
>>>> -                               spin_lock(&pool->lock);
>>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>>> -                               spin_unlock(&pool->lock);
>>>> -                       } else {
>>>> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
>>>> +                            zhdr->middle_chunks) &&
>>>> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
>>>> +                             zhdr->middle_chunks)) {
>>>>                                 z3fold_compact_page(zhdr);
>>>>                                 /* add to unbuddied list */
>>>>                                 spin_lock(&pool->lock);
>>>> @@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>                                          &pool->unbuddied[freechunks]);
>>>>                                 spin_unlock(&pool->lock);
>>>>                         }
>>>> -               }
>>>> -
>>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>>>                         z3fold_page_unlock(zhdr);
>>>> +                       spin_lock(&pool->lock);
>>>> +                       freed = kref_put(&zhdr->refcount, release_z3fold_page);
>>>> +                       spin_unlock(&pool->lock);
>>>> +                       if (freed) {
>>>> +                               atomic64_dec(&pool->pages_nr);
>>>> +                               return 0;
>>>> +                       }
>>>
>>> you still can't do this - put the kref and then add it back to the lru
>>> later.  freed here is only useful for knowing that it was freed - you
>>> can't assume that !freed means it's still valid for you to use.
>>
>> Oh right, thanks. I can however do something like
>> ...
>>                         spin_lock(&pool->lock);
>>                         if (kref_put(&zhdr->refcount, release_z3fold_page)) {
>>                                 atomic64_dec(&pool->pages_nr);
>>                                 spin_unlock(&pool->lock);
>>                                 return 0;
>>                         }
>>                 }
>>
>>                 /* add to beginning of LRU */
>>                 list_add(&page->lru, &pool->lru);
>>         }
>> ...
>>
>> provided that I take the lock for the headless case above. That will
>> work won't it?
>
> in this specific case - since every single kref_put in the driver is
> protected by the pool lock - yeah, you can do that, since you know
> that specific kref_put didn't free the page and no other kref_put
> could happen since you're holding the pool lock.
>
> but that specific requirement isn't made obvious by the code, and I
> can see how a future patch could release the pool lock between the
> kref_put and lru list add, without realizing that introduces a bug.
> isn't it better to just add it to the lru list before you put the
> kref?  just a suggestion.

That would require adding it to the lru separately for headless pages
above; I don't think it makes things better or less error prone.
I would rather add a comment before list_add.

> i'd also suggest the pages_nr dec go into the page release function,
> instead of checking every kref_put return value; that's easier and
> less prone to forgetting to check one of the kref_puts.

That will mean z3fold_header should contain a pointer to its pool. I'm
not sure it is worth it but I can do that :)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 17:51         ` Vitaly Wool
@ 2017-01-11 18:03           ` Dan Streetman
  2017-01-11 20:51             ` Vitaly Wool
  0 siblings, 1 reply; 20+ messages in thread
From: Dan Streetman @ 2017-01-11 18:03 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 12:51 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> On Wed, Jan 11, 2017 at 6:39 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>> On Wed, Jan 11, 2017 at 12:27 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
>>> On Wed, Jan 11, 2017 at 6:08 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>>>> On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>>>>> With both coming and already present locking optimizations,
>>>>> introducing kref to reference-count z3fold objects is the right
>>>>> thing to do. Moreover, it makes buddied list no longer necessary,
>>>>> and allows for a simpler handling of headless pages.
>>>>>
>>>>> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>>>>> ---
>>>>>  mm/z3fold.c | 145 ++++++++++++++++++++++++++----------------------------------
>>>>>  1 file changed, 62 insertions(+), 83 deletions(-)
>>>>>
>>>>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>>>>> index 4325bde..59cb14f 100644
>>>>> --- a/mm/z3fold.c
>>>>> +++ b/mm/z3fold.c
>>>>> @@ -52,6 +52,7 @@ enum buddy {
>>>>>   *                     z3fold page, except for HEADLESS pages
>>>>>   * @buddy:     links the z3fold page into the relevant list in the pool
>>>>>   * @page_lock:         per-page lock
>>>>> + * @refcount:          reference cound for the z3fold page
>>>>>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>>>>>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>>>>>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
>>>>> @@ -60,6 +61,7 @@ enum buddy {
>>>>>  struct z3fold_header {
>>>>>         struct list_head buddy;
>>>>>         spinlock_t page_lock;
>>>>> +       struct kref refcount;
>>>>>         unsigned short first_chunks;
>>>>>         unsigned short middle_chunks;
>>>>>         unsigned short last_chunks;
>>>>> @@ -95,8 +97,6 @@ struct z3fold_header {
>>>>>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>>>>>   *             the lists each z3fold page is added to depends on the size of
>>>>>   *             its free region.
>>>>> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
>>>>> - *             these z3fold pages are full
>>>>>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>>>>>   *             added buddy.
>>>>>   * @pages_nr:  number of z3fold pages in the pool.
>>>>> @@ -109,7 +109,6 @@ struct z3fold_header {
>>>>>  struct z3fold_pool {
>>>>>         spinlock_t lock;
>>>>>         struct list_head unbuddied[NCHUNKS];
>>>>> -       struct list_head buddied;
>>>>>         struct list_head lru;
>>>>>         atomic64_t pages_nr;
>>>>>         const struct z3fold_ops *ops;
>>>>> @@ -121,8 +120,7 @@ struct z3fold_pool {
>>>>>   * Internal z3fold page flags
>>>>>   */
>>>>>  enum z3fold_page_flags {
>>>>> -       UNDER_RECLAIM = 0,
>>>>> -       PAGE_HEADLESS,
>>>>> +       PAGE_HEADLESS = 0,
>>>>>         MIDDLE_CHUNK_MAPPED,
>>>>>  };
>>>>>
>>>>> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>>>         struct z3fold_header *zhdr = page_address(page);
>>>>>
>>>>>         INIT_LIST_HEAD(&page->lru);
>>>>> -       clear_bit(UNDER_RECLAIM, &page->private);
>>>>>         clear_bit(PAGE_HEADLESS, &page->private);
>>>>>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>>>>>
>>>>>         spin_lock_init(&zhdr->page_lock);
>>>>> +       kref_init(&zhdr->refcount);
>>>>>         zhdr->first_chunks = 0;
>>>>>         zhdr->middle_chunks = 0;
>>>>>         zhdr->last_chunks = 0;
>>>>> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>>>>>  }
>>>>>
>>>>>  /* Resets the struct page fields and frees the page */
>>>>> -static void free_z3fold_page(struct z3fold_header *zhdr)
>>>>> +static void free_z3fold_page(struct page *page)
>>>>>  {
>>>>> -       __free_page(virt_to_page(zhdr));
>>>>> +       __free_page(page);
>>>>> +}
>>>>> +
>>>>> +static void release_z3fold_page(struct kref *ref)
>>>>> +{
>>>>> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
>>>>> +                                               refcount);
>>>>> +       struct page *page = virt_to_page(zhdr);
>>>>> +       if (!list_empty(&zhdr->buddy))
>>>>> +               list_del(&zhdr->buddy);
>>>>> +       if (!list_empty(&page->lru))
>>>>> +               list_del(&page->lru);
>>>>> +       free_z3fold_page(page);
>>>>>  }
>>>>>
>>>>>  /* Lock a z3fold page */
>>>>> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>>>>>         spin_lock_init(&pool->lock);
>>>>>         for_each_unbuddied_list(i, 0)
>>>>>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
>>>>> -       INIT_LIST_HEAD(&pool->buddied);
>>>>>         INIT_LIST_HEAD(&pool->lru);
>>>>>         atomic64_set(&pool->pages_nr, 0);
>>>>>         pool->ops = ops;
>>>>> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>>                                 spin_unlock(&pool->lock);
>>>>>                                 continue;
>>>>>                         }
>>>>> +                       kref_get(&zhdr->refcount);
>>>>>                         list_del_init(&zhdr->buddy);
>>>>>                         spin_unlock(&pool->lock);
>>>>>
>>>>> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>>                         else if (zhdr->middle_chunks == 0)
>>>>>                                 bud = MIDDLE;
>>>>>                         else {
>>>>> +                               z3fold_page_unlock(zhdr);
>>>>>                                 spin_lock(&pool->lock);
>>>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>>>> +                               if (kref_put(&zhdr->refcount,
>>>>> +                                            release_z3fold_page))
>>>>> +                                       atomic64_dec(&pool->pages_nr);
>>>>>                                 spin_unlock(&pool->lock);
>>>>> -                               z3fold_page_unlock(zhdr);
>>>>>                                 pr_err("No free chunks in unbuddied\n");
>>>>>                                 WARN_ON(1);
>>>>>                                 continue;
>>>>> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>>>>>                 /* Add to unbuddied list */
>>>>>                 freechunks = num_free_chunks(zhdr);
>>>>>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>>> -       } else {
>>>>> -               /* Add to buddied list */
>>>>> -               list_add(&zhdr->buddy, &pool->buddied);
>>>>>         }
>>>>>
>>>>>  headless:
>>>>> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>>>>>                 }
>>>>>         }
>>>>>
>>>>> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
>>>>> -               /* z3fold page is under reclaim, reclaim will free */
>>>>> -               if (bud != HEADLESS)
>>>>> -                       z3fold_page_unlock(zhdr);
>>>>> -               return;
>>>>> -       }
>>>>> -
>>>>> -       /* Remove from existing buddy list */
>>>>> -       if (bud != HEADLESS) {
>>>>> -               spin_lock(&pool->lock);
>>>>> -               /*
>>>>> -                * this object may have been removed from its list by
>>>>> -                * z3fold_alloc(). In that case we just do nothing,
>>>>> -                * z3fold_alloc() will allocate an object and add the page
>>>>> -                * to the relevant list.
>>>>> -                */
>>>>> -               if (!list_empty(&zhdr->buddy)) {
>>>>> -                       list_del(&zhdr->buddy);
>>>>> -               } else {
>>>>> -                       spin_unlock(&pool->lock);
>>>>> -                       z3fold_page_unlock(zhdr);
>>>>> -                       return;
>>>>> -               }
>>>>> -               spin_unlock(&pool->lock);
>>>>> -       }
>>>>> -
>>>>> -       if (bud == HEADLESS ||
>>>>> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
>>>>> -                       zhdr->last_chunks == 0)) {
>>>>> -               /* z3fold page is empty, free */
>>>>> +       if (bud == HEADLESS) {
>>>>>                 spin_lock(&pool->lock);
>>>>>                 list_del(&page->lru);
>>>>>                 spin_unlock(&pool->lock);
>>>>> -               clear_bit(PAGE_HEADLESS, &page->private);
>>>>> -               if (bud != HEADLESS)
>>>>> -                       z3fold_page_unlock(zhdr);
>>>>> -               free_z3fold_page(zhdr);
>>>>> +               free_z3fold_page(page);
>>>>>                 atomic64_dec(&pool->pages_nr);
>>>>>         } else {
>>>>> -               z3fold_compact_page(zhdr);
>>>>> -               /* Add to the unbuddied list */
>>>>> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
>>>>> +                   zhdr->last_chunks != 0) {
>>>>> +                       z3fold_compact_page(zhdr);
>>>>> +                       /* Add to the unbuddied list */
>>>>> +                       spin_lock(&pool->lock);
>>>>> +                       if (!list_empty(&zhdr->buddy))
>>>>> +                               list_del(&zhdr->buddy);
>>>>> +                       freechunks = num_free_chunks(zhdr);
>>>>> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>>> +                       spin_unlock(&pool->lock);
>>>>> +               }
>>>>> +               z3fold_page_unlock(zhdr);
>>>>>                 spin_lock(&pool->lock);
>>>>> -               freechunks = num_free_chunks(zhdr);
>>>>> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
>>>>> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
>>>>> +                       atomic64_dec(&pool->pages_nr);
>>>>>                 spin_unlock(&pool->lock);
>>>>> -               z3fold_page_unlock(zhdr);
>>>>>         }
>>>>>
>>>>>  }
>>>>> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>>                         return -EINVAL;
>>>>>                 }
>>>>>                 page = list_last_entry(&pool->lru, struct page, lru);
>>>>> -               list_del(&page->lru);
>>>>> +               list_del_init(&page->lru);
>>>>>
>>>>> -               /* Protect z3fold page against free */
>>>>> -               set_bit(UNDER_RECLAIM, &page->private);
>>>>>                 zhdr = page_address(page);
>>>>>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>>>> -                       list_del(&zhdr->buddy);
>>>>> +                       if (!list_empty(&zhdr->buddy))
>>>>> +                               list_del_init(&zhdr->buddy);
>>>>> +                       kref_get(&zhdr->refcount);
>>>>>                         spin_unlock(&pool->lock);
>>>>>                         z3fold_page_lock(zhdr);
>>>>>                         /*
>>>>> @@ -655,30 +641,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>>                                 goto next;
>>>>>                 }
>>>>>  next:
>>>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>>>> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
>>>>> +                       if (ret == 0) {
>>>>> +                               free_z3fold_page(page);
>>>>> +                               return 0;
>>>>> +                       }
>>>>> +               } else {
>>>>> +                       int freed;
>>>>>                         z3fold_page_lock(zhdr);
>>>>> -               clear_bit(UNDER_RECLAIM, &page->private);
>>>>> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
>>>>> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
>>>>> -                    zhdr->middle_chunks == 0)) {
>>>>> -                       /*
>>>>> -                        * All buddies are now free, free the z3fold page and
>>>>> -                        * return success.
>>>>> -                        */
>>>>> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
>>>>> -                               z3fold_page_unlock(zhdr);
>>>>> -                       free_z3fold_page(zhdr);
>>>>> -                       atomic64_dec(&pool->pages_nr);
>>>>> -                       return 0;
>>>>> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
>>>>> -                       if (zhdr->first_chunks != 0 &&
>>>>> -                           zhdr->last_chunks != 0 &&
>>>>> -                           zhdr->middle_chunks != 0) {
>>>>> -                               /* Full, add to buddied list */
>>>>> -                               spin_lock(&pool->lock);
>>>>> -                               list_add(&zhdr->buddy, &pool->buddied);
>>>>> -                               spin_unlock(&pool->lock);
>>>>> -                       } else {
>>>>> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
>>>>> +                            zhdr->middle_chunks) &&
>>>>> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
>>>>> +                             zhdr->middle_chunks)) {
>>>>>                                 z3fold_compact_page(zhdr);
>>>>>                                 /* add to unbuddied list */
>>>>>                                 spin_lock(&pool->lock);
>>>>> @@ -687,10 +661,15 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>>>>>                                          &pool->unbuddied[freechunks]);
>>>>>                                 spin_unlock(&pool->lock);
>>>>>                         }
>>>>> -               }
>>>>> -
>>>>> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>>>>>                         z3fold_page_unlock(zhdr);
>>>>> +                       spin_lock(&pool->lock);
>>>>> +                       freed = kref_put(&zhdr->refcount, release_z3fold_page);
>>>>> +                       spin_unlock(&pool->lock);
>>>>> +                       if (freed) {
>>>>> +                               atomic64_dec(&pool->pages_nr);
>>>>> +                               return 0;
>>>>> +                       }
>>>>
>>>> you still can't do this - put the kref and then add it back to the lru
>>>> later.  freed here is only useful for knowing that it was freed - you
>>>> can't assume that !freed means it's still valid for you to use.
>>>
>>> Oh right, thanks. I can however do something like
>>> ...
>>>                         spin_lock(&pool->lock);
>>>                         if (kref_put(&zhdr->refcount, release_z3fold_page)) {
>>>                                 atomic64_dec(&pool->pages_nr);
>>>                                 spin_unlock(&pool->lock);
>>>                                 return 0;
>>>                         }
>>>                 }
>>>
>>>                 /* add to beginning of LRU */
>>>                 list_add(&page->lru, &pool->lru);
>>>         }
>>> ...
>>>
>>> provided that I take the lock for the headless case above. That will
>>> work won't it?
>>
>> in this specific case - since every single kref_put in the driver is
>> protected by the pool lock - yeah, you can do that, since you know
>> that specific kref_put didn't free the page and no other kref_put
>> could happen since you're holding the pool lock.
>>
>> but that specific requirement isn't made obvious by the code, and I
>> can see how a future patch could release the pool lock between the
>> kref_put and lru list add, without realizing that introduces a bug.
>> isn't it better to just add it to the lru list before you put the
>> kref?  just a suggestion.
>
> That would require to add it to the lru separately for headless pages
> above, I don't think it makes things better or less error prone.
> I would rather add a comment before list_add.
>
>> i'd also suggest the pages_nr dec go into the page release function,
>> instead of checking every kref_put return value; that's easier and
>> less prone to forgetting to check one of the kref_puts.
>
> That will mean z3fold_header should contain a pointer to its pool. I'm
> not sure it is worth it but I can do that :)

the header's already rounded up to chunk size, so if there's room then
it won't take any extra memory.  but it works either way.
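
For illustration, a minimal sketch of that variant, assuming a pool
back-pointer is added to struct z3fold_header and set in
init_z3fold_page() (this is an assumption, not part of the posted patch):

static void release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	struct page *page = virt_to_page(zhdr);

	if (!list_empty(&zhdr->buddy))
		list_del(&zhdr->buddy);
	if (!list_empty(&page->lru))
		list_del(&page->lru);
	/* account for the page here instead of at every kref_put() site */
	atomic64_dec(&zhdr->pool->pages_nr);
	free_z3fold_page(page);
}

Callers would then just do kref_put(&zhdr->refcount, release_z3fold_page)
without checking the return value.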

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 18:03           ` Dan Streetman
@ 2017-01-11 20:51             ` Vitaly Wool
  2017-01-12 17:49               ` Dan Streetman
  0 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 20:51 UTC (permalink / raw)
  To: Dan Streetman; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, 11 Jan 2017 13:03:06 -0500
Dan Streetman <ddstreet@ieee.org> wrote:

<snip>
> >>> provided that I take the lock for the headless case above. That will
> >>> work won't it?
> >>
> >> in this specific case - since every single kref_put in the driver is
> >> protected by the pool lock - yeah, you can do that, since you know
> >> that specific kref_put didn't free the page and no other kref_put
> >> could happen since you're holding the pool lock.
> >>
> >> but that specific requirement isn't made obvious by the code, and I
> >> can see how a future patch could release the pool lock between the
> >> kref_put and lru list add, without realizing that introduces a bug.
> >> isn't it better to just add it to the lru list before you put the
> >> kref?  just a suggestion.
> >
> > That would require to add it to the lru separately for headless pages
> > above, I don't think it makes things better or less error prone.
> > I would rather add a comment before list_add.
> >
> >> i'd also suggest the pages_nr dec go into the page release function,
> >> instead of checking every kref_put return value; that's easier and
> >> less prone to forgetting to check one of the kref_puts.
> >
> > That will mean z3fold_header should contain a pointer to its pool. I'm
> > not sure it is worth it but I can do that :)
> 
> the header's already rounded up to chunk size, so if there's room then
> it won't take any extra memory.  but it works either way.

So let's have it like this then:


With both coming and already present locking optimizations,
introducing kref to reference-count z3fold objects is the right
thing to do. Moreover, it makes the buddied list no longer necessary,
and allows for simpler handling of headless pages.

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 151 ++++++++++++++++++++++++++----------------------------------
 1 file changed, 66 insertions(+), 85 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 4325bde..f0d2eaf 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -52,6 +52,7 @@ enum buddy {
  *			z3fold page, except for HEADLESS pages
  * @buddy:	links the z3fold page into the relevant list in the pool
  * @page_lock:		per-page lock
+ * @refcount:		reference count for the z3fold page
  * @first_chunks:	the size of the first buddy in chunks, 0 if free
  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
@@ -60,6 +61,7 @@ enum buddy {
 struct z3fold_header {
 	struct list_head buddy;
 	spinlock_t page_lock;
+	struct kref refcount;
 	unsigned short first_chunks;
 	unsigned short middle_chunks;
 	unsigned short last_chunks;
@@ -95,8 +97,6 @@ struct z3fold_header {
  * @unbuddied:	array of lists tracking z3fold pages that contain 2- buddies;
  *		the lists each z3fold page is added to depends on the size of
  *		its free region.
- * @buddied:	list tracking the z3fold pages that contain 3 buddies;
- *		these z3fold pages are full
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of z3fold pages in the pool.
@@ -109,7 +109,6 @@ struct z3fold_header {
 struct z3fold_pool {
 	spinlock_t lock;
 	struct list_head unbuddied[NCHUNKS];
-	struct list_head buddied;
 	struct list_head lru;
 	atomic64_t pages_nr;
 	const struct z3fold_ops *ops;
@@ -121,8 +120,7 @@ struct z3fold_pool {
  * Internal z3fold page flags
  */
 enum z3fold_page_flags {
-	UNDER_RECLAIM = 0,
-	PAGE_HEADLESS,
+	PAGE_HEADLESS = 0,
 	MIDDLE_CHUNK_MAPPED,
 };
 
@@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	struct z3fold_header *zhdr = page_address(page);
 
 	INIT_LIST_HEAD(&page->lru);
-	clear_bit(UNDER_RECLAIM, &page->private);
 	clear_bit(PAGE_HEADLESS, &page->private);
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 
 	spin_lock_init(&zhdr->page_lock);
+	kref_init(&zhdr->refcount);
 	zhdr->first_chunks = 0;
 	zhdr->middle_chunks = 0;
 	zhdr->last_chunks = 0;
@@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 }
 
 /* Resets the struct page fields and frees the page */
-static void free_z3fold_page(struct z3fold_header *zhdr)
+static void free_z3fold_page(struct page *page)
 {
-	__free_page(virt_to_page(zhdr));
+	__free_page(page);
+}
+
+static void release_z3fold_page(struct kref *ref)
+{
+	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+						refcount);
+	struct page *page = virt_to_page(zhdr);
+	if (!list_empty(&zhdr->buddy))
+		list_del(&zhdr->buddy);
+	if (!list_empty(&page->lru))
+		list_del(&page->lru);
+	free_z3fold_page(page);
 }
 
 /* Lock a z3fold page */
@@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 	spin_lock_init(&pool->lock);
 	for_each_unbuddied_list(i, 0)
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
-	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
 	atomic64_set(&pool->pages_nr, 0);
 	pool->ops = ops;
@@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 				spin_unlock(&pool->lock);
 				continue;
 			}
+			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			spin_unlock(&pool->lock);
 
@@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			else if (zhdr->middle_chunks == 0)
 				bud = MIDDLE;
 			else {
+				z3fold_page_unlock(zhdr);
 				spin_lock(&pool->lock);
-				list_add(&zhdr->buddy, &pool->buddied);
+				if (kref_put(&zhdr->refcount,
+					     release_z3fold_page))
+					atomic64_dec(&pool->pages_nr);
 				spin_unlock(&pool->lock);
-				z3fold_page_unlock(zhdr);
 				pr_err("No free chunks in unbuddied\n");
 				WARN_ON(1);
 				continue;
@@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
-	} else {
-		/* Add to buddied list */
-		list_add(&zhdr->buddy, &pool->buddied);
 	}
 
 headless:
@@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		}
 	}
 
-	if (test_bit(UNDER_RECLAIM, &page->private)) {
-		/* z3fold page is under reclaim, reclaim will free */
-		if (bud != HEADLESS)
-			z3fold_page_unlock(zhdr);
-		return;
-	}
-
-	/* Remove from existing buddy list */
-	if (bud != HEADLESS) {
-		spin_lock(&pool->lock);
-		/*
-		 * this object may have been removed from its list by
-		 * z3fold_alloc(). In that case we just do nothing,
-		 * z3fold_alloc() will allocate an object and add the page
-		 * to the relevant list.
-		 */
-		if (!list_empty(&zhdr->buddy)) {
-			list_del(&zhdr->buddy);
-		} else {
-			spin_unlock(&pool->lock);
-			z3fold_page_unlock(zhdr);
-			return;
-		}
-		spin_unlock(&pool->lock);
-	}
-
-	if (bud == HEADLESS ||
-	    (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
-			zhdr->last_chunks == 0)) {
-		/* z3fold page is empty, free */
+	if (bud == HEADLESS) {
 		spin_lock(&pool->lock);
 		list_del(&page->lru);
 		spin_unlock(&pool->lock);
-		clear_bit(PAGE_HEADLESS, &page->private);
-		if (bud != HEADLESS)
-			z3fold_page_unlock(zhdr);
-		free_z3fold_page(zhdr);
+		free_z3fold_page(page);
 		atomic64_dec(&pool->pages_nr);
 	} else {
-		z3fold_compact_page(zhdr);
-		/* Add to the unbuddied list */
+		if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
+		    zhdr->last_chunks != 0) {
+			z3fold_compact_page(zhdr);
+			/* Add to the unbuddied list */
+			spin_lock(&pool->lock);
+			if (!list_empty(&zhdr->buddy))
+				list_del(&zhdr->buddy);
+			freechunks = num_free_chunks(zhdr);
+			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+			spin_unlock(&pool->lock);
+		}
+		z3fold_page_unlock(zhdr);
 		spin_lock(&pool->lock);
-		freechunks = num_free_chunks(zhdr);
-		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		if (kref_put(&zhdr->refcount, release_z3fold_page))
+			atomic64_dec(&pool->pages_nr);
 		spin_unlock(&pool->lock);
-		z3fold_page_unlock(zhdr);
 	}
 
 }
@@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			return -EINVAL;
 		}
 		page = list_last_entry(&pool->lru, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 
-		/* Protect z3fold page against free */
-		set_bit(UNDER_RECLAIM, &page->private);
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
-			list_del(&zhdr->buddy);
+			if (!list_empty(&zhdr->buddy))
+				list_del_init(&zhdr->buddy);
+			kref_get(&zhdr->refcount);
 			spin_unlock(&pool->lock);
 			z3fold_page_lock(zhdr);
 			/*
@@ -655,30 +641,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				goto next;
 		}
 next:
-		if (!test_bit(PAGE_HEADLESS, &page->private))
-			z3fold_page_lock(zhdr);
-		clear_bit(UNDER_RECLAIM, &page->private);
-		if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
-		    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
-		     zhdr->middle_chunks == 0)) {
-			/*
-			 * All buddies are now free, free the z3fold page and
-			 * return success.
-			 */
-			if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
-				z3fold_page_unlock(zhdr);
-			free_z3fold_page(zhdr);
-			atomic64_dec(&pool->pages_nr);
-			return 0;
-		}  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
-			if (zhdr->first_chunks != 0 &&
-			    zhdr->last_chunks != 0 &&
-			    zhdr->middle_chunks != 0) {
-				/* Full, add to buddied list */
-				spin_lock(&pool->lock);
-				list_add(&zhdr->buddy, &pool->buddied);
-				spin_unlock(&pool->lock);
+		if (test_bit(PAGE_HEADLESS, &page->private)) {
+			if (ret == 0) {
+				free_z3fold_page(page);
+				return 0;
 			} else {
+				spin_lock(&pool->lock);
+			}
+		} else {
+			z3fold_page_lock(zhdr);
+			if ((zhdr->first_chunks || zhdr->last_chunks ||
+			     zhdr->middle_chunks) &&
+			    !(zhdr->first_chunks && zhdr->last_chunks &&
+			      zhdr->middle_chunks)) {
 				z3fold_compact_page(zhdr);
 				/* add to unbuddied list */
 				spin_lock(&pool->lock);
@@ -687,13 +662,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 					 &pool->unbuddied[freechunks]);
 				spin_unlock(&pool->lock);
 			}
-		}
-
-		if (!test_bit(PAGE_HEADLESS, &page->private))
 			z3fold_page_unlock(zhdr);
+			spin_lock(&pool->lock);
+			if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+				atomic64_dec(&pool->pages_nr);
+				spin_unlock(&pool->lock);
+				return 0;
+			}
+		}
 
-		spin_lock(&pool->lock);
-		/* add to beginning of LRU */
+		/*
+		 * Add to the beginning of LRU.
+		 * Pool lock has to be kept here to ensure the page has
+		 * not already been released
+		 */
 		list_add(&page->lru, &pool->lru);
 	}
 	spin_unlock(&pool->lock);
-- 
2.4.2
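
The refcounting convention the patch converges on, as a sketch using the
same names as above (a summary of the pattern, not extra code for the
patch):

	/* page creation: init_z3fold_page() starts the count at one */
	kref_init(&zhdr->refcount);

	/* any path that takes the header off a pool list pins it first,
	 * under the pool lock
	 */
	kref_get(&zhdr->refcount);
	list_del_init(&zhdr->buddy);

	/* dropping the last reference runs release_z3fold_page(), which
	 * unlinks the page from any remaining list and frees it
	 */
	if (kref_put(&zhdr->refcount, release_z3fold_page))
		atomic64_dec(&pool->pages_nr);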

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 3/5] z3fold: extend compaction function
  2017-01-11 16:43     ` Vitaly Wool
@ 2017-01-11 20:52       ` Vitaly Wool
  2017-01-12 17:50         ` Dan Streetman
  0 siblings, 1 reply; 20+ messages in thread
From: Vitaly Wool @ 2017-01-11 20:52 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Dan Streetman, Linux-MM, linux-kernel, Andrew Morton

On Wed, 11 Jan 2017 17:43:13 +0100
Vitaly Wool <vitalywool@gmail.com> wrote:

> On Wed, Jan 11, 2017 at 5:28 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> > On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
> >> z3fold_compact_page() currently only handles the situation when
> >> there's a single middle chunk within the z3fold page. However it
> >> may be worth it to move middle chunk closer to either first or
> >> last chunk, whichever is there, if the gap between them is big
> >> enough.
> >>
> >> This patch adds the relevant code, using BIG_CHUNK_GAP define as
> >> a threshold for middle chunk to be worth moving.
> >>
> >> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> >> ---
> >>  mm/z3fold.c | 26 +++++++++++++++++++++++++-
> >>  1 file changed, 25 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/mm/z3fold.c b/mm/z3fold.c
> >> index 98ab01f..fca3310 100644
> >> --- a/mm/z3fold.c
> >> +++ b/mm/z3fold.c
> >> @@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
> >>                        zhdr->middle_chunks << CHUNK_SHIFT);
> >>  }
> >>
> >> +#define BIG_CHUNK_GAP  3
> >>  /* Has to be called with lock held */
> >>  static int z3fold_compact_page(struct z3fold_header *zhdr)
> >>  {
> >> @@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
> >>                 zhdr->middle_chunks = 0;
> >>                 zhdr->start_middle = 0;
> >>                 zhdr->first_num++;
> >> +               return 1;
> >>         }
> >> -       return 1;
> >> +
> >> +       /*
> >> +        * moving data is expensive, so let's only do that if
> >> +        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
> >> +        */
> >> +       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
> >> +           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
> >> +                       BIG_CHUNK_GAP) {
> >> +               mchunk_memmove(zhdr, zhdr->first_chunks + 1);
> >> +               zhdr->start_middle = zhdr->first_chunks + 1;
> >
> > this should be first_chunks + ZHDR_CHUNKS, not + 1.
> >
> >> +               return 1;
> >> +       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
> >> +                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
> >> +                                       + zhdr->middle_chunks) >=
> >> +                       BIG_CHUNK_GAP) {
> >> +               unsigned short new_start = NCHUNKS - zhdr->last_chunks -
> >
> > this should be TOTAL_CHUNKS, not NCHUNKS.
> 
> Right :/

So here we go:


z3fold_compact_page() currently only handles the situation when
there's a single middle chunk within the z3fold page. However, it
may be worth it to move the middle chunk closer to either the first
or the last chunk, whichever is there, if the gap between them is
big enough.

This patch adds the relevant code, using BIG_CHUNK_GAP define as
a threshold for middle chunk to be worth moving.

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 98ab01f..fca3310 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
 		       zhdr->middle_chunks << CHUNK_SHIFT);
 }
 
+#define BIG_CHUNK_GAP	3
 /* Has to be called with lock held */
 static int z3fold_compact_page(struct z3fold_header *zhdr)
 {
@@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
 		zhdr->middle_chunks = 0;
 		zhdr->start_middle = 0;
 		zhdr->first_num++;
+		return 1;
 	}
-	return 1;
+
+	/*
+	 * moving data is expensive, so let's only do that if
+	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
+	 */
+	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
+	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
+			BIG_CHUNK_GAP) {
+		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
+		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
+		return 1;
+	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
+		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
+					+ zhdr->middle_chunks) >=
+			BIG_CHUNK_GAP) {
+		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
+			zhdr->middle_chunks;
+		mchunk_memmove(zhdr, new_start);
+		zhdr->start_middle = new_start;
+		return 1;
+	}
+
+	return 0;
 }
 
 /**
-- 
2.4.2
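
As a worked example of the new checks, here is a small standalone
userspace model; the constants are assumptions for a 4K page with 64-byte
chunks and a one-chunk header, the real values come from the macros in
mm/z3fold.c:

#include <stdio.h>

#define TOTAL_CHUNKS	64	/* assumed: PAGE_SIZE >> CHUNK_SHIFT */
#define ZHDR_CHUNKS	1	/* assumed: header rounded up to one chunk */
#define BIG_CHUNK_GAP	3

int main(void)
{
	/* example: 10-chunk first buddy, 8-chunk middle buddy at chunk 20 */
	unsigned short first_chunks = 10, last_chunks = 0;
	unsigned short middle_chunks = 8, start_middle = 20;

	if (first_chunks != 0 && last_chunks == 0 &&
	    start_middle - (first_chunks + ZHDR_CHUNKS) >= BIG_CHUNK_GAP)
		/* gap of 20 - 11 = 9 chunks: move middle down to chunk 11 */
		printf("move middle to chunk %d\n",
		       first_chunks + ZHDR_CHUNKS);
	else if (last_chunks != 0 && first_chunks == 0 &&
		 TOTAL_CHUNKS - (last_chunks + start_middle +
				 middle_chunks) >= BIG_CHUNK_GAP)
		printf("move middle up to chunk %d\n",
		       TOTAL_CHUNKS - last_chunks - middle_chunks);
	else
		printf("not worth compacting\n");

	return 0;
}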

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH/RESEND v2 5/5] z3fold: add kref refcounting
  2017-01-11 20:51             ` Vitaly Wool
@ 2017-01-12 17:49               ` Dan Streetman
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Streetman @ 2017-01-12 17:49 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 3:51 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> On Wed, 11 Jan 2017 13:03:06 -0500
> Dan Streetman <ddstreet@ieee.org> wrote:
>
> <snip>
>> >>> provided that I take the lock for the headless case above. That will
>> >>> work won't it?
>> >>
>> >> in this specific case - since every single kref_put in the driver is
>> >> protected by the pool lock - yeah, you can do that, since you know
>> >> that specific kref_put didn't free the page and no other kref_put
>> >> could happen since you're holding the pool lock.
>> >>
>> >> but that specific requirement isn't made obvious by the code, and I
>> >> can see how a future patch could release the pool lock between the
>> >> kref_put and lru list add, without realizing that introduces a bug.
>> >> isn't it better to just add it to the lru list before you put the
>> >> kref?  just a suggestion.
>> >
>> > That would require to add it to the lru separately for headless pages
>> > above, I don't think it makes things better or less error prone.
>> > I would rather add a comment before list_add.
>> >
>> >> i'd also suggest the pages_nr dec go into the page release function,
>> >> instead of checking every kref_put return value; that's easier and
>> >> less prone to forgetting to check one of the kref_puts.
>> >
>> > That will mean z3fold_header should contain a pointer to its pool. I'm
>> > not sure it is worth it but I can do that :)
>>
>> the header's already rounded up to chunk size, so if there's room then
>> it won't take any extra memory.  but it works either way.
>
> So let's have it like this then:
>
>
> With both coming and already present locking optimizations,
> introducing kref to reference-count z3fold objects is the right
> thing to do. Moreover, it makes buddied list no longer necessary,
> and allows for a simpler handling of headless pages.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

Reviewed-by: Dan Streetman <ddstreet@ieee.org>

> ---
>  mm/z3fold.c | 151 ++++++++++++++++++++++++++----------------------------------
>  1 file changed, 66 insertions(+), 85 deletions(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 4325bde..f0d2eaf 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -52,6 +52,7 @@ enum buddy {
>   *                     z3fold page, except for HEADLESS pages
>   * @buddy:     links the z3fold page into the relevant list in the pool
>   * @page_lock:         per-page lock
> + * @refcount:          reference count for the z3fold page
>   * @first_chunks:      the size of the first buddy in chunks, 0 if free
>   * @middle_chunks:     the size of the middle buddy in chunks, 0 if free
>   * @last_chunks:       the size of the last buddy in chunks, 0 if free
> @@ -60,6 +61,7 @@ enum buddy {
>  struct z3fold_header {
>         struct list_head buddy;
>         spinlock_t page_lock;
> +       struct kref refcount;
>         unsigned short first_chunks;
>         unsigned short middle_chunks;
>         unsigned short last_chunks;
> @@ -95,8 +97,6 @@ struct z3fold_header {
>   * @unbuddied: array of lists tracking z3fold pages that contain 2- buddies;
>   *             the lists each z3fold page is added to depends on the size of
>   *             its free region.
> - * @buddied:   list tracking the z3fold pages that contain 3 buddies;
> - *             these z3fold pages are full
>   * @lru:       list tracking the z3fold pages in LRU order by most recently
>   *             added buddy.
>   * @pages_nr:  number of z3fold pages in the pool.
> @@ -109,7 +109,6 @@ struct z3fold_header {
>  struct z3fold_pool {
>         spinlock_t lock;
>         struct list_head unbuddied[NCHUNKS];
> -       struct list_head buddied;
>         struct list_head lru;
>         atomic64_t pages_nr;
>         const struct z3fold_ops *ops;
> @@ -121,8 +120,7 @@ struct z3fold_pool {
>   * Internal z3fold page flags
>   */
>  enum z3fold_page_flags {
> -       UNDER_RECLAIM = 0,
> -       PAGE_HEADLESS,
> +       PAGE_HEADLESS = 0,
>         MIDDLE_CHUNK_MAPPED,
>  };
>
> @@ -146,11 +144,11 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>         struct z3fold_header *zhdr = page_address(page);
>
>         INIT_LIST_HEAD(&page->lru);
> -       clear_bit(UNDER_RECLAIM, &page->private);
>         clear_bit(PAGE_HEADLESS, &page->private);
>         clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
>
>         spin_lock_init(&zhdr->page_lock);
> +       kref_init(&zhdr->refcount);
>         zhdr->first_chunks = 0;
>         zhdr->middle_chunks = 0;
>         zhdr->last_chunks = 0;
> @@ -161,9 +159,21 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
>  }
>
>  /* Resets the struct page fields and frees the page */
> -static void free_z3fold_page(struct z3fold_header *zhdr)
> +static void free_z3fold_page(struct page *page)
>  {
> -       __free_page(virt_to_page(zhdr));
> +       __free_page(page);
> +}
> +
> +static void release_z3fold_page(struct kref *ref)
> +{
> +       struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
> +                                               refcount);
> +       struct page *page = virt_to_page(zhdr);
> +       if (!list_empty(&zhdr->buddy))
> +               list_del(&zhdr->buddy);
> +       if (!list_empty(&page->lru))
> +               list_del(&page->lru);
> +       free_z3fold_page(page);
>  }
>
>  /* Lock a z3fold page */
> @@ -257,7 +267,6 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>         spin_lock_init(&pool->lock);
>         for_each_unbuddied_list(i, 0)
>                 INIT_LIST_HEAD(&pool->unbuddied[i]);
> -       INIT_LIST_HEAD(&pool->buddied);
>         INIT_LIST_HEAD(&pool->lru);
>         atomic64_set(&pool->pages_nr, 0);
>         pool->ops = ops;
> @@ -378,6 +387,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                                 spin_unlock(&pool->lock);
>                                 continue;
>                         }
> +                       kref_get(&zhdr->refcount);
>                         list_del_init(&zhdr->buddy);
>                         spin_unlock(&pool->lock);
>
> @@ -394,10 +404,12 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                         else if (zhdr->middle_chunks == 0)
>                                 bud = MIDDLE;
>                         else {
> +                               z3fold_page_unlock(zhdr);
>                                 spin_lock(&pool->lock);
> -                               list_add(&zhdr->buddy, &pool->buddied);
> +                               if (kref_put(&zhdr->refcount,
> +                                            release_z3fold_page))
> +                                       atomic64_dec(&pool->pages_nr);
>                                 spin_unlock(&pool->lock);
> -                               z3fold_page_unlock(zhdr);
>                                 pr_err("No free chunks in unbuddied\n");
>                                 WARN_ON(1);
>                                 continue;
> @@ -438,9 +450,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
>                 /* Add to unbuddied list */
>                 freechunks = num_free_chunks(zhdr);
>                 list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> -       } else {
> -               /* Add to buddied list */
> -               list_add(&zhdr->buddy, &pool->buddied);
>         }
>
>  headless:
> @@ -504,52 +513,29 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
>                 }
>         }
>
> -       if (test_bit(UNDER_RECLAIM, &page->private)) {
> -               /* z3fold page is under reclaim, reclaim will free */
> -               if (bud != HEADLESS)
> -                       z3fold_page_unlock(zhdr);
> -               return;
> -       }
> -
> -       /* Remove from existing buddy list */
> -       if (bud != HEADLESS) {
> -               spin_lock(&pool->lock);
> -               /*
> -                * this object may have been removed from its list by
> -                * z3fold_alloc(). In that case we just do nothing,
> -                * z3fold_alloc() will allocate an object and add the page
> -                * to the relevant list.
> -                */
> -               if (!list_empty(&zhdr->buddy)) {
> -                       list_del(&zhdr->buddy);
> -               } else {
> -                       spin_unlock(&pool->lock);
> -                       z3fold_page_unlock(zhdr);
> -                       return;
> -               }
> -               spin_unlock(&pool->lock);
> -       }
> -
> -       if (bud == HEADLESS ||
> -           (zhdr->first_chunks == 0 && zhdr->middle_chunks == 0 &&
> -                       zhdr->last_chunks == 0)) {
> -               /* z3fold page is empty, free */
> +       if (bud == HEADLESS) {
>                 spin_lock(&pool->lock);
>                 list_del(&page->lru);
>                 spin_unlock(&pool->lock);
> -               clear_bit(PAGE_HEADLESS, &page->private);
> -               if (bud != HEADLESS)
> -                       z3fold_page_unlock(zhdr);
> -               free_z3fold_page(zhdr);
> +               free_z3fold_page(page);
>                 atomic64_dec(&pool->pages_nr);
>         } else {
> -               z3fold_compact_page(zhdr);
> -               /* Add to the unbuddied list */
> +               if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
> +                   zhdr->last_chunks != 0) {
> +                       z3fold_compact_page(zhdr);
> +                       /* Add to the unbuddied list */
> +                       spin_lock(&pool->lock);
> +                       if (!list_empty(&zhdr->buddy))
> +                               list_del(&zhdr->buddy);
> +                       freechunks = num_free_chunks(zhdr);
> +                       list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +                       spin_unlock(&pool->lock);
> +               }
> +               z3fold_page_unlock(zhdr);
>                 spin_lock(&pool->lock);
> -               freechunks = num_free_chunks(zhdr);
> -               list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
> +               if (kref_put(&zhdr->refcount, release_z3fold_page))
> +                       atomic64_dec(&pool->pages_nr);
>                 spin_unlock(&pool->lock);
> -               z3fold_page_unlock(zhdr);
>         }
>
>  }
> @@ -608,13 +594,13 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                         return -EINVAL;
>                 }
>                 page = list_last_entry(&pool->lru, struct page, lru);
> -               list_del(&page->lru);
> +               list_del_init(&page->lru);
>
> -               /* Protect z3fold page against free */
> -               set_bit(UNDER_RECLAIM, &page->private);
>                 zhdr = page_address(page);
>                 if (!test_bit(PAGE_HEADLESS, &page->private)) {
> -                       list_del(&zhdr->buddy);
> +                       if (!list_empty(&zhdr->buddy))
> +                               list_del_init(&zhdr->buddy);
> +                       kref_get(&zhdr->refcount);
>                         spin_unlock(&pool->lock);
>                         z3fold_page_lock(zhdr);
>                         /*
> @@ -655,30 +641,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                 goto next;
>                 }
>  next:
> -               if (!test_bit(PAGE_HEADLESS, &page->private))
> -                       z3fold_page_lock(zhdr);
> -               clear_bit(UNDER_RECLAIM, &page->private);
> -               if ((test_bit(PAGE_HEADLESS, &page->private) && ret == 0) ||
> -                   (zhdr->first_chunks == 0 && zhdr->last_chunks == 0 &&
> -                    zhdr->middle_chunks == 0)) {
> -                       /*
> -                        * All buddies are now free, free the z3fold page and
> -                        * return success.
> -                        */
> -                       if (!test_and_clear_bit(PAGE_HEADLESS, &page->private))
> -                               z3fold_page_unlock(zhdr);
> -                       free_z3fold_page(zhdr);
> -                       atomic64_dec(&pool->pages_nr);
> -                       return 0;
> -               }  else if (!test_bit(PAGE_HEADLESS, &page->private)) {
> -                       if (zhdr->first_chunks != 0 &&
> -                           zhdr->last_chunks != 0 &&
> -                           zhdr->middle_chunks != 0) {
> -                               /* Full, add to buddied list */
> -                               spin_lock(&pool->lock);
> -                               list_add(&zhdr->buddy, &pool->buddied);
> -                               spin_unlock(&pool->lock);
> +               if (test_bit(PAGE_HEADLESS, &page->private)) {
> +                       if (ret == 0) {
> +                               free_z3fold_page(page);
> +                               return 0;
>                         } else {
> +                               spin_lock(&pool->lock);
> +                       }
> +               } else {
> +                       z3fold_page_lock(zhdr);
> +                       if ((zhdr->first_chunks || zhdr->last_chunks ||
> +                            zhdr->middle_chunks) &&
> +                           !(zhdr->first_chunks && zhdr->last_chunks &&
> +                             zhdr->middle_chunks)) {
>                                 z3fold_compact_page(zhdr);
>                                 /* add to unbuddied list */
>                                 spin_lock(&pool->lock);
> @@ -687,13 +662,19 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
>                                          &pool->unbuddied[freechunks]);
>                                 spin_unlock(&pool->lock);
>                         }
> -               }
> -
> -               if (!test_bit(PAGE_HEADLESS, &page->private))
>                         z3fold_page_unlock(zhdr);
> +                       spin_lock(&pool->lock);
> +                       if (kref_put(&zhdr->refcount, release_z3fold_page)) {
> +                               atomic64_dec(&pool->pages_nr);
> +                               return 0;
> +                       }
> +               }
>
> -               spin_lock(&pool->lock);
> -               /* add to beginning of LRU */
> +               /*
> +                * Add to the beginning of LRU.
> +                * Pool lock has to be kept here to ensure the page has
> +                * not already been released
> +                */
>                 list_add(&page->lru, &pool->lru);
>         }
>         spin_unlock(&pool->lock);
> --
> 2.4.2
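
The net effect of the hunks above is that page lifetime is now governed
by a reference count instead of the UNDER_RECLAIM bit: the allocator,
the free path and the reclaim path each take and drop references, and
whoever drops the last one runs release_z3fold_page(). Below is a
minimal user-space sketch of that pattern, using C11 atomics in place
of the kernel's struct kref; the obj_* names are made up for
illustration and are not part of z3fold. The point is that only the
final put runs the release callback, which is why the callers above can
key the pages_nr decrement off kref_put()'s return value.

/* build with: cc -std=c11 refcount-sketch.c */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
	/* payload would go here */
};

static void obj_release(struct obj *o)
{
	/* last reference dropped: this is the point where
	 * release_z3fold_page() unlinks and frees the page */
	printf("releasing %p\n", (void *)o);
	free(o);
}

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

/* returns 1 if this put released the object, mirroring kref_put() */
static int obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1) {
		obj_release(o);
		return 1;
	}
	return 0;
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);	/* like kref_init() */
	obj_get(o);			/* e.g. reclaim takes a reference */

	if (obj_put(o))			/* free path drops its reference */
		puts("freed on first put");
	if (obj_put(o))			/* last put actually frees */
		puts("freed on second put");
	return 0;
}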


* Re: [PATCH/RESEND v2 3/5] z3fold: extend compaction function
  2017-01-11 20:52       ` Vitaly Wool
@ 2017-01-12 17:50         ` Dan Streetman
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Streetman @ 2017-01-12 17:50 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, linux-kernel, Andrew Morton

On Wed, Jan 11, 2017 at 3:52 PM, Vitaly Wool <vitalywool@gmail.com> wrote:
> On Wed, 11 Jan 2017 17:43:13 +0100
> Vitaly Wool <vitalywool@gmail.com> wrote:
>
>> On Wed, Jan 11, 2017 at 5:28 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>> > On Wed, Jan 11, 2017 at 10:06 AM, Vitaly Wool <vitalywool@gmail.com> wrote:
>> >> z3fold_compact_page() currently only handles the situation when
>> >> there's a single middle chunk within the z3fold page. However it
>> >> may be worth it to move the middle chunk closer to either the
>> >> first or the last chunk, whichever is there, if the gap between
>> >> them is big enough.
>> >>
>> >> This patch adds the relevant code, using the BIG_CHUNK_GAP define
>> >> as a threshold for the middle chunk to be worth moving.
>> >>
>> >> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
>> >> ---
>> >>  mm/z3fold.c | 26 +++++++++++++++++++++++++-
>> >>  1 file changed, 25 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> >> index 98ab01f..fca3310 100644
>> >> --- a/mm/z3fold.c
>> >> +++ b/mm/z3fold.c
>> >> @@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
>> >>                        zhdr->middle_chunks << CHUNK_SHIFT);
>> >>  }
>> >>
>> >> +#define BIG_CHUNK_GAP  3
>> >>  /* Has to be called with lock held */
>> >>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>> >>  {
>> >> @@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>> >>                 zhdr->middle_chunks = 0;
>> >>                 zhdr->start_middle = 0;
>> >>                 zhdr->first_num++;
>> >> +               return 1;
>> >>         }
>> >> -       return 1;
>> >> +
>> >> +       /*
>> >> +        * moving data is expensive, so let's only do that if
>> >> +        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
>> >> +        */
>> >> +       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
>> >> +           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
>> >> +                       BIG_CHUNK_GAP) {
>> >> +               mchunk_memmove(zhdr, zhdr->first_chunks + 1);
>> >> +               zhdr->start_middle = zhdr->first_chunks + 1;
>> >
>> > this should be first_chunks + ZHDR_CHUNKS, not + 1.
>> >
>> >> +               return 1;
>> >> +       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
>> >> +                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
>> >> +                                       + zhdr->middle_chunks) >=
>> >> +                       BIG_CHUNK_GAP) {
>> >> +               unsigned short new_start = NCHUNKS - zhdr->last_chunks -
>> >
>> > this should be TOTAL_CHUNKS, not NCHUNKS.
>>
>> Right :/
>
> So here we go:
>
>
> z3fold_compact_page() currently only handles the situation when
> there's a single middle chunk within the z3fold page. However it
> may be worth it to move the middle chunk closer to either the first
> or the last chunk, whichever is there, if the gap between them is
> big enough.
>
> This patch adds the relevant code, using the BIG_CHUNK_GAP define as
> a threshold for the middle chunk to be worth moving.
>
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>

Acked-by: Dan Streetman <ddstreet@ieee.org>

> ---
>  mm/z3fold.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 98ab01f..fca3310 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -268,6 +268,7 @@ static inline void *mchunk_memmove(struct z3fold_header *zhdr,
>                        zhdr->middle_chunks << CHUNK_SHIFT);
>  }
>
> +#define BIG_CHUNK_GAP  3
>  /* Has to be called with lock held */
>  static int z3fold_compact_page(struct z3fold_header *zhdr)
>  {
> @@ -286,8 +287,31 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
>                 zhdr->middle_chunks = 0;
>                 zhdr->start_middle = 0;
>                 zhdr->first_num++;
> +               return 1;
>         }
> -       return 1;
> +
> +       /*
> +        * moving data is expensive, so let's only do that if
> +        * there's substantial gain (at least BIG_CHUNK_GAP chunks)
> +        */
> +       if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
> +           zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
> +                       BIG_CHUNK_GAP) {
> +               mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
> +               zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
> +               return 1;
> +       } else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
> +                  TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
> +                                       + zhdr->middle_chunks) >=
> +                       BIG_CHUNK_GAP) {
> +               unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
> +                       zhdr->middle_chunks;
> +               mchunk_memmove(zhdr, new_start);
> +               zhdr->start_middle = new_start;
> +               return 1;
> +       }
> +
> +       return 0;
>  }
>
>  /**
> --
> 2.4.2
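
For reference, the two constants the review comments above turned on
differ by exactly the header size: NCHUNKS is TOTAL_CHUNKS minus
ZHDR_CHUNKS, so the "last buddy occupied, first free" branch has to use
TOTAL_CHUNKS or the middle buddy ends up ZHDR_CHUNKS short of the last
one, and the "first occupied" branch has to add ZHDR_CHUNKS rather than
a hard-coded 1, since the header is not guaranteed to fit in a single
chunk. A stand-alone sketch of the arithmetic, assuming a 4K page,
64-byte chunks and a placeholder header size (the real value depends on
sizeof(struct z3fold_header) in the running kernel):

#include <stdio.h>

#define PAGE_SIZE	4096
#define CHUNK_SHIFT	6
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)		/* 64 bytes */
#define ZHDR_SIZE	48				/* placeholder header size */
#define ZHDR_CHUNKS	((ZHDR_SIZE + CHUNK_SIZE - 1) / CHUNK_SIZE)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)	/* chunks in the whole page */
#define NCHUNKS		(TOTAL_CHUNKS - ZHDR_CHUNKS)	/* chunks usable for data */

int main(void)
{
	/* example layout: first buddy occupied, last free, middle sitting high */
	unsigned short first_chunks = 4, middle_chunks = 5, start_middle = 40;
	unsigned short last_chunks = 6;
	unsigned short gap, new_start;

	/* gap between the end of the first buddy (header included) and the middle buddy */
	gap = start_middle - (first_chunks + ZHDR_CHUNKS);
	printf("gap below middle: %u chunks\n", gap);

	/* compaction pulls the middle buddy down right after the first one */
	new_start = first_chunks + ZHDR_CHUNKS;		/* not first_chunks + 1 */
	printf("middle moved down to chunk %u\n", new_start);

	/* symmetric case: push the middle buddy up so it ends where the last one begins */
	new_start = TOTAL_CHUNKS - last_chunks - middle_chunks;	/* not NCHUNKS - ... */
	printf("middle moved up to chunk %u (last buddy starts at chunk %u)\n",
	       new_start, TOTAL_CHUNKS - last_chunks);
	return 0;
}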


Thread overview: 20+ messages
2017-01-11 14:59 [PATCH/RESEND v2 0/5] z3fold optimizations and fixes Vitaly Wool
2017-01-11 15:06 ` [PATCH/RESEND v2 1/5] z3fold: make pages_nr atomic Vitaly Wool
2017-01-11 16:09   ` Dan Streetman
2017-01-11 15:06 ` [PATCH/RESEND v2 2/5] z3fold: fix header size related issues Vitaly Wool
2017-01-11 16:24   ` Dan Streetman
2017-01-11 15:06 ` [PATCH/RESEND v2 3/5] z3fold: extend compaction function Vitaly Wool
2017-01-11 16:28   ` Dan Streetman
2017-01-11 16:43     ` Vitaly Wool
2017-01-11 20:52       ` Vitaly Wool
2017-01-12 17:50         ` Dan Streetman
2017-01-11 15:06 ` [PATCH/RESEND v2 4/5] z3fold: use per-page spinlock Vitaly Wool
2017-01-11 16:42   ` Dan Streetman
2017-01-11 15:06 ` [PATCH/RESEND v2 5/5] z3fold: add kref refcounting Vitaly Wool
2017-01-11 17:08   ` Dan Streetman
2017-01-11 17:27     ` Vitaly Wool
2017-01-11 17:39       ` Dan Streetman
2017-01-11 17:51         ` Vitaly Wool
2017-01-11 18:03           ` Dan Streetman
2017-01-11 20:51             ` Vitaly Wool
2017-01-12 17:49               ` Dan Streetman
