linux-mm.kvack.org archive mirror
* [PATCH] z3fold: stricter locking and more careful reclaim
@ 2020-12-04 20:04 Vitaly Wool
  2020-12-04 21:47 ` Mike Galbraith
  0 siblings, 1 reply; 4+ messages in thread
From: Vitaly Wool @ 2020-12-04 20:04 UTC (permalink / raw)
  To: linux-mm; +Cc: Sebastian Andrzej Siewior, Mike Galbraith, akpm, Vitaly Wool

Use temporary slots in the reclaim function to avoid a possible race
when freeing them.

While at it, check the CLAIMED flag under the page lock in the reclaim
function to make sure we are not racing with z3fold_alloc().

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
---
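Roughly, the idea is that reclaim now encodes handles into a per-call,
on-stack slots object instead of the page's own slots, so a racing
free_handle() cannot free the slots out from under us. A condensed
sketch of that part (not a literal excerpt from the diff below; the
eviction loop and error handling are omitted, names as in mm/z3fold.c):

	struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));
	unsigned long handle = 0;

	/* temporary slots live on the reclaimer's stack; HANDLES_NOFREE
	 * tells free_handle() to leave them alone */
	rwlock_init(&slots.lock);
	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
	memset(slots.slot, 0, sizeof(slots.slot));

	/* encode against the temporary slots, not zhdr->slots */
	if (zhdr->first_chunks)
		handle = __encode_handle(zhdr, &slots, FIRST);

	/* evict through the usual callback; no free_handle() afterwards,
	 * the on-stack slots simply go out of scope */
	if (handle)
		ret = pool->ops->evict(pool, handle);
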
 mm/z3fold.c | 133 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 74 insertions(+), 59 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 18feaa0bc537..47f05228abdf 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -186,6 +186,7 @@ enum z3fold_page_flags {
  */
 enum z3fold_handle_flags {
 	HANDLES_ORPHANED = 0,
+	HANDLES_NOFREE = 1,
 };
 
 /*
@@ -320,7 +321,7 @@ static inline void free_handle(unsigned long handle)
 	slots = handle_to_slots(handle);
 	write_lock(&slots->lock);
 	*(unsigned long *)handle = 0;
-	if (zhdr->slots == slots) {
+	if (zhdr->slots == slots || test_bit(HANDLES_NOFREE, &slots->pool)) {
 		write_unlock(&slots->lock);
 		return; /* simple case, nothing else to do */
 	}
@@ -653,6 +654,28 @@ static inline void add_to_unbuddied(struct z3fold_pool *pool,
 	}
 }
 
+static inline enum buddy get_free_buddy(struct z3fold_header *zhdr, int chunks)
+{
+	enum buddy bud = HEADLESS;
+
+	if (zhdr->middle_chunks) {
+		if (!zhdr->first_chunks &&
+		    chunks <= zhdr->start_middle - ZHDR_CHUNKS)
+			bud = FIRST;
+		else if (!zhdr->last_chunks)
+			bud = LAST;
+	} else {
+		if (!zhdr->first_chunks)
+			bud = FIRST;
+		else if (!zhdr->last_chunks)
+			bud = LAST;
+		else
+			bud = MIDDLE;
+	}
+
+	return bud;
+}
+
 static inline void *mchunk_memmove(struct z3fold_header *zhdr,
 				unsigned short dst_chunk)
 {
@@ -714,18 +737,7 @@ static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
 		if (WARN_ON(new_zhdr == zhdr))
 			goto out_fail;
 
-		if (new_zhdr->first_chunks == 0) {
-			if (new_zhdr->middle_chunks != 0 &&
-					chunks >= new_zhdr->start_middle) {
-				new_bud = LAST;
-			} else {
-				new_bud = FIRST;
-			}
-		} else if (new_zhdr->last_chunks == 0) {
-			new_bud = LAST;
-		} else if (new_zhdr->middle_chunks == 0) {
-			new_bud = MIDDLE;
-		}
+		new_bud = get_free_buddy(new_zhdr, chunks);
 		q = new_zhdr;
 		switch (new_bud) {
 		case FIRST:
@@ -847,9 +859,8 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
 		return;
 	}
 
-	if (unlikely(PageIsolated(page) ||
-		     test_bit(PAGE_CLAIMED, &page->private) ||
-		     test_bit(PAGE_STALE, &page->private))) {
+	if (test_bit(PAGE_STALE, &page->private) ||
+	    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
 		z3fold_page_unlock(zhdr);
 		return;
 	}
@@ -858,13 +869,16 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
 	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
 		if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
 			atomic64_dec(&pool->pages_nr);
-		else
+		else {
+			clear_bit(PAGE_CLAIMED, &page->private);
 			z3fold_page_unlock(zhdr);
+		}
 		return;
 	}
 
 	z3fold_compact_page(zhdr);
 	add_to_unbuddied(pool, zhdr);
+	clear_bit(PAGE_CLAIMED, &page->private);
 	z3fold_page_unlock(zhdr);
 }
 
@@ -1109,17 +1123,8 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 retry:
 		zhdr = __z3fold_alloc(pool, size, can_sleep);
 		if (zhdr) {
-			if (zhdr->first_chunks == 0) {
-				if (zhdr->middle_chunks != 0 &&
-				    chunks >= zhdr->start_middle)
-					bud = LAST;
-				else
-					bud = FIRST;
-			} else if (zhdr->last_chunks == 0)
-				bud = LAST;
-			else if (zhdr->middle_chunks == 0)
-				bud = MIDDLE;
-			else {
+			bud = get_free_buddy(zhdr, chunks);
+			if (bud == HEADLESS) {
 				if (kref_put(&zhdr->refcount,
 					     release_z3fold_page_locked))
 					atomic64_dec(&pool->pages_nr);
@@ -1265,7 +1270,6 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		pr_err("%s: unknown bud %d\n", __func__, bud);
 		WARN_ON(1);
 		put_z3fold_header(zhdr);
-		clear_bit(PAGE_CLAIMED, &page->private);
 		return;
 	}
 
@@ -1280,8 +1284,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		z3fold_page_unlock(zhdr);
 		return;
 	}
-	if (unlikely(PageIsolated(page)) ||
-	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
+	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
 		put_z3fold_header(zhdr);
 		clear_bit(PAGE_CLAIMED, &page->private);
 		return;
@@ -1345,6 +1348,10 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 	struct page *page = NULL;
 	struct list_head *pos;
 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
+	struct z3fold_buddy_slots slots __attribute__((aligned(SLOTS_ALIGN)));
+
+	rwlock_init(&slots.lock);
+	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
 
 	spin_lock(&pool->lock);
 	if (!pool->ops || !pool->ops->evict || retries == 0) {
@@ -1359,35 +1366,36 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		list_for_each_prev(pos, &pool->lru) {
 			page = list_entry(pos, struct page, lru);
 
-			/* this bit could have been set by free, in which case
-			 * we pass over to the next page in the pool.
-			 */
-			if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
-				page = NULL;
-				continue;
-			}
-
-			if (unlikely(PageIsolated(page))) {
-				clear_bit(PAGE_CLAIMED, &page->private);
-				page = NULL;
-				continue;
-			}
 			zhdr = page_address(page);
 			if (test_bit(PAGE_HEADLESS, &page->private))
 				break;
 
+			if (kref_get_unless_zero(&zhdr->refcount) == 0) {
+				zhdr = NULL;
+				break;
+			}
 			if (!z3fold_page_trylock(zhdr)) {
-				clear_bit(PAGE_CLAIMED, &page->private);
+				if (kref_put(&zhdr->refcount,
+						release_z3fold_page))
+					atomic64_dec(&pool->pages_nr);
 				zhdr = NULL;
 				continue; /* can't evict at this point */
 			}
-			if (zhdr->foreign_handles) {
-				clear_bit(PAGE_CLAIMED, &page->private);
-				z3fold_page_unlock(zhdr);
+
+			/* test_and_set_bit is of course atomic, but we still
+			 * need to do it under page lock, otherwise checking
+			 * that bit in __z3fold_alloc wouldn't make sense
+			 */
+			if (zhdr->foreign_handles ||
+			    test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+				if (kref_put(&zhdr->refcount,
+						release_z3fold_page))
+					atomic64_dec(&pool->pages_nr);
+				else
+					z3fold_page_unlock(zhdr);
 				zhdr = NULL;
 				continue; /* can't evict such page */
 			}
-			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			zhdr->cpu = -1;
 			break;
@@ -1409,12 +1417,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			first_handle = 0;
 			last_handle = 0;
 			middle_handle = 0;
+			memset(slots.slot, 0, sizeof(slots.slot));
 			if (zhdr->first_chunks)
-				first_handle = encode_handle(zhdr, FIRST);
+				first_handle = __encode_handle(zhdr, &slots,
+								FIRST);
 			if (zhdr->middle_chunks)
-				middle_handle = encode_handle(zhdr, MIDDLE);
+				middle_handle = __encode_handle(zhdr, &slots,
+								MIDDLE);
 			if (zhdr->last_chunks)
-				last_handle = encode_handle(zhdr, LAST);
+				last_handle = __encode_handle(zhdr, &slots,
+								LAST);
 			/*
 			 * it's safe to unlock here because we hold a
 			 * reference to this page
@@ -1429,19 +1441,16 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			ret = pool->ops->evict(pool, middle_handle);
 			if (ret)
 				goto next;
-			free_handle(middle_handle);
 		}
 		if (first_handle) {
 			ret = pool->ops->evict(pool, first_handle);
 			if (ret)
 				goto next;
-			free_handle(first_handle);
 		}
 		if (last_handle) {
 			ret = pool->ops->evict(pool, last_handle);
 			if (ret)
 				goto next;
-			free_handle(last_handle);
 		}
 next:
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
@@ -1455,9 +1464,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			spin_unlock(&pool->lock);
 			clear_bit(PAGE_CLAIMED, &page->private);
 		} else {
+			struct z3fold_buddy_slots *slots = zhdr->slots;
 			z3fold_page_lock(zhdr);
 			if (kref_put(&zhdr->refcount,
 					release_z3fold_page_locked)) {
+				kmem_cache_free(pool->c_handle, slots);
 				atomic64_dec(&pool->pages_nr);
 				return 0;
 			}
@@ -1573,8 +1584,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 	VM_BUG_ON_PAGE(!PageMovable(page), page);
 	VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-	if (test_bit(PAGE_HEADLESS, &page->private) ||
-	    test_bit(PAGE_CLAIMED, &page->private))
+	if (test_bit(PAGE_HEADLESS, &page->private))
 		return false;
 
 	zhdr = page_address(page);
@@ -1586,6 +1596,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
 	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
 		goto out;
 
+	if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+		goto out;
 	pool = zhdr_to_pool(zhdr);
 	spin_lock(&pool->lock);
 	if (!list_empty(&zhdr->buddy))
@@ -1612,16 +1624,17 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 
 	VM_BUG_ON_PAGE(!PageMovable(page), page);
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
+	VM_BUG_ON_PAGE(!test_bit(PAGE_CLAIMED, &page->private), page);
 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 
 	zhdr = page_address(page);
 	pool = zhdr_to_pool(zhdr);
 
-	if (!z3fold_page_trylock(zhdr)) {
+	if (!z3fold_page_trylock(zhdr))
 		return -EAGAIN;
-	}
 	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
 		z3fold_page_unlock(zhdr);
+		clear_bit(PAGE_CLAIMED, &page->private);
 		return -EBUSY;
 	}
 	if (work_pending(&zhdr->work)) {
@@ -1663,6 +1676,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
 	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
 
 	page_mapcount_reset(page);
+	clear_bit(PAGE_CLAIMED, &page->private);
 	put_page(page);
 	return 0;
 }
@@ -1686,6 +1700,7 @@ static void z3fold_page_putback(struct page *page)
 	spin_lock(&pool->lock);
 	list_add(&page->lru, &pool->lru);
 	spin_unlock(&pool->lock);
+	clear_bit(PAGE_CLAIMED, &page->private);
 	z3fold_page_unlock(zhdr);
 }
 
-- 
2.20.1




* Re: [PATCH] z3fold: stricter locking and more careful reclaim
  2020-12-04 20:04 [PATCH] z3fold: stricter locking and more careful reclaim Vitaly Wool
@ 2020-12-04 21:47 ` Mike Galbraith
  2020-12-05 11:38   ` Vitaly Wool
  0 siblings, 1 reply; 4+ messages in thread
From: Mike Galbraith @ 2020-12-04 21:47 UTC (permalink / raw)
  To: Vitaly Wool, linux-mm; +Cc: Sebastian Andrzej Siewior, akpm

On Fri, 2020-12-04 at 22:04 +0200, Vitaly Wool wrote:
> Use temporary slots in the reclaim function to avoid a possible race
> when freeing them.
>
> While at it, check the CLAIMED flag under the page lock in the reclaim
> function to make sure we are not racing with z3fold_alloc().
>
> Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>

FYI, with this applied, ltp::mm still crashes the RT kernel, with the
same rwlock corruption being what takes it down.

	-Mike

* Re: [PATCH] z3fold: stricter locking and more careful reclaim
  2020-12-04 21:47 ` Mike Galbraith
@ 2020-12-05 11:38   ` Vitaly Wool
  2020-12-05 18:09     ` Mike Galbraith
  0 siblings, 1 reply; 4+ messages in thread
From: Vitaly Wool @ 2020-12-05 11:38 UTC (permalink / raw)
  To: Mike Galbraith; +Cc: Linux-MM, Sebastian Andrzej Siewior, Andrew Morton

On Fri, Dec 4, 2020 at 10:47 PM Mike Galbraith <efault@gmx.de> wrote:
>
> On Fri, 2020-12-04 at 22:04 +0200, Vitaly Wool wrote:
> > Use temporary slots in the reclaim function to avoid a possible race
> > when freeing them.
> >
> > While at it, check the CLAIMED flag under the page lock in the reclaim
> > function to make sure we are not racing with z3fold_alloc().
> >
> > Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
>
> FYI, with this applied, ltp::mm still crashes the RT kernel, with the
> same rwlock corruption being what takes it down.

So sad it does, but thanks a lot for the quick response.

~Vitaly


* Re: [PATCH] z3fold: stricter locking and more careful reclaim
  2020-12-05 11:38   ` Vitaly Wool
@ 2020-12-05 18:09     ` Mike Galbraith
  0 siblings, 0 replies; 4+ messages in thread
From: Mike Galbraith @ 2020-12-05 18:09 UTC (permalink / raw)
  To: Vitaly Wool; +Cc: Linux-MM, Sebastian Andrzej Siewior, Andrew Morton

On Sat, 2020-12-05 at 12:38 +0100, Vitaly Wool wrote:
> On Fri, Dec 4, 2020 at 10:47 PM Mike Galbraith <efault@gmx.de> wrote:
> >
> > On Fri, 2020-12-04 at 22:04 +0200, Vitaly Wool wrote:
> > > Use temporary slots in the reclaim function to avoid a possible race
> > > when freeing them.
> > >
> > > While at it, check the CLAIMED flag under the page lock in the reclaim
> > > function to make sure we are not racing with z3fold_alloc().
> > >
> > > Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
> >
> > FYI, with this applied, ltp::mm still crashes the RT kernel, with the
> > same rwlock corruption being what takes it down.
>
> So sad it does, but thanks a lot for the quick response.

Thanks for taking a poke at it.  Intuitive switch to write_lock() in
__release_z3fold_page() "fixed" the darn thing, but staring at it since
has yet to inspire a clear "and that works because...".
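
For reference, the change is roughly of this shape (a sketch of the
idea, not the actual diff; the real function also walks the slot array
and handles the HANDLES_ORPHANED marking):

	/* in __release_z3fold_page(): take the slots lock for writing
	 * rather than reading, so the final inspection of slots->slot[]
	 * fully serializes against free_handle(), which also takes the
	 * lock for writing while it clears entries */
	write_lock(&zhdr->slots->lock);
	/* ... inspect the slots, then free them or mark them orphaned ... */
	write_unlock(&zhdr->slots->lock);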

	-Mike



