All of lore.kernel.org
 help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: Nhat Pham <nphamcs@gmail.com>
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	ngupta@vflare.org, senozhatsky@chromium.org, sjenning@redhat.com,
	ddstreet@ieee.org, vitaly.wool@konsulko.com
Subject: Re: [PATCH v4 5/5] zsmalloc: Implement writeback mechanism for zsmalloc
Date: Thu, 17 Nov 2022 14:48:28 -0800	[thread overview]
Message-ID: <Y3a6PGDB5ievPYgQ@google.com> (raw)
In-Reply-To: <20221117163839.230900-6-nphamcs@gmail.com>

On Thu, Nov 17, 2022 at 08:38:39AM -0800, Nhat Pham wrote:
> This commit adds the writeback mechanism for zsmalloc, analogous to the
> zbud allocator. Zsmalloc will attempt to determine the coldest zspage
> (i.e., least recently used) in the pool, and attempt to write back all the
> stored compressed objects via the pool's evict handler.
> 
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>
> ---
>  mm/zsmalloc.c | 203 +++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 184 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 776d0e15a401..0ab9f173e964 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -280,10 +280,13 @@ struct zspage {
>  	struct list_head lru;
>  #endif
> 
> +	bool under_reclaim;
> +
> +	/* list of unfreed handles whose objects have been reclaimed */
> +	unsigned long *deferred_handles;
> +

Guess we don't need to bloat the zspage on !CONFIG_ZPOOL?

>  	struct zs_pool *pool;
> -#ifdef CONFIG_COMPACTION
>  	rwlock_t lock;
> -#endif
>  };
> 
>  struct mapping_area {
> @@ -304,10 +307,11 @@ static bool ZsHugePage(struct zspage *zspage)
>  	return zspage->huge;
>  }
> 
> -#ifdef CONFIG_COMPACTION
>  static void migrate_lock_init(struct zspage *zspage);
>  static void migrate_read_lock(struct zspage *zspage);
>  static void migrate_read_unlock(struct zspage *zspage);
> +
> +#ifdef CONFIG_COMPACTION
>  static void migrate_write_lock(struct zspage *zspage);
>  static void migrate_write_lock_nested(struct zspage *zspage);
>  static void migrate_write_unlock(struct zspage *zspage);
> @@ -315,9 +319,6 @@ static void kick_deferred_free(struct zs_pool *pool);
>  static void init_deferred_free(struct zs_pool *pool);
>  static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
>  #else
> -static void migrate_lock_init(struct zspage *zspage) {}
> -static void migrate_read_lock(struct zspage *zspage) {}
> -static void migrate_read_unlock(struct zspage *zspage) {}
>  static void migrate_write_lock(struct zspage *zspage) {}
>  static void migrate_write_lock_nested(struct zspage *zspage) {}
>  static void migrate_write_unlock(struct zspage *zspage) {}
> @@ -449,6 +450,27 @@ static void zs_zpool_free(void *pool, unsigned long handle)
>  	zs_free(pool, handle);
>  }
> 
> +static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
> +
> +static int zs_zpool_shrink(void *pool, unsigned int pages,

                                                       what pages?
                                                       

> +			unsigned int *reclaimed)
> +{
> +	unsigned int total = 0;
> +	int ret = -EINVAL;
> +
> +	while (total < pages) {
> +		ret = zs_reclaim_page(pool, 8);
> +		if (ret < 0)
> +			break;
> +		total++;
> +	}
> +
> +	if (reclaimed)
> +		*reclaimed = total;
> +
> +	return ret;
> +}
> +
>  static void *zs_zpool_map(void *pool, unsigned long handle,
>  			enum zpool_mapmode mm)
>  {
> @@ -487,6 +509,7 @@ static struct zpool_driver zs_zpool_driver = {
>  	.malloc_support_movable = true,
>  	.malloc =		  zs_zpool_malloc,
>  	.free =			  zs_zpool_free,
> +	.shrink =		  zs_zpool_shrink,
>  	.map =			  zs_zpool_map,
>  	.unmap =		  zs_zpool_unmap,
>  	.total_size =		  zs_zpool_total_size,
> @@ -960,6 +983,21 @@ static int trylock_zspage(struct zspage *zspage)
>  	return 0;
>  }
> 
> +/*
> + * Free all the deferred handles whose objects are freed in zs_free.
> + */
> +static void free_handles(struct zs_pool *pool, struct zspage *zspage)
> +{
> +	unsigned long handle = (unsigned long)zspage->deferred_handles;
> +
> +	while (handle) {
> +		unsigned long nxt_handle = handle_to_obj(handle);
> +
> +		cache_free_handle(pool, handle);
> +		handle = nxt_handle;
> +	}
> +}
> +

Shouldn't we have it under CONFIG_ZPOOL?

>  static void __free_zspage(struct zs_pool *pool, struct size_class *class,
>  				struct zspage *zspage)
>  {
> @@ -974,6 +1012,9 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
>  	VM_BUG_ON(get_zspage_inuse(zspage));
>  	VM_BUG_ON(fg != ZS_EMPTY);
> 
> +	/* Free all deferred handles from zs_free */
> +	free_handles(pool, zspage);
> +
>  	next = page = get_first_page(zspage);
>  	do {
>  		VM_BUG_ON_PAGE(!PageLocked(page), page);
> @@ -1060,6 +1101,8 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
>  #ifdef CONFIG_ZPOOL
>  	INIT_LIST_HEAD(&zspage->lru);
>  #endif
> +	zspage->under_reclaim = false;
> +	zspage->deferred_handles = NULL;
> 
>  	set_freeobj(zspage, 0);
>  }
> @@ -1482,13 +1525,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
>  		record_obj(handle, obj);
>  		class_stat_inc(class, OBJ_USED, 1);
> 
> -#ifdef CONFIG_ZPOOL
> -		/* Move the zspage to front of pool's LRU */
> -		move_to_front(pool, zspage);
> -#endif
> -		spin_unlock(&pool->lock);
> -
> -		return handle;
> +		goto out;

This chunk should go into the patch "Add a LRU to zs_pool to keep track of
zspages in LRU order".

>  	}
> 
>  	spin_unlock(&pool->lock);
> @@ -1512,12 +1549,12 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
> 
>  	/* We completely set up zspage so mark them as movable */
>  	SetZsPageMovable(pool, zspage);
> +out:
>  #ifdef CONFIG_ZPOOL
>  	/* Move the zspage to front of pool's LRU */
>  	move_to_front(pool, zspage);
>  #endif
>  	spin_unlock(&pool->lock);
> -

Unnecessary change.

>  	return handle;
>  }
>  EXPORT_SYMBOL_GPL(zs_malloc);
> @@ -1571,12 +1608,24 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
> 
>  	obj_free(class->size, obj);
>  	class_stat_dec(class, OBJ_USED, 1);
> +
> +	if (zspage->under_reclaim) {
> +		/*
> +		 * Reclaim needs the handles during writeback. It'll free
> +		 * them along with the zspage when it's done with them.
> +		 *
> +		 * Record current deferred handle at the memory location
> +		 * whose address is given by handle.
> +		 */
> +		record_obj(handle, (unsigned long)zspage->deferred_handles);
> +		zspage->deferred_handles = (unsigned long *)handle;
> +		spin_unlock(&pool->lock);
> +		return;
> +	}
>  	fullness = fix_fullness_group(class, zspage);
> -	if (fullness != ZS_EMPTY)
> -		goto out;
> +	if (fullness == ZS_EMPTY)
> +		free_zspage(pool, class, zspage);
> 
> -	free_zspage(pool, class, zspage);
> -out:
>  	spin_unlock(&pool->lock);
>  	cache_free_handle(pool, handle);
>  }
> @@ -1776,7 +1825,7 @@ static enum fullness_group putback_zspage(struct size_class *class,
>  	return fullness;
>  }
> 
> -#ifdef CONFIG_COMPACTION
> +#if defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION)
>  /*
>   * To prevent zspage destroy during migration, zspage freeing should
>   * hold locks of all pages in the zspage.
> @@ -1818,6 +1867,24 @@ static void lock_zspage(struct zspage *zspage)
>  	}
>  	migrate_read_unlock(zspage);
>  }
> +#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */
> +
> +#ifdef CONFIG_ZPOOL
> +/*
> + * Unlocks all the pages of the zspage.
> + *
> + * pool->lock must be held before this function is called
> + * to prevent the underlying pages from migrating.
> + */
> +static void unlock_zspage(struct zspage *zspage)
> +{
> +	struct page *page = get_first_page(zspage);
> +
> +	do {
> +		unlock_page(page);
> +	} while ((page = get_next_page(page)) != NULL);
> +}
> +#endif /* CONFIG_ZPOOL */
> 
>  static void migrate_lock_init(struct zspage *zspage)
>  {
> @@ -1834,6 +1901,7 @@ static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
>  	read_unlock(&zspage->lock);
>  }
> 
> +#ifdef CONFIG_COMPACTION
>  static void migrate_write_lock(struct zspage *zspage)
>  {
>  	write_lock(&zspage->lock);
> @@ -2398,6 +2466,103 @@ void zs_destroy_pool(struct zs_pool *pool)
>  }
>  EXPORT_SYMBOL_GPL(zs_destroy_pool);
> 
> +#ifdef CONFIG_ZPOOL
> +static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
> +{
> +	int i, obj_idx, ret = 0;
> +	unsigned long handle;
> +	struct zspage *zspage;
> +	struct page *page;
> +	enum fullness_group fullness;
> +
> +	if (retries == 0 || !pool->ops || !pool->ops->evict)
> +		return -EINVAL;
> +
> +	/* Lock LRU and fullness list */
> +	spin_lock(&pool->lock);
> +	if (list_empty(&pool->lru)) {
> +		spin_unlock(&pool->lock);
> +		return -EINVAL;
> +	}
> +
> +	for (i = 0; i < retries; i++) {
> +		struct size_class *class;
> +
> +		zspage = list_last_entry(&pool->lru, struct zspage, lru);
> +		list_del(&zspage->lru);
> +
> +		/* zs_free may free objects, but not the zspage and handles */
> +		zspage->under_reclaim = true;
> +
> +		class = zspage_class(pool, zspage);
> +		fullness = get_fullness_group(class, zspage);
> +
> +		/* Lock out object allocations and object compaction */
> +		remove_zspage(class, zspage, fullness);
> +
> +		spin_unlock(&pool->lock);
> +
> +		/* Lock backing pages into place */
> +		lock_zspage(zspage);
> +
> +		obj_idx = 0;
> +		page = zspage->first_page;
> +		while (1) {
> +			handle = find_alloced_obj(class, page, &obj_idx);
> +			if (!handle) {
> +				page = get_next_page(page);
> +				if (!page)
> +					break;
> +				obj_idx = 0;
> +				continue;
> +			}
> +
> +			/*
> +			 * This will write the object and call
> +			 * zs_free.

You want to write this on the same line.

> +			 *
> +			 * zs_free will free the object, but the
> +			 * under_reclaim flag prevents it from freeing
> +			 * the zspage altogether. This is necessary so
> +			 * that we can continue working with the
> +			 * zspage potentially after the last object
> +			 * has been freed.
> +			 */
> +			ret = pool->ops->evict(pool, handle);
> +			if (ret)
> +				goto next;
> +
> +			obj_idx++;
> +		}
> +
> +next:
> +		/* For freeing the zspage, or putting it back in the pool and LRU list. */
> +		spin_lock(&pool->lock);
> +		zspage->under_reclaim = false;
> +
> +		if (!get_zspage_inuse(zspage)) {
> +			/*
> +			 * Fullness went stale as zs_free() won't touch it
> +			 * while the page is removed from the pool. Fix it
> +			 * up for the check in __free_zspage().
> +			 */
> +			zspage->fullness = ZS_EMPTY;
> +
> +			__free_zspage(pool, class, zspage);
> +			spin_unlock(&pool->lock);
> +			return 0;
> +		}
> +
> +		putback_zspage(class, zspage);
> +		list_add(&zspage->lru, &pool->lru);
> +		unlock_zspage(zspage);
> +	}
> +
> +	spin_unlock(&pool->lock);
> +	return -EAGAIN;
> +}
> +#endif /* CONFIG_ZPOOL */
> +
>  static int __init zs_init(void)
>  {
>  	int ret;
> --
> 2.30.2

  reply	other threads:[~2022-11-17 22:48 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-17 16:38 [PATCH v4 0/5] Implement writeback for zsmalloc Nhat Pham
2022-11-17 16:38 ` [PATCH v4 1/5] zswap: fix writeback lock ordering " Nhat Pham
2022-11-17 16:38 ` [PATCH v4 2/5] zsmalloc: Consolidate zs_pool's migrate_lock and size_class's locks Nhat Pham
2022-11-17 22:02   ` Minchan Kim
2022-11-17 16:38 ` [PATCH v4 3/5] zsmalloc: Add a LRU to zs_pool to keep track of zspages in LRU order Nhat Pham
2022-11-17 22:15   ` Minchan Kim
2022-11-18  4:30     ` Johannes Weiner
2022-11-18 19:24       ` Minchan Kim
2022-11-18 19:54         ` Johannes Weiner
2022-11-17 16:38 ` [PATCH v4 4/5] zsmalloc: Add ops fields to zs_pool to store evict handlers Nhat Pham
2022-11-17 22:25   ` Minchan Kim
2022-11-18  5:15     ` Johannes Weiner
2022-11-18  5:52       ` Johannes Weiner
2022-11-17 16:38 ` [PATCH v4 5/5] zsmalloc: Implement writeback mechanism for zsmalloc Nhat Pham
2022-11-17 22:48   ` Minchan Kim [this message]
2022-11-18  5:24     ` Johannes Weiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y3a6PGDB5ievPYgQ@google.com \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=ddstreet@ieee.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ngupta@vflare.org \
    --cc=nphamcs@gmail.com \
    --cc=senozhatsky@chromium.org \
    --cc=sjenning@redhat.com \
    --cc=vitaly.wool@konsulko.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.