Linux-mm Archive on lore.kernel.org
 help / color / Atom feed
From: Dan Streetman <ddstreet@ieee.org>
To: Hui Zhu <teawater@gmail.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Seth Jennings <sjenning@redhat.com>,
	 linux-kernel <linux-kernel@vger.kernel.org>,
	Linux-MM <linux-mm@kvack.org>,
	 Hui Zhu <teawaterz@linux.alibaba.com>
Subject: Re: [PATCH 2/2] mm, zswap: Support THP
Date: Fri, 8 Nov 2019 11:15:31 -0500
Message-ID: <CALZtONA9Y9tvOJcHUyac770fSQhCoGMb7kDL1R5N9Bueqd+7_g@mail.gmail.com> (raw)
In-Reply-To: <1571111349-5041-2-git-send-email-teawater@gmail.com>

On Mon, Oct 14, 2019 at 11:49 PM Hui Zhu <teawater@gmail.com> wrote:
>
> This commit lets zswap treat a THP as continuous normal pages
> in zswap_frontswap_store.
> It will store them in a number of "zswap_entry" objects.  These
> "zswap_entry" objects will be inserted into "zswap_tree" together.

Why does zswap need to carry the added complexity of converting a THP
into separate normal-sized pages?  That should be done higher up in
the swap layer.

>
> Signed-off-by: Hui Zhu <teawaterz@linux.alibaba.com>
> ---
>  mm/zswap.c | 170 +++++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 109 insertions(+), 61 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 46a3223..36aa10d 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -316,11 +316,7 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
>         }
>  }
>
> -/*
> - * Carries out the common pattern of freeing and entry's zpool allocation,
> - * freeing the entry itself, and decrementing the number of stored pages.
> - */
> -static void zswap_free_entry(struct zswap_entry *entry)
> +static void zswap_free_entry_1(struct zswap_entry *entry)
>  {
>         if (!entry->length)
>                 atomic_dec(&zswap_same_filled_pages);
> @@ -329,6 +325,15 @@ static void zswap_free_entry(struct zswap_entry *entry)
>                 zswap_pool_put(entry->pool);
>         }
>         zswap_entry_cache_free(entry);
> +}
> +
> +/*
> + * Carries out the common pattern of freeing and entry's zpool allocation,
> + * freeing the entry itself, and decrementing the number of stored pages.
> + */
> +static void zswap_free_entry(struct zswap_entry *entry)
> +{
> +       zswap_free_entry_1(entry);
>         atomic_dec(&zswap_stored_pages);
>         zswap_update_total_size();
>  }
> @@ -980,15 +985,11 @@ static void zswap_fill_page(void *ptr, unsigned long value)
>         memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
>  }
>
> -/*********************************
> -* frontswap hooks
> -**********************************/
> -/* attempts to compress and store an single page */
> -static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> -                               struct page *page)
> +static int zswap_frontswap_store_1(unsigned type, pgoff_t offset,
> +                               struct page *page,
> +                               struct zswap_entry **entry_pointer)
>  {
> -       struct zswap_tree *tree = zswap_trees[type];
> -       struct zswap_entry *entry, *dupentry;
> +       struct zswap_entry *entry;
>         struct crypto_comp *tfm;
>         int ret;
>         unsigned int hlen, dlen = PAGE_SIZE;
> @@ -998,36 +999,6 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>         struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
>         gfp_t gfp;
>
> -       /* THP isn't supported */
> -       if (PageTransHuge(page)) {
> -               ret = -EINVAL;
> -               goto reject;
> -       }
> -
> -       if (!zswap_enabled || !tree) {
> -               ret = -ENODEV;
> -               goto reject;
> -       }
> -
> -       /* reclaim space if needed */
> -       if (zswap_is_full()) {
> -               zswap_pool_limit_hit++;
> -               if (zswap_shrink()) {
> -                       zswap_reject_reclaim_fail++;
> -                       ret = -ENOMEM;
> -                       goto reject;
> -               }
> -
> -               /* A second zswap_is_full() check after
> -                * zswap_shrink() to make sure it's now
> -                * under the max_pool_percent
> -                */
> -               if (zswap_is_full()) {
> -                       ret = -ENOMEM;
> -                       goto reject;
> -               }
> -       }
> -
>         /* allocate entry */
>         entry = zswap_entry_cache_alloc(GFP_KERNEL);
>         if (!entry) {
> @@ -1035,6 +1006,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>                 ret = -ENOMEM;
>                 goto reject;
>         }
> +       *entry_pointer = entry;
>
>         if (zswap_same_filled_pages_enabled) {
>                 src = kmap_atomic(page);
> @@ -1044,7 +1016,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>                         entry->length = 0;
>                         entry->value = value;
>                         atomic_inc(&zswap_same_filled_pages);
> -                       goto insert_entry;
> +                       goto out;
>                 }
>                 kunmap_atomic(src);
>         }
> @@ -1093,31 +1065,105 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>         entry->handle = handle;
>         entry->length = dlen;
>
> -insert_entry:
> +out:
> +       return 0;
> +
> +put_dstmem:
> +       put_cpu_var(zswap_dstmem);
> +       zswap_pool_put(entry->pool);
> +freepage:
> +       zswap_entry_cache_free(entry);
> +reject:
> +       return ret;
> +}
> +
> +/*********************************
> +* frontswap hooks
> +**********************************/
> +/* attempts to compress and store an single page */
> +static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> +                               struct page *page)
> +{
> +       struct zswap_tree *tree = zswap_trees[type];
> +       struct zswap_entry **entries = NULL, *dupentry;
> +       struct zswap_entry *single_entry[1];
> +       int ret;
> +       int i, nr;
> +
> +       if (!zswap_enabled || !tree) {
> +               ret = -ENODEV;
> +               goto reject;
> +       }
> +
> +       /* reclaim space if needed */
> +       if (zswap_is_full()) {
> +               zswap_pool_limit_hit++;
> +               if (zswap_shrink()) {
> +                       zswap_reject_reclaim_fail++;
> +                       ret = -ENOMEM;
> +                       goto reject;
> +               }
> +
> +               /* A second zswap_is_full() check after
> +                * zswap_shrink() to make sure it's now
> +                * under the max_pool_percent
> +                */
> +               if (zswap_is_full()) {
> +                       ret = -ENOMEM;
> +                       goto reject;
> +               }
> +       }
> +
> +       nr = hpage_nr_pages(page);
> +
> +       if (unlikely(nr > 1)) {
> +               entries = kvmalloc(sizeof(struct zswap_entry *) * nr,
> +                               GFP_KERNEL);
> +               if (!entries) {
> +                       ret = -ENOMEM;
> +                       goto reject;
> +               }
> +       } else
> +               entries = single_entry;
> +
> +       for (i = 0; i < nr; i++) {
> +               ret = zswap_frontswap_store_1(type, offset + i, page + i,
> +                                       &entries[i]);
> +               if (ret)
> +                       goto freepage;
> +       }
> +
>         /* map */
>         spin_lock(&tree->lock);
> -       do {
> -               ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
> -               if (ret == -EEXIST) {
> -                       zswap_duplicate_entry++;
> -                       /* remove from rbtree */
> -                       zswap_rb_erase(&tree->rbroot, dupentry);
> -                       zswap_entry_put(tree, dupentry);
> -               }
> -       } while (ret == -EEXIST);
> +       for (i = 0; i < nr; i++) {
> +               do {
> +                       ret = zswap_rb_insert(&tree->rbroot, entries[i],
> +                                       &dupentry);
> +                       if (ret == -EEXIST) {
> +                               zswap_duplicate_entry++;
> +                               /* remove from rbtree */
> +                               zswap_rb_erase(&tree->rbroot, dupentry);
> +                               zswap_entry_put(tree, dupentry);
> +                       }
> +               } while (ret == -EEXIST);
> +       }
>         spin_unlock(&tree->lock);
>
>         /* update stats */
> -       atomic_inc(&zswap_stored_pages);
> +       atomic_add(nr, &zswap_stored_pages);
>         zswap_update_total_size();
>
> -       return 0;
> -
> -put_dstmem:
> -       put_cpu_var(zswap_dstmem);
> -       zswap_pool_put(entry->pool);
> +       ret = 0;
>  freepage:
> -       zswap_entry_cache_free(entry);
> +       if (unlikely(nr > 1)) {
> +               if (ret) {
> +                       int j;
> +
> +                       for (j = 0; j < i; j++)
> +                               zswap_free_entry_1(entries[j]);
> +               }
> +               kvfree(entries);
> +       }
>  reject:
>         return ret;
>  }
> @@ -1136,6 +1182,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>         unsigned int dlen;
>         int ret;
>
> +       BUG_ON(PageTransHuge(page));
> +
>         /* find */
>         spin_lock(&tree->lock);
>         entry = zswap_entry_find_get(&tree->rbroot, offset);
> --
> 2.7.4
>


  reply index

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-15  3:49 [PATCH 1/2] mm, frontswap: Fix frontswap_map issue with THP Hui Zhu
2019-10-15  3:49 ` [PATCH 2/2] mm, zswap: Support THP Hui Zhu
2019-11-08 16:15   ` Dan Streetman [this message]
2019-11-11  1:48     ` Hui Zhu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CALZtONA9Y9tvOJcHUyac770fSQhCoGMb7kDL1R5N9Bueqd+7_g@mail.gmail.com \
    --to=ddstreet@ieee.org \
    --cc=konrad.wilk@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=sjenning@redhat.com \
    --cc=teawater@gmail.com \
    --cc=teawaterz@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org
	public-inbox-index linux-mm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git