zswap: dynamic pool creation
diff mbox series

Message ID 1433419358-21820-1-git-send-email-ddstreet@ieee.org
State New, archived
Headers show
Series
  • zswap: dynamic pool creation
Related show

Commit Message

Dan Streetman June 4, 2015, 12:02 p.m. UTC
Add dynamic creation of pools.  Move the static crypto compression
per-cpu transforms into each pool.  Add a pointer to zswap_entry to
the pool it's in.

This is required by a separate patch which enables changing the
zswap zpool and compressor params at runtime.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
---
 mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 408 insertions(+), 142 deletions(-)

Comments

Dan Streetman June 4, 2015, 12:13 p.m. UTC | #1
On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> Add dynamic creation of pools.  Move the static crypto compression
> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
> the pool it's in.

Seth, as far as the design, from your previous comments I assume you
were thinking of maintaining separate lists of zpools and compressors?
 I do see how that will reduce duplication of zpools and compressors,
but it also requires adding a new pointer to each zswap_entry, and
increasing the amount of code to manage each list separately.  And the
most common case in zswap will be just a single zpool and compressor,
not repeatedly changing params.  What do you think?

>
> This is required by a separate patch which enables changing the
> zswap zpool and compressor params at runtime.
>
> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
> ---
>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 408 insertions(+), 142 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 2d5727b..fc93770 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
>  static struct zpool *zswap_pool;
>
>  /*********************************
> -* compression functions
> +* data structures
>  **********************************/
> -/* per-cpu compression transforms */
> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
>
> -enum comp_op {
> -       ZSWAP_COMPOP_COMPRESS,
> -       ZSWAP_COMPOP_DECOMPRESS
> +struct zswap_pool {
> +       struct zpool *zpool;
> +       struct kref kref;
> +       struct list_head list;
> +       struct rcu_head rcu_head;
> +       struct notifier_block notifier;
> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
> +       struct crypto_comp * __percpu *tfm;
>  };
>
> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
> -                               u8 *dst, unsigned int *dlen)
> -{
> -       struct crypto_comp *tfm;
> -       int ret;
> -
> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
> -       switch (op) {
> -       case ZSWAP_COMPOP_COMPRESS:
> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
> -               break;
> -       case ZSWAP_COMPOP_DECOMPRESS:
> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
> -               break;
> -       default:
> -               ret = -EINVAL;
> -       }
> -
> -       put_cpu();
> -       return ret;
> -}
> -
> -static int __init zswap_comp_init(void)
> -{
> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> -               pr_info("%s compressor not available\n", zswap_compressor);
> -               /* fall back to default compressor */
> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
> -                       /* can't even load the default compressor */
> -                       return -ENODEV;
> -       }
> -       pr_info("using %s compressor\n", zswap_compressor);
> -
> -       /* alloc percpu transforms */
> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
> -       if (!zswap_comp_pcpu_tfms)
> -               return -ENOMEM;
> -       return 0;
> -}
> -
> -static void __init zswap_comp_exit(void)
> -{
> -       /* free percpu transforms */
> -       free_percpu(zswap_comp_pcpu_tfms);
> -}
> -
> -/*********************************
> -* data structures
> -**********************************/
>  /*
>   * struct zswap_entry
>   *
> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
>   * page within zswap.
>   *
>   * rbnode - links the entry into red-black tree for the appropriate swap type
> + * offset - the swap offset for the entry.  Index into the red-black tree.
>   * refcount - the number of outstanding reference to the entry. This is needed
>   *            to protect against premature freeing of the entry by code
>   *            concurrent calls to load, invalidate, and writeback.  The lock
>   *            for the zswap_tree structure that contains the entry must
>   *            be held while changing the refcount.  Since the lock must
>   *            be held, there is no reason to also make refcount atomic.
> - * offset - the swap offset for the entry.  Index into the red-black tree.
> - * handle - zpool allocation handle that stores the compressed page data
>   * length - the length in bytes of the compressed page data.  Needed during
>   *          decompression
> + * pool - the zswap_pool the entry's data is in
> + * handle - zpool allocation handle that stores the compressed page data
>   */
>  struct zswap_entry {
>         struct rb_node rbnode;
>         pgoff_t offset;
>         int refcount;
>         unsigned int length;
> +       struct zswap_pool *pool;
>         unsigned long handle;
>  };
>
> @@ -201,6 +156,44 @@ struct zswap_tree {
>
>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
>
> +/* RCU-protected iteration */
> +static LIST_HEAD(zswap_pools);
> +/* protects zswap_pools list modification */
> +static DEFINE_SPINLOCK(zswap_pools_lock);
> +
> +/*********************************
> +* helpers and fwd declarations
> +**********************************/
> +
> +#define zswap_pool_debug(msg, p)                               \
> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
> +                zpool_get_type((p)->zpool))
> +
> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
> +static int zswap_pool_get(struct zswap_pool *pool);
> +static void zswap_pool_put(struct zswap_pool *pool);
> +
> +static bool zswap_is_full(void)
> +{
> +       return totalram_pages * zswap_max_pool_percent / 100 <
> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> +}
> +
> +static void zswap_update_total_size(void)
> +{
> +       struct zswap_pool *pool;
> +       u64 total = 0;
> +
> +       rcu_read_lock();
> +
> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> +               total += zpool_get_total_size(pool->zpool);
> +
> +       rcu_read_unlock();
> +
> +       zswap_pool_total_size = total;
> +}
> +
>  /*********************************
>  * zswap entry functions
>  **********************************/
> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
>   */
>  static void zswap_free_entry(struct zswap_entry *entry)
>  {
> -       zpool_free(zswap_pool, entry->handle);
> +       zpool_free(entry->pool->zpool, entry->handle);
> +       zswap_pool_put(entry->pool);
>         zswap_entry_cache_free(entry);
>         atomic_dec(&zswap_stored_pages);
> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> +       zswap_update_total_size();
>  }
>
>  /* caller must hold the tree lock */
> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
>  **********************************/
>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
>
> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
>  {
> -       struct crypto_comp *tfm;
>         u8 *dst;
>
>         switch (action) {
>         case CPU_UP_PREPARE:
> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
> -               if (IS_ERR(tfm)) {
> -                       pr_err("can't allocate compressor transform\n");
> -                       return NOTIFY_BAD;
> -               }
> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
>                 if (!dst) {
>                         pr_err("can't allocate compressor buffer\n");
> -                       crypto_free_comp(tfm);
> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>                         return NOTIFY_BAD;
>                 }
>                 per_cpu(zswap_dstmem, cpu) = dst;
>                 break;
>         case CPU_DEAD:
>         case CPU_UP_CANCELED:
> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
> -               if (tfm) {
> -                       crypto_free_comp(tfm);
> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> -               }
>                 dst = per_cpu(zswap_dstmem, cpu);
>                 kfree(dst);
>                 per_cpu(zswap_dstmem, cpu) = NULL;
> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>         return NOTIFY_OK;
>  }
>
> -static int zswap_cpu_notifier(struct notifier_block *nb,
> -                               unsigned long action, void *pcpu)
> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
> +                                    unsigned long action, void *pcpu)
>  {
> -       unsigned long cpu = (unsigned long)pcpu;
> -       return __zswap_cpu_notifier(action, cpu);
> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
>  }
>
> -static struct notifier_block zswap_cpu_notifier_block = {
> -       .notifier_call = zswap_cpu_notifier
> +static struct notifier_block zswap_dstmem_notifier = {
> +       .notifier_call =        zswap_cpu_dstmem_notifier,
>  };
>
> -static int __init zswap_cpu_init(void)
> +static int __init zswap_cpu_dstmem_init(void)
>  {
>         unsigned long cpu;
>
>         cpu_notifier_register_begin();
>         for_each_online_cpu(cpu)
> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
> +                   NOTIFY_BAD)
>                         goto cleanup;
> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
> +       __register_cpu_notifier(&zswap_dstmem_notifier);
>         cpu_notifier_register_done();
>         return 0;
>
>  cleanup:
>         for_each_online_cpu(cpu)
> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>         cpu_notifier_register_done();
>         return -ENOMEM;
>  }
>
> +static void zswap_cpu_dstmem_destroy(void)
> +{
> +       unsigned long cpu;
> +
> +       cpu_notifier_register_begin();
> +       for_each_online_cpu(cpu)
> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
> +       cpu_notifier_register_done();
> +}
> +
> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
> +                                    unsigned long action, unsigned long cpu)
> +{
> +       struct crypto_comp *tfm;
> +
> +       switch (action) {
> +       case CPU_UP_PREPARE:
> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
> +                       break;
> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
> +               if (IS_ERR_OR_NULL(tfm)) {
> +                       pr_err("could not alloc crypto comp %s : %ld\n",
> +                              pool->tfm_name, PTR_ERR(tfm));
> +                       return NOTIFY_BAD;
> +               }
> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
> +               break;
> +       case CPU_DEAD:
> +       case CPU_UP_CANCELED:
> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
> +               if (!IS_ERR_OR_NULL(tfm))
> +                       crypto_free_comp(tfm);
> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
> +               break;
> +       default:
> +               break;
> +       }
> +       return NOTIFY_OK;
> +}
> +
> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
> +                                  unsigned long action, void *pcpu)
> +{
> +       unsigned long cpu = (unsigned long)pcpu;
> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
> +
> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
> +}
> +
> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
> +{
> +       unsigned long cpu;
> +
> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
> +
> +       cpu_notifier_register_begin();
> +       for_each_online_cpu(cpu)
> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
> +                   NOTIFY_BAD)
> +                       goto cleanup;
> +       __register_cpu_notifier(&pool->notifier);
> +       cpu_notifier_register_done();
> +       return 0;
> +
> +cleanup:
> +       for_each_online_cpu(cpu)
> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> +       cpu_notifier_register_done();
> +       return -ENOMEM;
> +}
> +
> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
> +{
> +       unsigned long cpu;
> +
> +       cpu_notifier_register_begin();
> +       for_each_online_cpu(cpu)
> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> +       __unregister_cpu_notifier(&pool->notifier);
> +       cpu_notifier_register_done();
> +}
> +
>  /*********************************
> -* helpers
> +* pool functions
>  **********************************/
> -static bool zswap_is_full(void)
> +
> +static struct zswap_pool *__zswap_pool_current(void)
>  {
> -       return totalram_pages * zswap_max_pool_percent / 100 <
> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> +       struct zswap_pool *pool;
> +
> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
> +       WARN_ON(!pool);
> +
> +       return pool;
> +}
> +
> +static struct zswap_pool *zswap_pool_current(void)
> +{
> +       assert_spin_locked(&zswap_pools_lock);
> +
> +       return __zswap_pool_current();
> +}
> +
> +static struct zswap_pool *zswap_pool_current_get(void)
> +{
> +       struct zswap_pool *pool;
> +
> +       rcu_read_lock();
> +
> +       pool = __zswap_pool_current();
> +       if (!pool || !zswap_pool_get(pool))
> +               pool = NULL;
> +
> +       rcu_read_unlock();
> +
> +       return pool;
> +}
> +
> +static struct zswap_pool *zswap_pool_last_get(void)
> +{
> +       struct zswap_pool *pool, *last = NULL;
> +
> +       rcu_read_lock();
> +
> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> +               last = pool;
> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
> +               last = NULL;
> +
> +       rcu_read_unlock();
> +
> +       return last;
> +}
> +
> +static struct zpool_ops zswap_zpool_ops = {
> +       .evict = zswap_writeback_entry
> +};
> +
> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> +{
> +       struct zswap_pool *pool;
> +
> +       assert_spin_locked(&zswap_pools_lock);
> +
> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
> +                       continue;
> +               if (strncmp(zpool_get_type(pool->zpool), type,
> +                           sizeof(zswap_zpool_type)))
> +                       continue;
> +               /* if we can't get it, it's about to be destroyed */
> +               if (!zswap_pool_get(pool))
> +                       continue;
> +               return pool;
> +       }
> +
> +       return NULL;
> +}
> +
> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> +{
> +       struct zswap_pool *pool;
> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> +
> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> +       if (!pool) {
> +               pr_err("pool alloc failed\n");
> +               return NULL;
> +       }
> +
> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
> +       if (!pool->zpool) {
> +               pr_err("%s zpool not available\n", type);
> +               goto error;
> +       }
> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> +
> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> +       pool->tfm = alloc_percpu(struct crypto_comp *);
> +       if (!pool->tfm) {
> +               pr_err("percpu alloc failed\n");
> +               goto error;
> +       }
> +
> +       if (zswap_cpu_comp_init(pool))
> +               goto error;
> +       pr_debug("using %s compressor\n", pool->tfm_name);
> +
> +       /* being the current pool takes 1 ref; this func expects the
> +        * caller to always add the new pool as the current pool
> +        */
> +       kref_init(&pool->kref);
> +       INIT_LIST_HEAD(&pool->list);
> +
> +       zswap_pool_debug("created", pool);
> +
> +       return pool;
> +
> +error:
> +       free_percpu(pool->tfm);
> +       if (pool->zpool)
> +               zpool_destroy_pool(pool->zpool);
> +       kfree(pool);
> +       return NULL;
> +}
> +
> +static struct zswap_pool *__zswap_pool_create_fallback(void)
> +{
> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> +               pr_err("compressor %s not available, using default %s\n",
> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
> +                       sizeof(zswap_compressor));
> +       }
> +       if (!zpool_has_pool(zswap_zpool_type)) {
> +               pr_err("zpool %s not available, using default %s\n",
> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
> +                       sizeof(zswap_zpool_type));
> +       }
> +
> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> +}
> +
> +static void zswap_pool_destroy(struct zswap_pool *pool)
> +{
> +       zswap_pool_debug("destroying", pool);
> +
> +       zswap_cpu_comp_destroy(pool);
> +       free_percpu(pool->tfm);
> +       zpool_destroy_pool(pool->zpool);
> +       kfree(pool);
> +}
> +
> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
> +{
> +       return kref_get_unless_zero(&pool->kref);
> +}
> +
> +static void __zswap_pool_release(struct rcu_head *head)
> +{
> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
> +
> +       /* nobody should have been able to get a kref... */
> +       WARN_ON(kref_get_unless_zero(&pool->kref));
> +
> +       /* pool is now off zswap_pools list and has no references. */
> +       zswap_pool_destroy(pool);
> +}
> +
> +static void __zswap_pool_empty(struct kref *kref)
> +{
> +       struct zswap_pool *pool;
> +
> +       pool = container_of(kref, typeof(*pool), kref);
> +
> +       spin_lock(&zswap_pools_lock);
> +
> +       WARN_ON(pool == zswap_pool_current());
> +
> +       list_del_rcu(&pool->list);
> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
> +
> +       spin_unlock(&zswap_pools_lock);
> +}
> +
> +static void zswap_pool_put(struct zswap_pool *pool)
> +{
> +       kref_put(&pool->kref, __zswap_pool_empty);
> +}
> +
>  }
>
>  /*********************************
> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>         pgoff_t offset;
>         struct zswap_entry *entry;
>         struct page *page;
> +       struct crypto_comp *tfm;
>         u8 *src, *dst;
>         unsigned int dlen;
>         int ret;
> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
>                 /* decompress */
>                 dlen = PAGE_SIZE;
> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
>                 dst = kmap_atomic(page);
> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
> -                               entry->length, dst, &dlen);
> +               tfm = *get_cpu_ptr(entry->pool->tfm);
> +               ret = crypto_comp_decompress(tfm, src, entry->length,
> +                                            dst, &dlen);
> +               put_cpu_ptr(entry->pool->tfm);
>                 kunmap_atomic(dst);
> -               zpool_unmap_handle(zswap_pool, entry->handle);
> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
>                 BUG_ON(ret);
>                 BUG_ON(dlen != PAGE_SIZE);
>
> @@ -633,6 +882,22 @@ end:
>         return ret;
>  }
>
> +static int zswap_shrink(void)
> +{
> +       struct zswap_pool *pool;
> +       int ret;
> +
> +       pool = zswap_pool_last_get();
> +       if (!pool)
> +               return -ENOENT;
> +
> +       ret = zpool_shrink(pool->zpool, 1, NULL);
> +
> +       zswap_pool_put(pool);
> +
> +       return ret;
> +}
> +
>  /*********************************
>  * frontswap hooks
>  **********************************/
> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>  {
>         struct zswap_tree *tree = zswap_trees[type];
>         struct zswap_entry *entry, *dupentry;
> +       struct crypto_comp *tfm;
>         int ret;
>         unsigned int dlen = PAGE_SIZE, len;
>         unsigned long handle;
> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>         /* reclaim space if needed */
>         if (zswap_is_full()) {
>                 zswap_pool_limit_hit++;
> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
> +               if (zswap_shrink()) {
>                         zswap_reject_reclaim_fail++;
>                         ret = -ENOMEM;
>                         goto reject;
> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>                 goto reject;
>         }
>
> +       /* if entry is successfully added, it keeps the reference */
> +       entry->pool = zswap_pool_current_get();
> +       if (!entry->pool) {
> +               ret = -EINVAL;
> +               goto freepage;
> +       }
> +
>         /* compress */
>         dst = get_cpu_var(zswap_dstmem);
> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>         src = kmap_atomic(page);
> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
>         kunmap_atomic(src);
> +       put_cpu_ptr(entry->pool->tfm);
>         if (ret) {
>                 ret = -EINVAL;
> -               goto freepage;
> +               goto put_dstmem;
>         }
>
>         /* store */
>         len = dlen + sizeof(struct zswap_header);
> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
> -               &handle);
> +       ret = zpool_malloc(entry->pool->zpool, len,
> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
>         if (ret == -ENOSPC) {
>                 zswap_reject_compress_poor++;
> -               goto freepage;
> +               goto put_dstmem;
>         }
>         if (ret) {
>                 zswap_reject_alloc_fail++;
> -               goto freepage;
> +               goto put_dstmem;
>         }
> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
>         zhdr->swpentry = swp_entry(type, offset);
>         buf = (u8 *)(zhdr + 1);
>         memcpy(buf, dst, dlen);
> -       zpool_unmap_handle(zswap_pool, handle);
> +       zpool_unmap_handle(entry->pool->zpool, handle);
>         put_cpu_var(zswap_dstmem);
>
>         /* populate entry */
> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>
>         /* update stats */
>         atomic_inc(&zswap_stored_pages);
> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> +       zswap_update_total_size();
>
>         return 0;
>
> -freepage:
> +put_dstmem:
>         put_cpu_var(zswap_dstmem);
> +       zswap_pool_put(entry->pool);
> +freepage:
>         zswap_entry_cache_free(entry);
>  reject:
>         return ret;
> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>  {
>         struct zswap_tree *tree = zswap_trees[type];
>         struct zswap_entry *entry;
> +       struct crypto_comp *tfm;
>         u8 *src, *dst;
>         unsigned int dlen;
>         int ret;
> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>
>         /* decompress */
>         dlen = PAGE_SIZE;
> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
>         dst = kmap_atomic(page);
> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
> -               dst, &dlen);
> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
> +       put_cpu_ptr(entry->pool->tfm);
>         kunmap_atomic(dst);
> -       zpool_unmap_handle(zswap_pool, entry->handle);
> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
>         BUG_ON(ret);
>
>         spin_lock(&tree->lock);
> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
>         zswap_trees[type] = NULL;
>  }
>
> -static struct zpool_ops zswap_zpool_ops = {
> -       .evict = zswap_writeback_entry
> -};
> -
>  static void zswap_frontswap_init(unsigned type)
>  {
>         struct zswap_tree *tree;
> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
>  **********************************/
>  static int __init init_zswap(void)
>  {
> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> +       struct zswap_pool *pool;
>
> -       pr_info("loading zswap\n");
> -
> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> -                                       &zswap_zpool_ops);
> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
> -               pr_info("%s zpool not available\n", zswap_zpool_type);
> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> -                                       &zswap_zpool_ops);
> -       }
> -       if (!zswap_pool) {
> -               pr_err("%s zpool not available\n", zswap_zpool_type);
> -               pr_err("zpool creation failed\n");
> -               goto error;
> -       }
> -       pr_info("using %s pool\n", zswap_zpool_type);
> +       pr_info("loading\n");
>
>         if (zswap_entry_cache_create()) {
>                 pr_err("entry cache creation failed\n");
> -               goto cachefail;
> +               goto cache_fail;
>         }
> -       if (zswap_comp_init()) {
> -               pr_err("compressor initialization failed\n");
> -               goto compfail;
> +
> +       if (zswap_cpu_dstmem_init()) {
> +               pr_err("dstmem alloc failed\n");
> +               goto dstmem_fail;
>         }
> -       if (zswap_cpu_init()) {
> -               pr_err("per-cpu initialization failed\n");
> -               goto pcpufail;
> +
> +       pool = __zswap_pool_create_fallback();
> +       if (!pool) {
> +               pr_err("pool creation failed\n");
> +               goto pool_fail;
>         }
> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> +               zpool_get_type(pool->zpool));
> +
> +       list_add(&pool->list, &zswap_pools);
>
>         frontswap_register_ops(&zswap_frontswap_ops);
>         if (zswap_debugfs_init())
>                 pr_warn("debugfs initialization failed\n");
>         return 0;
> -pcpufail:
> -       zswap_comp_exit();
> -compfail:
> +
> +pool_fail:
> +       zswap_cpu_dstmem_destroy();
> +dstmem_fail:
>         zswap_entry_cache_destroy();
> -cachefail:
> -       zpool_destroy_pool(zswap_pool);
> -error:
> +cache_fail:
>         return -ENOMEM;
>  }
>  /* must be late so crypto has time to come up */
> --
> 2.1.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dan Streetman June 10, 2015, 8:54 p.m. UTC | #2
On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
>> Add dynamic creation of pools.  Move the static crypto compression
>> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
>> the pool it's in.
>
> Seth, as far as the design, from your previous comments I assume you
> were thinking of maintaining separate lists of zpools and compressors?
>  I do see how that will reduce duplication of zpools and compressors,
> but it also requires adding a new pointer to each zswap_entry, and
> increasing the amount of code to manage each list separately.  And the
> most common case in zswap will be just a single zpool and compressor,
> not repeatedly changing params.  What do you think?

Any opinion on this patch?  If you want, I can break it up so there's
a list of zpools and a list of compressors.  Either the combined way
(this patch) or separate lists works for me, as long as the params are
changeable at runtime :-)


>
>>
>> This is required by a separate patch which enables changing the
>> zswap zpool and compressor params at runtime.
>>
>> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
>> ---
>>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
>>  1 file changed, 408 insertions(+), 142 deletions(-)
>>
>> diff --git a/mm/zswap.c b/mm/zswap.c
>> index 2d5727b..fc93770 100644
>> --- a/mm/zswap.c
>> +++ b/mm/zswap.c
>> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
>>  static struct zpool *zswap_pool;
>>
>>  /*********************************
>> -* compression functions
>> +* data structures
>>  **********************************/
>> -/* per-cpu compression transforms */
>> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
>>
>> -enum comp_op {
>> -       ZSWAP_COMPOP_COMPRESS,
>> -       ZSWAP_COMPOP_DECOMPRESS
>> +struct zswap_pool {
>> +       struct zpool *zpool;
>> +       struct kref kref;
>> +       struct list_head list;
>> +       struct rcu_head rcu_head;
>> +       struct notifier_block notifier;
>> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
>> +       struct crypto_comp * __percpu *tfm;
>>  };
>>
>> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
>> -                               u8 *dst, unsigned int *dlen)
>> -{
>> -       struct crypto_comp *tfm;
>> -       int ret;
>> -
>> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
>> -       switch (op) {
>> -       case ZSWAP_COMPOP_COMPRESS:
>> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
>> -               break;
>> -       case ZSWAP_COMPOP_DECOMPRESS:
>> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
>> -               break;
>> -       default:
>> -               ret = -EINVAL;
>> -       }
>> -
>> -       put_cpu();
>> -       return ret;
>> -}
>> -
>> -static int __init zswap_comp_init(void)
>> -{
>> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>> -               pr_info("%s compressor not available\n", zswap_compressor);
>> -               /* fall back to default compressor */
>> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
>> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
>> -                       /* can't even load the default compressor */
>> -                       return -ENODEV;
>> -       }
>> -       pr_info("using %s compressor\n", zswap_compressor);
>> -
>> -       /* alloc percpu transforms */
>> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
>> -       if (!zswap_comp_pcpu_tfms)
>> -               return -ENOMEM;
>> -       return 0;
>> -}
>> -
>> -static void __init zswap_comp_exit(void)
>> -{
>> -       /* free percpu transforms */
>> -       free_percpu(zswap_comp_pcpu_tfms);
>> -}
>> -
>> -/*********************************
>> -* data structures
>> -**********************************/
>>  /*
>>   * struct zswap_entry
>>   *
>> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
>>   * page within zswap.
>>   *
>>   * rbnode - links the entry into red-black tree for the appropriate swap type
>> + * offset - the swap offset for the entry.  Index into the red-black tree.
>>   * refcount - the number of outstanding reference to the entry. This is needed
>>   *            to protect against premature freeing of the entry by code
>>   *            concurrent calls to load, invalidate, and writeback.  The lock
>>   *            for the zswap_tree structure that contains the entry must
>>   *            be held while changing the refcount.  Since the lock must
>>   *            be held, there is no reason to also make refcount atomic.
>> - * offset - the swap offset for the entry.  Index into the red-black tree.
>> - * handle - zpool allocation handle that stores the compressed page data
>>   * length - the length in bytes of the compressed page data.  Needed during
>>   *          decompression
>> + * pool - the zswap_pool the entry's data is in
>> + * handle - zpool allocation handle that stores the compressed page data
>>   */
>>  struct zswap_entry {
>>         struct rb_node rbnode;
>>         pgoff_t offset;
>>         int refcount;
>>         unsigned int length;
>> +       struct zswap_pool *pool;
>>         unsigned long handle;
>>  };
>>
>> @@ -201,6 +156,44 @@ struct zswap_tree {
>>
>>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
>>
>> +/* RCU-protected iteration */
>> +static LIST_HEAD(zswap_pools);
>> +/* protects zswap_pools list modification */
>> +static DEFINE_SPINLOCK(zswap_pools_lock);
>> +
>> +/*********************************
>> +* helpers and fwd declarations
>> +**********************************/
>> +
>> +#define zswap_pool_debug(msg, p)                               \
>> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
>> +                zpool_get_type((p)->zpool))
>> +
>> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
>> +static int zswap_pool_get(struct zswap_pool *pool);
>> +static void zswap_pool_put(struct zswap_pool *pool);
>> +
>> +static bool zswap_is_full(void)
>> +{
>> +       return totalram_pages * zswap_max_pool_percent / 100 <
>> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>> +}
>> +
>> +static void zswap_update_total_size(void)
>> +{
>> +       struct zswap_pool *pool;
>> +       u64 total = 0;
>> +
>> +       rcu_read_lock();
>> +
>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>> +               total += zpool_get_total_size(pool->zpool);
>> +
>> +       rcu_read_unlock();
>> +
>> +       zswap_pool_total_size = total;
>> +}
>> +
>>  /*********************************
>>  * zswap entry functions
>>  **********************************/
>> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
>>   */
>>  static void zswap_free_entry(struct zswap_entry *entry)
>>  {
>> -       zpool_free(zswap_pool, entry->handle);
>> +       zpool_free(entry->pool->zpool, entry->handle);
>> +       zswap_pool_put(entry->pool);
>>         zswap_entry_cache_free(entry);
>>         atomic_dec(&zswap_stored_pages);
>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>> +       zswap_update_total_size();
>>  }
>>
>>  /* caller must hold the tree lock */
>> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
>>  **********************************/
>>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
>>
>> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
>>  {
>> -       struct crypto_comp *tfm;
>>         u8 *dst;
>>
>>         switch (action) {
>>         case CPU_UP_PREPARE:
>> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
>> -               if (IS_ERR(tfm)) {
>> -                       pr_err("can't allocate compressor transform\n");
>> -                       return NOTIFY_BAD;
>> -               }
>> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
>>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
>>                 if (!dst) {
>>                         pr_err("can't allocate compressor buffer\n");
>> -                       crypto_free_comp(tfm);
>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>>                         return NOTIFY_BAD;
>>                 }
>>                 per_cpu(zswap_dstmem, cpu) = dst;
>>                 break;
>>         case CPU_DEAD:
>>         case CPU_UP_CANCELED:
>> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
>> -               if (tfm) {
>> -                       crypto_free_comp(tfm);
>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>> -               }
>>                 dst = per_cpu(zswap_dstmem, cpu);
>>                 kfree(dst);
>>                 per_cpu(zswap_dstmem, cpu) = NULL;
>> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>>         return NOTIFY_OK;
>>  }
>>
>> -static int zswap_cpu_notifier(struct notifier_block *nb,
>> -                               unsigned long action, void *pcpu)
>> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
>> +                                    unsigned long action, void *pcpu)
>>  {
>> -       unsigned long cpu = (unsigned long)pcpu;
>> -       return __zswap_cpu_notifier(action, cpu);
>> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
>>  }
>>
>> -static struct notifier_block zswap_cpu_notifier_block = {
>> -       .notifier_call = zswap_cpu_notifier
>> +static struct notifier_block zswap_dstmem_notifier = {
>> +       .notifier_call =        zswap_cpu_dstmem_notifier,
>>  };
>>
>> -static int __init zswap_cpu_init(void)
>> +static int __init zswap_cpu_dstmem_init(void)
>>  {
>>         unsigned long cpu;
>>
>>         cpu_notifier_register_begin();
>>         for_each_online_cpu(cpu)
>> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
>> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
>> +                   NOTIFY_BAD)
>>                         goto cleanup;
>> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
>> +       __register_cpu_notifier(&zswap_dstmem_notifier);
>>         cpu_notifier_register_done();
>>         return 0;
>>
>>  cleanup:
>>         for_each_online_cpu(cpu)
>> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>>         cpu_notifier_register_done();
>>         return -ENOMEM;
>>  }
>>
>> +static void zswap_cpu_dstmem_destroy(void)
>> +{
>> +       unsigned long cpu;
>> +
>> +       cpu_notifier_register_begin();
>> +       for_each_online_cpu(cpu)
>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
>> +       cpu_notifier_register_done();
>> +}
>> +
>> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
>> +                                    unsigned long action, unsigned long cpu)
>> +{
>> +       struct crypto_comp *tfm;
>> +
>> +       switch (action) {
>> +       case CPU_UP_PREPARE:
>> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
>> +                       break;
>> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
>> +               if (IS_ERR_OR_NULL(tfm)) {
>> +                       pr_err("could not alloc crypto comp %s : %ld\n",
>> +                              pool->tfm_name, PTR_ERR(tfm));
>> +                       return NOTIFY_BAD;
>> +               }
>> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
>> +               break;
>> +       case CPU_DEAD:
>> +       case CPU_UP_CANCELED:
>> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
>> +               if (!IS_ERR_OR_NULL(tfm))
>> +                       crypto_free_comp(tfm);
>> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
>> +               break;
>> +       default:
>> +               break;
>> +       }
>> +       return NOTIFY_OK;
>> +}
>> +
>> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
>> +                                  unsigned long action, void *pcpu)
>> +{
>> +       unsigned long cpu = (unsigned long)pcpu;
>> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
>> +
>> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
>> +}
>> +
>> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
>> +{
>> +       unsigned long cpu;
>> +
>> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
>> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
>> +
>> +       cpu_notifier_register_begin();
>> +       for_each_online_cpu(cpu)
>> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
>> +                   NOTIFY_BAD)
>> +                       goto cleanup;
>> +       __register_cpu_notifier(&pool->notifier);
>> +       cpu_notifier_register_done();
>> +       return 0;
>> +
>> +cleanup:
>> +       for_each_online_cpu(cpu)
>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>> +       cpu_notifier_register_done();
>> +       return -ENOMEM;
>> +}
>> +
>> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
>> +{
>> +       unsigned long cpu;
>> +
>> +       cpu_notifier_register_begin();
>> +       for_each_online_cpu(cpu)
>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>> +       __unregister_cpu_notifier(&pool->notifier);
>> +       cpu_notifier_register_done();
>> +}
>> +
>>  /*********************************
>> -* helpers
>> +* pool functions
>>  **********************************/
>> -static bool zswap_is_full(void)
>> +
>> +static struct zswap_pool *__zswap_pool_current(void)
>>  {
>> -       return totalram_pages * zswap_max_pool_percent / 100 <
>> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>> +       struct zswap_pool *pool;
>> +
>> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
>> +       WARN_ON(!pool);
>> +
>> +       return pool;
>> +}
>> +
>> +static struct zswap_pool *zswap_pool_current(void)
>> +{
>> +       assert_spin_locked(&zswap_pools_lock);
>> +
>> +       return __zswap_pool_current();
>> +}
>> +
>> +static struct zswap_pool *zswap_pool_current_get(void)
>> +{
>> +       struct zswap_pool *pool;
>> +
>> +       rcu_read_lock();
>> +
>> +       pool = __zswap_pool_current();
>> +       if (!pool || !zswap_pool_get(pool))
>> +               pool = NULL;
>> +
>> +       rcu_read_unlock();
>> +
>> +       return pool;
>> +}
>> +
>> +static struct zswap_pool *zswap_pool_last_get(void)
>> +{
>> +       struct zswap_pool *pool, *last = NULL;
>> +
>> +       rcu_read_lock();
>> +
>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>> +               last = pool;
>> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
>> +               last = NULL;
>> +
>> +       rcu_read_unlock();
>> +
>> +       return last;
>> +}
>> +
>> +static struct zpool_ops zswap_zpool_ops = {
>> +       .evict = zswap_writeback_entry
>> +};
>> +
>> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
>> +{
>> +       struct zswap_pool *pool;
>> +
>> +       assert_spin_locked(&zswap_pools_lock);
>> +
>> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
>> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
>> +                       continue;
>> +               if (strncmp(zpool_get_type(pool->zpool), type,
>> +                           sizeof(zswap_zpool_type)))
>> +                       continue;
>> +               /* if we can't get it, it's about to be destroyed */
>> +               if (!zswap_pool_get(pool))
>> +                       continue;
>> +               return pool;
>> +       }
>> +
>> +       return NULL;
>> +}
>> +
>> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>> +{
>> +       struct zswap_pool *pool;
>> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>> +
>> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
>> +       if (!pool) {
>> +               pr_err("pool alloc failed\n");
>> +               return NULL;
>> +       }
>> +
>> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
>> +       if (!pool->zpool) {
>> +               pr_err("%s zpool not available\n", type);
>> +               goto error;
>> +       }
>> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
>> +
>> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
>> +       pool->tfm = alloc_percpu(struct crypto_comp *);
>> +       if (!pool->tfm) {
>> +               pr_err("percpu alloc failed\n");
>> +               goto error;
>> +       }
>> +
>> +       if (zswap_cpu_comp_init(pool))
>> +               goto error;
>> +       pr_debug("using %s compressor\n", pool->tfm_name);
>> +
>> +       /* being the current pool takes 1 ref; this func expects the
>> +        * caller to always add the new pool as the current pool
>> +        */
>> +       kref_init(&pool->kref);
>> +       INIT_LIST_HEAD(&pool->list);
>> +
>> +       zswap_pool_debug("created", pool);
>> +
>> +       return pool;
>> +
>> +error:
>> +       free_percpu(pool->tfm);
>> +       if (pool->zpool)
>> +               zpool_destroy_pool(pool->zpool);
>> +       kfree(pool);
>> +       return NULL;
>> +}
>> +
>> +static struct zswap_pool *__zswap_pool_create_fallback(void)
>> +{
>> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>> +               pr_err("compressor %s not available, using default %s\n",
>> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
>> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
>> +                       sizeof(zswap_compressor));
>> +       }
>> +       if (!zpool_has_pool(zswap_zpool_type)) {
>> +               pr_err("zpool %s not available, using default %s\n",
>> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
>> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
>> +                       sizeof(zswap_zpool_type));
>> +       }
>> +
>> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
>> +}
>> +
>> +static void zswap_pool_destroy(struct zswap_pool *pool)
>> +{
>> +       zswap_pool_debug("destroying", pool);
>> +
>> +       zswap_cpu_comp_destroy(pool);
>> +       free_percpu(pool->tfm);
>> +       zpool_destroy_pool(pool->zpool);
>> +       kfree(pool);
>> +}
>> +
>> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
>> +{
>> +       return kref_get_unless_zero(&pool->kref);
>> +}
>> +
>> +static void __zswap_pool_release(struct rcu_head *head)
>> +{
>> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
>> +
>> +       /* nobody should have been able to get a kref... */
>> +       WARN_ON(kref_get_unless_zero(&pool->kref));
>> +
>> +       /* pool is now off zswap_pools list and has no references. */
>> +       zswap_pool_destroy(pool);
>> +}
>> +
>> +static void __zswap_pool_empty(struct kref *kref)
>> +{
>> +       struct zswap_pool *pool;
>> +
>> +       pool = container_of(kref, typeof(*pool), kref);
>> +
>> +       spin_lock(&zswap_pools_lock);
>> +
>> +       WARN_ON(pool == zswap_pool_current());
>> +
>> +       list_del_rcu(&pool->list);
>> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
>> +
>> +       spin_unlock(&zswap_pools_lock);
>> +}
>> +
>> +static void zswap_pool_put(struct zswap_pool *pool)
>> +{
>> +       kref_put(&pool->kref, __zswap_pool_empty);
>> +}
>> +
>>  }
>>
>>  /*********************************
>> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>         pgoff_t offset;
>>         struct zswap_entry *entry;
>>         struct page *page;
>> +       struct crypto_comp *tfm;
>>         u8 *src, *dst;
>>         unsigned int dlen;
>>         int ret;
>> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
>>                 /* decompress */
>>                 dlen = PAGE_SIZE;
>> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>                 dst = kmap_atomic(page);
>> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
>> -                               entry->length, dst, &dlen);
>> +               tfm = *get_cpu_ptr(entry->pool->tfm);
>> +               ret = crypto_comp_decompress(tfm, src, entry->length,
>> +                                            dst, &dlen);
>> +               put_cpu_ptr(entry->pool->tfm);
>>                 kunmap_atomic(dst);
>> -               zpool_unmap_handle(zswap_pool, entry->handle);
>> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>                 BUG_ON(ret);
>>                 BUG_ON(dlen != PAGE_SIZE);
>>
>> @@ -633,6 +882,22 @@ end:
>>         return ret;
>>  }
>>
>> +static int zswap_shrink(void)
>> +{
>> +       struct zswap_pool *pool;
>> +       int ret;
>> +
>> +       pool = zswap_pool_last_get();
>> +       if (!pool)
>> +               return -ENOENT;
>> +
>> +       ret = zpool_shrink(pool->zpool, 1, NULL);
>> +
>> +       zswap_pool_put(pool);
>> +
>> +       return ret;
>> +}
>> +
>>  /*********************************
>>  * frontswap hooks
>>  **********************************/
>> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>  {
>>         struct zswap_tree *tree = zswap_trees[type];
>>         struct zswap_entry *entry, *dupentry;
>> +       struct crypto_comp *tfm;
>>         int ret;
>>         unsigned int dlen = PAGE_SIZE, len;
>>         unsigned long handle;
>> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>         /* reclaim space if needed */
>>         if (zswap_is_full()) {
>>                 zswap_pool_limit_hit++;
>> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
>> +               if (zswap_shrink()) {
>>                         zswap_reject_reclaim_fail++;
>>                         ret = -ENOMEM;
>>                         goto reject;
>> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>                 goto reject;
>>         }
>>
>> +       /* if entry is successfully added, it keeps the reference */
>> +       entry->pool = zswap_pool_current_get();
>> +       if (!entry->pool) {
>> +               ret = -EINVAL;
>> +               goto freepage;
>> +       }
>> +
>>         /* compress */
>>         dst = get_cpu_var(zswap_dstmem);
>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>>         src = kmap_atomic(page);
>> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
>> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
>>         kunmap_atomic(src);
>> +       put_cpu_ptr(entry->pool->tfm);
>>         if (ret) {
>>                 ret = -EINVAL;
>> -               goto freepage;
>> +               goto put_dstmem;
>>         }
>>
>>         /* store */
>>         len = dlen + sizeof(struct zswap_header);
>> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
>> -               &handle);
>> +       ret = zpool_malloc(entry->pool->zpool, len,
>> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
>>         if (ret == -ENOSPC) {
>>                 zswap_reject_compress_poor++;
>> -               goto freepage;
>> +               goto put_dstmem;
>>         }
>>         if (ret) {
>>                 zswap_reject_alloc_fail++;
>> -               goto freepage;
>> +               goto put_dstmem;
>>         }
>> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
>> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
>>         zhdr->swpentry = swp_entry(type, offset);
>>         buf = (u8 *)(zhdr + 1);
>>         memcpy(buf, dst, dlen);
>> -       zpool_unmap_handle(zswap_pool, handle);
>> +       zpool_unmap_handle(entry->pool->zpool, handle);
>>         put_cpu_var(zswap_dstmem);
>>
>>         /* populate entry */
>> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>
>>         /* update stats */
>>         atomic_inc(&zswap_stored_pages);
>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>> +       zswap_update_total_size();
>>
>>         return 0;
>>
>> -freepage:
>> +put_dstmem:
>>         put_cpu_var(zswap_dstmem);
>> +       zswap_pool_put(entry->pool);
>> +freepage:
>>         zswap_entry_cache_free(entry);
>>  reject:
>>         return ret;
>> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>  {
>>         struct zswap_tree *tree = zswap_trees[type];
>>         struct zswap_entry *entry;
>> +       struct crypto_comp *tfm;
>>         u8 *src, *dst;
>>         unsigned int dlen;
>>         int ret;
>> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>
>>         /* decompress */
>>         dlen = PAGE_SIZE;
>> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>         dst = kmap_atomic(page);
>> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
>> -               dst, &dlen);
>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
>> +       put_cpu_ptr(entry->pool->tfm);
>>         kunmap_atomic(dst);
>> -       zpool_unmap_handle(zswap_pool, entry->handle);
>> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>         BUG_ON(ret);
>>
>>         spin_lock(&tree->lock);
>> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
>>         zswap_trees[type] = NULL;
>>  }
>>
>> -static struct zpool_ops zswap_zpool_ops = {
>> -       .evict = zswap_writeback_entry
>> -};
>> -
>>  static void zswap_frontswap_init(unsigned type)
>>  {
>>         struct zswap_tree *tree;
>> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
>>  **********************************/
>>  static int __init init_zswap(void)
>>  {
>> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>> +       struct zswap_pool *pool;
>>
>> -       pr_info("loading zswap\n");
>> -
>> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>> -                                       &zswap_zpool_ops);
>> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
>> -               pr_info("%s zpool not available\n", zswap_zpool_type);
>> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
>> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>> -                                       &zswap_zpool_ops);
>> -       }
>> -       if (!zswap_pool) {
>> -               pr_err("%s zpool not available\n", zswap_zpool_type);
>> -               pr_err("zpool creation failed\n");
>> -               goto error;
>> -       }
>> -       pr_info("using %s pool\n", zswap_zpool_type);
>> +       pr_info("loading\n");
>>
>>         if (zswap_entry_cache_create()) {
>>                 pr_err("entry cache creation failed\n");
>> -               goto cachefail;
>> +               goto cache_fail;
>>         }
>> -       if (zswap_comp_init()) {
>> -               pr_err("compressor initialization failed\n");
>> -               goto compfail;
>> +
>> +       if (zswap_cpu_dstmem_init()) {
>> +               pr_err("dstmem alloc failed\n");
>> +               goto dstmem_fail;
>>         }
>> -       if (zswap_cpu_init()) {
>> -               pr_err("per-cpu initialization failed\n");
>> -               goto pcpufail;
>> +
>> +       pool = __zswap_pool_create_fallback();
>> +       if (!pool) {
>> +               pr_err("pool creation failed\n");
>> +               goto pool_fail;
>>         }
>> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
>> +               zpool_get_type(pool->zpool));
>> +
>> +       list_add(&pool->list, &zswap_pools);
>>
>>         frontswap_register_ops(&zswap_frontswap_ops);
>>         if (zswap_debugfs_init())
>>                 pr_warn("debugfs initialization failed\n");
>>         return 0;
>> -pcpufail:
>> -       zswap_comp_exit();
>> -compfail:
>> +
>> +pool_fail:
>> +       zswap_cpu_dstmem_destroy();
>> +dstmem_fail:
>>         zswap_entry_cache_destroy();
>> -cachefail:
>> -       zpool_destroy_pool(zswap_pool);
>> -error:
>> +cache_fail:
>>         return -ENOMEM;
>>  }
>>  /* must be late so crypto has time to come up */
>> --
>> 2.1.0
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Seth Jennings June 11, 2015, 6:51 p.m. UTC | #3
On Wed, Jun 10, 2015 at 04:54:24PM -0400, Dan Streetman wrote:
> On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> > On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> >> Add dynamic creation of pools.  Move the static crypto compression
> >> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
> >> the pool it's in.
> >
> > Seth, as far as the design, from your previous comments I assume you
> > were thinking of maintaining separate lists of zpools and compressors?
> >  I do see how that will reduce duplication of zpools and compressors,
> > but it also requires adding a new pointer to each zswap_entry, and
> > increasing the amount of code to manage each list separately.  And the
> > most common case in zswap will be just a single zpool and compressor,
> > not repeatedly changing params.  What do you think?
> 
> Any opinion on this patch?  If you want, I can break it up so there's
> a list of zpools and a list of compressors.  Either the combined way
> (this patch) or separate lists works for me, as long as the params are
> changeable at runtime :-)

I'm still reviewing the code.  I was going to test it too but it doesn't
compile for me:

  CC      mm/zswap.o
mm/zswap.c: In function ‘__zswap_pool_create_fallback’:
mm/zswap.c:605:10: warning: argument to ‘sizeof’ in ‘strncpy’ call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess]
    sizeof(zswap_compressor));
          ^
mm/zswap.c:607:7: error: implicit declaration of function ‘zpool_has_pool’ [-Werror=implicit-function-declaration]
  if (!zpool_has_pool(zswap_zpool_type)) {
       ^
mm/zswap.c:611:10: warning: argument to ‘sizeof’ in ‘strncpy’ call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess]
    sizeof(zswap_zpool_type));
          ^
mm/zswap.c: At top level:
mm/zswap.c:664:1: error: expected identifier or ‘(’ before ‘}’ token
 }
 ^
mm/zswap.c:99:22: warning: ‘zswap_pool’ defined but not used [-Wunused-variable]
 static struct zpool *zswap_pool;
                      ^
mm/zswap.c:531:27: warning: ‘zswap_pool_find_get’ defined but not used [-Wunused-function]
 static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
                           ^

Seth

> 
> 
> >
> >>
> >> This is required by a separate patch which enables changing the
> >> zswap zpool and compressor params at runtime.
> >>
> >> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
> >> ---
> >>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
> >>  1 file changed, 408 insertions(+), 142 deletions(-)
> >>
> >> diff --git a/mm/zswap.c b/mm/zswap.c
> >> index 2d5727b..fc93770 100644
> >> --- a/mm/zswap.c
> >> +++ b/mm/zswap.c
> >> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
> >>  static struct zpool *zswap_pool;
> >>
> >>  /*********************************
> >> -* compression functions
> >> +* data structures
> >>  **********************************/
> >> -/* per-cpu compression transforms */
> >> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
> >>
> >> -enum comp_op {
> >> -       ZSWAP_COMPOP_COMPRESS,
> >> -       ZSWAP_COMPOP_DECOMPRESS
> >> +struct zswap_pool {
> >> +       struct zpool *zpool;
> >> +       struct kref kref;
> >> +       struct list_head list;
> >> +       struct rcu_head rcu_head;
> >> +       struct notifier_block notifier;
> >> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
> >> +       struct crypto_comp * __percpu *tfm;
> >>  };
> >>
> >> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
> >> -                               u8 *dst, unsigned int *dlen)
> >> -{
> >> -       struct crypto_comp *tfm;
> >> -       int ret;
> >> -
> >> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
> >> -       switch (op) {
> >> -       case ZSWAP_COMPOP_COMPRESS:
> >> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
> >> -               break;
> >> -       case ZSWAP_COMPOP_DECOMPRESS:
> >> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
> >> -               break;
> >> -       default:
> >> -               ret = -EINVAL;
> >> -       }
> >> -
> >> -       put_cpu();
> >> -       return ret;
> >> -}
> >> -
> >> -static int __init zswap_comp_init(void)
> >> -{
> >> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> >> -               pr_info("%s compressor not available\n", zswap_compressor);
> >> -               /* fall back to default compressor */
> >> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
> >> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
> >> -                       /* can't even load the default compressor */
> >> -                       return -ENODEV;
> >> -       }
> >> -       pr_info("using %s compressor\n", zswap_compressor);
> >> -
> >> -       /* alloc percpu transforms */
> >> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
> >> -       if (!zswap_comp_pcpu_tfms)
> >> -               return -ENOMEM;
> >> -       return 0;
> >> -}
> >> -
> >> -static void __init zswap_comp_exit(void)
> >> -{
> >> -       /* free percpu transforms */
> >> -       free_percpu(zswap_comp_pcpu_tfms);
> >> -}
> >> -
> >> -/*********************************
> >> -* data structures
> >> -**********************************/
> >>  /*
> >>   * struct zswap_entry
> >>   *
> >> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
> >>   * page within zswap.
> >>   *
> >>   * rbnode - links the entry into red-black tree for the appropriate swap type
> >> + * offset - the swap offset for the entry.  Index into the red-black tree.
> >>   * refcount - the number of outstanding reference to the entry. This is needed
> >>   *            to protect against premature freeing of the entry by code
> >>   *            concurrent calls to load, invalidate, and writeback.  The lock
> >>   *            for the zswap_tree structure that contains the entry must
> >>   *            be held while changing the refcount.  Since the lock must
> >>   *            be held, there is no reason to also make refcount atomic.
> >> - * offset - the swap offset for the entry.  Index into the red-black tree.
> >> - * handle - zpool allocation handle that stores the compressed page data
> >>   * length - the length in bytes of the compressed page data.  Needed during
> >>   *          decompression
> >> + * pool - the zswap_pool the entry's data is in
> >> + * handle - zpool allocation handle that stores the compressed page data
> >>   */
> >>  struct zswap_entry {
> >>         struct rb_node rbnode;
> >>         pgoff_t offset;
> >>         int refcount;
> >>         unsigned int length;
> >> +       struct zswap_pool *pool;
> >>         unsigned long handle;
> >>  };
> >>
> >> @@ -201,6 +156,44 @@ struct zswap_tree {
> >>
> >>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
> >>
> >> +/* RCU-protected iteration */
> >> +static LIST_HEAD(zswap_pools);
> >> +/* protects zswap_pools list modification */
> >> +static DEFINE_SPINLOCK(zswap_pools_lock);
> >> +
> >> +/*********************************
> >> +* helpers and fwd declarations
> >> +**********************************/
> >> +
> >> +#define zswap_pool_debug(msg, p)                               \
> >> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
> >> +                zpool_get_type((p)->zpool))
> >> +
> >> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
> >> +static int zswap_pool_get(struct zswap_pool *pool);
> >> +static void zswap_pool_put(struct zswap_pool *pool);
> >> +
> >> +static bool zswap_is_full(void)
> >> +{
> >> +       return totalram_pages * zswap_max_pool_percent / 100 <
> >> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> >> +}
> >> +
> >> +static void zswap_update_total_size(void)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +       u64 total = 0;
> >> +
> >> +       rcu_read_lock();
> >> +
> >> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> >> +               total += zpool_get_total_size(pool->zpool);
> >> +
> >> +       rcu_read_unlock();
> >> +
> >> +       zswap_pool_total_size = total;
> >> +}
> >> +
> >>  /*********************************
> >>  * zswap entry functions
> >>  **********************************/
> >> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> >>   */
> >>  static void zswap_free_entry(struct zswap_entry *entry)
> >>  {
> >> -       zpool_free(zswap_pool, entry->handle);
> >> +       zpool_free(entry->pool->zpool, entry->handle);
> >> +       zswap_pool_put(entry->pool);
> >>         zswap_entry_cache_free(entry);
> >>         atomic_dec(&zswap_stored_pages);
> >> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> >> +       zswap_update_total_size();
> >>  }
> >>
> >>  /* caller must hold the tree lock */
> >> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> >>  **********************************/
> >>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
> >>
> >> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> >> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
> >>  {
> >> -       struct crypto_comp *tfm;
> >>         u8 *dst;
> >>
> >>         switch (action) {
> >>         case CPU_UP_PREPARE:
> >> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
> >> -               if (IS_ERR(tfm)) {
> >> -                       pr_err("can't allocate compressor transform\n");
> >> -                       return NOTIFY_BAD;
> >> -               }
> >> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
> >>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
> >>                 if (!dst) {
> >>                         pr_err("can't allocate compressor buffer\n");
> >> -                       crypto_free_comp(tfm);
> >> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> >>                         return NOTIFY_BAD;
> >>                 }
> >>                 per_cpu(zswap_dstmem, cpu) = dst;
> >>                 break;
> >>         case CPU_DEAD:
> >>         case CPU_UP_CANCELED:
> >> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
> >> -               if (tfm) {
> >> -                       crypto_free_comp(tfm);
> >> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> >> -               }
> >>                 dst = per_cpu(zswap_dstmem, cpu);
> >>                 kfree(dst);
> >>                 per_cpu(zswap_dstmem, cpu) = NULL;
> >> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> >>         return NOTIFY_OK;
> >>  }
> >>
> >> -static int zswap_cpu_notifier(struct notifier_block *nb,
> >> -                               unsigned long action, void *pcpu)
> >> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
> >> +                                    unsigned long action, void *pcpu)
> >>  {
> >> -       unsigned long cpu = (unsigned long)pcpu;
> >> -       return __zswap_cpu_notifier(action, cpu);
> >> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
> >>  }
> >>
> >> -static struct notifier_block zswap_cpu_notifier_block = {
> >> -       .notifier_call = zswap_cpu_notifier
> >> +static struct notifier_block zswap_dstmem_notifier = {
> >> +       .notifier_call =        zswap_cpu_dstmem_notifier,
> >>  };
> >>
> >> -static int __init zswap_cpu_init(void)
> >> +static int __init zswap_cpu_dstmem_init(void)
> >>  {
> >>         unsigned long cpu;
> >>
> >>         cpu_notifier_register_begin();
> >>         for_each_online_cpu(cpu)
> >> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
> >> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
> >> +                   NOTIFY_BAD)
> >>                         goto cleanup;
> >> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
> >> +       __register_cpu_notifier(&zswap_dstmem_notifier);
> >>         cpu_notifier_register_done();
> >>         return 0;
> >>
> >>  cleanup:
> >>         for_each_online_cpu(cpu)
> >> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
> >> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> >>         cpu_notifier_register_done();
> >>         return -ENOMEM;
> >>  }
> >>
> >> +static void zswap_cpu_dstmem_destroy(void)
> >> +{
> >> +       unsigned long cpu;
> >> +
> >> +       cpu_notifier_register_begin();
> >> +       for_each_online_cpu(cpu)
> >> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> >> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
> >> +       cpu_notifier_register_done();
> >> +}
> >> +
> >> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
> >> +                                    unsigned long action, unsigned long cpu)
> >> +{
> >> +       struct crypto_comp *tfm;
> >> +
> >> +       switch (action) {
> >> +       case CPU_UP_PREPARE:
> >> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
> >> +                       break;
> >> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
> >> +               if (IS_ERR_OR_NULL(tfm)) {
> >> +                       pr_err("could not alloc crypto comp %s : %ld\n",
> >> +                              pool->tfm_name, PTR_ERR(tfm));
> >> +                       return NOTIFY_BAD;
> >> +               }
> >> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
> >> +               break;
> >> +       case CPU_DEAD:
> >> +       case CPU_UP_CANCELED:
> >> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
> >> +               if (!IS_ERR_OR_NULL(tfm))
> >> +                       crypto_free_comp(tfm);
> >> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
> >> +               break;
> >> +       default:
> >> +               break;
> >> +       }
> >> +       return NOTIFY_OK;
> >> +}
> >> +
> >> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
> >> +                                  unsigned long action, void *pcpu)
> >> +{
> >> +       unsigned long cpu = (unsigned long)pcpu;
> >> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
> >> +
> >> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
> >> +}
> >> +
> >> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
> >> +{
> >> +       unsigned long cpu;
> >> +
> >> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
> >> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
> >> +
> >> +       cpu_notifier_register_begin();
> >> +       for_each_online_cpu(cpu)
> >> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
> >> +                   NOTIFY_BAD)
> >> +                       goto cleanup;
> >> +       __register_cpu_notifier(&pool->notifier);
> >> +       cpu_notifier_register_done();
> >> +       return 0;
> >> +
> >> +cleanup:
> >> +       for_each_online_cpu(cpu)
> >> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> >> +       cpu_notifier_register_done();
> >> +       return -ENOMEM;
> >> +}
> >> +
> >> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
> >> +{
> >> +       unsigned long cpu;
> >> +
> >> +       cpu_notifier_register_begin();
> >> +       for_each_online_cpu(cpu)
> >> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> >> +       __unregister_cpu_notifier(&pool->notifier);
> >> +       cpu_notifier_register_done();
> >> +}
> >> +
> >>  /*********************************
> >> -* helpers
> >> +* pool functions
> >>  **********************************/
> >> -static bool zswap_is_full(void)
> >> +
> >> +static struct zswap_pool *__zswap_pool_current(void)
> >>  {
> >> -       return totalram_pages * zswap_max_pool_percent / 100 <
> >> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> >> +       struct zswap_pool *pool;
> >> +
> >> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
> >> +       WARN_ON(!pool);
> >> +
> >> +       return pool;
> >> +}
> >> +
> >> +static struct zswap_pool *zswap_pool_current(void)
> >> +{
> >> +       assert_spin_locked(&zswap_pools_lock);
> >> +
> >> +       return __zswap_pool_current();
> >> +}
> >> +
> >> +static struct zswap_pool *zswap_pool_current_get(void)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +
> >> +       rcu_read_lock();
> >> +
> >> +       pool = __zswap_pool_current();
> >> +       if (!pool || !zswap_pool_get(pool))
> >> +               pool = NULL;
> >> +
> >> +       rcu_read_unlock();
> >> +
> >> +       return pool;
> >> +}
> >> +
> >> +static struct zswap_pool *zswap_pool_last_get(void)
> >> +{
> >> +       struct zswap_pool *pool, *last = NULL;
> >> +
> >> +       rcu_read_lock();
> >> +
> >> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> >> +               last = pool;
> >> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
> >> +               last = NULL;
> >> +
> >> +       rcu_read_unlock();
> >> +
> >> +       return last;
> >> +}
> >> +
> >> +static struct zpool_ops zswap_zpool_ops = {
> >> +       .evict = zswap_writeback_entry
> >> +};
> >> +
> >> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +
> >> +       assert_spin_locked(&zswap_pools_lock);
> >> +
> >> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
> >> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
> >> +                       continue;
> >> +               if (strncmp(zpool_get_type(pool->zpool), type,
> >> +                           sizeof(zswap_zpool_type)))
> >> +                       continue;
> >> +               /* if we can't get it, it's about to be destroyed */
> >> +               if (!zswap_pool_get(pool))
> >> +                       continue;
> >> +               return pool;
> >> +       }
> >> +
> >> +       return NULL;
> >> +}
> >> +
> >> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> >> +
> >> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> >> +       if (!pool) {
> >> +               pr_err("pool alloc failed\n");
> >> +               return NULL;
> >> +       }
> >> +
> >> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
> >> +       if (!pool->zpool) {
> >> +               pr_err("%s zpool not available\n", type);
> >> +               goto error;
> >> +       }
> >> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> >> +
> >> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> >> +       pool->tfm = alloc_percpu(struct crypto_comp *);
> >> +       if (!pool->tfm) {
> >> +               pr_err("percpu alloc failed\n");
> >> +               goto error;
> >> +       }
> >> +
> >> +       if (zswap_cpu_comp_init(pool))
> >> +               goto error;
> >> +       pr_debug("using %s compressor\n", pool->tfm_name);
> >> +
> >> +       /* being the current pool takes 1 ref; this func expects the
> >> +        * caller to always add the new pool as the current pool
> >> +        */
> >> +       kref_init(&pool->kref);
> >> +       INIT_LIST_HEAD(&pool->list);
> >> +
> >> +       zswap_pool_debug("created", pool);
> >> +
> >> +       return pool;
> >> +
> >> +error:
> >> +       free_percpu(pool->tfm);
> >> +       if (pool->zpool)
> >> +               zpool_destroy_pool(pool->zpool);
> >> +       kfree(pool);
> >> +       return NULL;
> >> +}
> >> +
> >> +static struct zswap_pool *__zswap_pool_create_fallback(void)
> >> +{
> >> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> >> +               pr_err("compressor %s not available, using default %s\n",
> >> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
> >> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
> >> +                       sizeof(zswap_compressor));
> >> +       }
> >> +       if (!zpool_has_pool(zswap_zpool_type)) {
> >> +               pr_err("zpool %s not available, using default %s\n",
> >> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
> >> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
> >> +                       sizeof(zswap_zpool_type));
> >> +       }
> >> +
> >> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> >> +}
> >> +
> >> +static void zswap_pool_destroy(struct zswap_pool *pool)
> >> +{
> >> +       zswap_pool_debug("destroying", pool);
> >> +
> >> +       zswap_cpu_comp_destroy(pool);
> >> +       free_percpu(pool->tfm);
> >> +       zpool_destroy_pool(pool->zpool);
> >> +       kfree(pool);
> >> +}
> >> +
> >> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
> >> +{
> >> +       return kref_get_unless_zero(&pool->kref);
> >> +}
> >> +
> >> +static void __zswap_pool_release(struct rcu_head *head)
> >> +{
> >> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
> >> +
> >> +       /* nobody should have been able to get a kref... */
> >> +       WARN_ON(kref_get_unless_zero(&pool->kref));
> >> +
> >> +       /* pool is now off zswap_pools list and has no references. */
> >> +       zswap_pool_destroy(pool);
> >> +}
> >> +
> >> +static void __zswap_pool_empty(struct kref *kref)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +
> >> +       pool = container_of(kref, typeof(*pool), kref);
> >> +
> >> +       spin_lock(&zswap_pools_lock);
> >> +
> >> +       WARN_ON(pool == zswap_pool_current());
> >> +
> >> +       list_del_rcu(&pool->list);
> >> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
> >> +
> >> +       spin_unlock(&zswap_pools_lock);
> >> +}
> >> +
> >> +static void zswap_pool_put(struct zswap_pool *pool)
> >> +{
> >> +       kref_put(&pool->kref, __zswap_pool_empty);
> >> +}
> >> +
> >>  }
> >>
> >>  /*********************************
> >> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> >>         pgoff_t offset;
> >>         struct zswap_entry *entry;
> >>         struct page *page;
> >> +       struct crypto_comp *tfm;
> >>         u8 *src, *dst;
> >>         unsigned int dlen;
> >>         int ret;
> >> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> >>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
> >>                 /* decompress */
> >>                 dlen = PAGE_SIZE;
> >> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> >> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> >>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
> >>                 dst = kmap_atomic(page);
> >> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
> >> -                               entry->length, dst, &dlen);
> >> +               tfm = *get_cpu_ptr(entry->pool->tfm);
> >> +               ret = crypto_comp_decompress(tfm, src, entry->length,
> >> +                                            dst, &dlen);
> >> +               put_cpu_ptr(entry->pool->tfm);
> >>                 kunmap_atomic(dst);
> >> -               zpool_unmap_handle(zswap_pool, entry->handle);
> >> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
> >>                 BUG_ON(ret);
> >>                 BUG_ON(dlen != PAGE_SIZE);
> >>
> >> @@ -633,6 +882,22 @@ end:
> >>         return ret;
> >>  }
> >>
> >> +static int zswap_shrink(void)
> >> +{
> >> +       struct zswap_pool *pool;
> >> +       int ret;
> >> +
> >> +       pool = zswap_pool_last_get();
> >> +       if (!pool)
> >> +               return -ENOENT;
> >> +
> >> +       ret = zpool_shrink(pool->zpool, 1, NULL);
> >> +
> >> +       zswap_pool_put(pool);
> >> +
> >> +       return ret;
> >> +}
> >> +
> >>  /*********************************
> >>  * frontswap hooks
> >>  **********************************/
> >> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>  {
> >>         struct zswap_tree *tree = zswap_trees[type];
> >>         struct zswap_entry *entry, *dupentry;
> >> +       struct crypto_comp *tfm;
> >>         int ret;
> >>         unsigned int dlen = PAGE_SIZE, len;
> >>         unsigned long handle;
> >> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>         /* reclaim space if needed */
> >>         if (zswap_is_full()) {
> >>                 zswap_pool_limit_hit++;
> >> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
> >> +               if (zswap_shrink()) {
> >>                         zswap_reject_reclaim_fail++;
> >>                         ret = -ENOMEM;
> >>                         goto reject;
> >> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>                 goto reject;
> >>         }
> >>
> >> +       /* if entry is successfully added, it keeps the reference */
> >> +       entry->pool = zswap_pool_current_get();
> >> +       if (!entry->pool) {
> >> +               ret = -EINVAL;
> >> +               goto freepage;
> >> +       }
> >> +
> >>         /* compress */
> >>         dst = get_cpu_var(zswap_dstmem);
> >> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> >>         src = kmap_atomic(page);
> >> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
> >> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
> >>         kunmap_atomic(src);
> >> +       put_cpu_ptr(entry->pool->tfm);
> >>         if (ret) {
> >>                 ret = -EINVAL;
> >> -               goto freepage;
> >> +               goto put_dstmem;
> >>         }
> >>
> >>         /* store */
> >>         len = dlen + sizeof(struct zswap_header);
> >> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
> >> -               &handle);
> >> +       ret = zpool_malloc(entry->pool->zpool, len,
> >> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
> >>         if (ret == -ENOSPC) {
> >>                 zswap_reject_compress_poor++;
> >> -               goto freepage;
> >> +               goto put_dstmem;
> >>         }
> >>         if (ret) {
> >>                 zswap_reject_alloc_fail++;
> >> -               goto freepage;
> >> +               goto put_dstmem;
> >>         }
> >> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
> >> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
> >>         zhdr->swpentry = swp_entry(type, offset);
> >>         buf = (u8 *)(zhdr + 1);
> >>         memcpy(buf, dst, dlen);
> >> -       zpool_unmap_handle(zswap_pool, handle);
> >> +       zpool_unmap_handle(entry->pool->zpool, handle);
> >>         put_cpu_var(zswap_dstmem);
> >>
> >>         /* populate entry */
> >> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>
> >>         /* update stats */
> >>         atomic_inc(&zswap_stored_pages);
> >> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> >> +       zswap_update_total_size();
> >>
> >>         return 0;
> >>
> >> -freepage:
> >> +put_dstmem:
> >>         put_cpu_var(zswap_dstmem);
> >> +       zswap_pool_put(entry->pool);
> >> +freepage:
> >>         zswap_entry_cache_free(entry);
> >>  reject:
> >>         return ret;
> >> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> >>  {
> >>         struct zswap_tree *tree = zswap_trees[type];
> >>         struct zswap_entry *entry;
> >> +       struct crypto_comp *tfm;
> >>         u8 *src, *dst;
> >>         unsigned int dlen;
> >>         int ret;
> >> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> >>
> >>         /* decompress */
> >>         dlen = PAGE_SIZE;
> >> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> >> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> >>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
> >>         dst = kmap_atomic(page);
> >> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
> >> -               dst, &dlen);
> >> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> >> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
> >> +       put_cpu_ptr(entry->pool->tfm);
> >>         kunmap_atomic(dst);
> >> -       zpool_unmap_handle(zswap_pool, entry->handle);
> >> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
> >>         BUG_ON(ret);
> >>
> >>         spin_lock(&tree->lock);
> >> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
> >>         zswap_trees[type] = NULL;
> >>  }
> >>
> >> -static struct zpool_ops zswap_zpool_ops = {
> >> -       .evict = zswap_writeback_entry
> >> -};
> >> -
> >>  static void zswap_frontswap_init(unsigned type)
> >>  {
> >>         struct zswap_tree *tree;
> >> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
> >>  **********************************/
> >>  static int __init init_zswap(void)
> >>  {
> >> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> >> +       struct zswap_pool *pool;
> >>
> >> -       pr_info("loading zswap\n");
> >> -
> >> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> >> -                                       &zswap_zpool_ops);
> >> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
> >> -               pr_info("%s zpool not available\n", zswap_zpool_type);
> >> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
> >> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> >> -                                       &zswap_zpool_ops);
> >> -       }
> >> -       if (!zswap_pool) {
> >> -               pr_err("%s zpool not available\n", zswap_zpool_type);
> >> -               pr_err("zpool creation failed\n");
> >> -               goto error;
> >> -       }
> >> -       pr_info("using %s pool\n", zswap_zpool_type);
> >> +       pr_info("loading\n");
> >>
> >>         if (zswap_entry_cache_create()) {
> >>                 pr_err("entry cache creation failed\n");
> >> -               goto cachefail;
> >> +               goto cache_fail;
> >>         }
> >> -       if (zswap_comp_init()) {
> >> -               pr_err("compressor initialization failed\n");
> >> -               goto compfail;
> >> +
> >> +       if (zswap_cpu_dstmem_init()) {
> >> +               pr_err("dstmem alloc failed\n");
> >> +               goto dstmem_fail;
> >>         }
> >> -       if (zswap_cpu_init()) {
> >> -               pr_err("per-cpu initialization failed\n");
> >> -               goto pcpufail;
> >> +
> >> +       pool = __zswap_pool_create_fallback();
> >> +       if (!pool) {
> >> +               pr_err("pool creation failed\n");
> >> +               goto pool_fail;
> >>         }
> >> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> >> +               zpool_get_type(pool->zpool));
> >> +
> >> +       list_add(&pool->list, &zswap_pools);
> >>
> >>         frontswap_register_ops(&zswap_frontswap_ops);
> >>         if (zswap_debugfs_init())
> >>                 pr_warn("debugfs initialization failed\n");
> >>         return 0;
> >> -pcpufail:
> >> -       zswap_comp_exit();
> >> -compfail:
> >> +
> >> +pool_fail:
> >> +       zswap_cpu_dstmem_destroy();
> >> +dstmem_fail:
> >>         zswap_entry_cache_destroy();
> >> -cachefail:
> >> -       zpool_destroy_pool(zswap_pool);
> >> -error:
> >> +cache_fail:
> >>         return -ENOMEM;
> >>  }
> >>  /* must be late so crypto has time to come up */
> >> --
> >> 2.1.0
> >>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dan Streetman June 17, 2015, 11:13 p.m. UTC | #4
On Wed, Jun 10, 2015 at 4:54 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
>> On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
>>> Add dynamic creation of pools.  Move the static crypto compression
>>> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
>>> the pool it's in.
>>
>> Seth, as far as the design, from your previous comments I assume you
>> were thinking of maintaining separate lists of zpools and compressors?
>>  I do see how that will reduce duplication of zpools and compressors,
>> but it also requires adding a new pointer to each zswap_entry, and
>> increasing the amount of code to manage each list separately.  And the
>> most common case in zswap will be just a single zpool and compressor,
>> not repeatedly changing params.  What do you think?
>
> Any opinion on this patch?  If you want, I can break it up so there's
> a list of zpools and a list of compressors.  Either the combined way
> (this patch) or separate lists works for me, as long as the params are
> changeable at runtime :-)

You on vacation Seth?  Let me know what direction you prefer for this...

>
>
>>
>>>
>>> This is required by a separate patch which enables changing the
>>> zswap zpool and compressor params at runtime.
>>>
>>> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
>>> ---
>>>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
>>>  1 file changed, 408 insertions(+), 142 deletions(-)
>>>
>>> diff --git a/mm/zswap.c b/mm/zswap.c
>>> index 2d5727b..fc93770 100644
>>> --- a/mm/zswap.c
>>> +++ b/mm/zswap.c
>>> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
>>>  static struct zpool *zswap_pool;
>>>
>>>  /*********************************
>>> -* compression functions
>>> +* data structures
>>>  **********************************/
>>> -/* per-cpu compression transforms */
>>> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
>>>
>>> -enum comp_op {
>>> -       ZSWAP_COMPOP_COMPRESS,
>>> -       ZSWAP_COMPOP_DECOMPRESS
>>> +struct zswap_pool {
>>> +       struct zpool *zpool;
>>> +       struct kref kref;
>>> +       struct list_head list;
>>> +       struct rcu_head rcu_head;
>>> +       struct notifier_block notifier;
>>> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
>>> +       struct crypto_comp * __percpu *tfm;
>>>  };
>>>
>>> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
>>> -                               u8 *dst, unsigned int *dlen)
>>> -{
>>> -       struct crypto_comp *tfm;
>>> -       int ret;
>>> -
>>> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
>>> -       switch (op) {
>>> -       case ZSWAP_COMPOP_COMPRESS:
>>> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
>>> -               break;
>>> -       case ZSWAP_COMPOP_DECOMPRESS:
>>> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
>>> -               break;
>>> -       default:
>>> -               ret = -EINVAL;
>>> -       }
>>> -
>>> -       put_cpu();
>>> -       return ret;
>>> -}
>>> -
>>> -static int __init zswap_comp_init(void)
>>> -{
>>> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>>> -               pr_info("%s compressor not available\n", zswap_compressor);
>>> -               /* fall back to default compressor */
>>> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
>>> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
>>> -                       /* can't even load the default compressor */
>>> -                       return -ENODEV;
>>> -       }
>>> -       pr_info("using %s compressor\n", zswap_compressor);
>>> -
>>> -       /* alloc percpu transforms */
>>> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
>>> -       if (!zswap_comp_pcpu_tfms)
>>> -               return -ENOMEM;
>>> -       return 0;
>>> -}
>>> -
>>> -static void __init zswap_comp_exit(void)
>>> -{
>>> -       /* free percpu transforms */
>>> -       free_percpu(zswap_comp_pcpu_tfms);
>>> -}
>>> -
>>> -/*********************************
>>> -* data structures
>>> -**********************************/
>>>  /*
>>>   * struct zswap_entry
>>>   *
>>> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
>>>   * page within zswap.
>>>   *
>>>   * rbnode - links the entry into red-black tree for the appropriate swap type
>>> + * offset - the swap offset for the entry.  Index into the red-black tree.
>>>   * refcount - the number of outstanding reference to the entry. This is needed
>>>   *            to protect against premature freeing of the entry by code
>>>   *            concurrent calls to load, invalidate, and writeback.  The lock
>>>   *            for the zswap_tree structure that contains the entry must
>>>   *            be held while changing the refcount.  Since the lock must
>>>   *            be held, there is no reason to also make refcount atomic.
>>> - * offset - the swap offset for the entry.  Index into the red-black tree.
>>> - * handle - zpool allocation handle that stores the compressed page data
>>>   * length - the length in bytes of the compressed page data.  Needed during
>>>   *          decompression
>>> + * pool - the zswap_pool the entry's data is in
>>> + * handle - zpool allocation handle that stores the compressed page data
>>>   */
>>>  struct zswap_entry {
>>>         struct rb_node rbnode;
>>>         pgoff_t offset;
>>>         int refcount;
>>>         unsigned int length;
>>> +       struct zswap_pool *pool;
>>>         unsigned long handle;
>>>  };
>>>
>>> @@ -201,6 +156,44 @@ struct zswap_tree {
>>>
>>>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
>>>
>>> +/* RCU-protected iteration */
>>> +static LIST_HEAD(zswap_pools);
>>> +/* protects zswap_pools list modification */
>>> +static DEFINE_SPINLOCK(zswap_pools_lock);
>>> +
>>> +/*********************************
>>> +* helpers and fwd declarations
>>> +**********************************/
>>> +
>>> +#define zswap_pool_debug(msg, p)                               \
>>> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
>>> +                zpool_get_type((p)->zpool))
>>> +
>>> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
>>> +static int zswap_pool_get(struct zswap_pool *pool);
>>> +static void zswap_pool_put(struct zswap_pool *pool);
>>> +
>>> +static bool zswap_is_full(void)
>>> +{
>>> +       return totalram_pages * zswap_max_pool_percent / 100 <
>>> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>>> +}
>>> +
>>> +static void zswap_update_total_size(void)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +       u64 total = 0;
>>> +
>>> +       rcu_read_lock();
>>> +
>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>>> +               total += zpool_get_total_size(pool->zpool);
>>> +
>>> +       rcu_read_unlock();
>>> +
>>> +       zswap_pool_total_size = total;
>>> +}
>>> +
>>>  /*********************************
>>>  * zswap entry functions
>>>  **********************************/
>>> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
>>>   */
>>>  static void zswap_free_entry(struct zswap_entry *entry)
>>>  {
>>> -       zpool_free(zswap_pool, entry->handle);
>>> +       zpool_free(entry->pool->zpool, entry->handle);
>>> +       zswap_pool_put(entry->pool);
>>>         zswap_entry_cache_free(entry);
>>>         atomic_dec(&zswap_stored_pages);
>>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>>> +       zswap_update_total_size();
>>>  }
>>>
>>>  /* caller must hold the tree lock */
>>> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
>>>  **********************************/
>>>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
>>>
>>> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>>> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
>>>  {
>>> -       struct crypto_comp *tfm;
>>>         u8 *dst;
>>>
>>>         switch (action) {
>>>         case CPU_UP_PREPARE:
>>> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
>>> -               if (IS_ERR(tfm)) {
>>> -                       pr_err("can't allocate compressor transform\n");
>>> -                       return NOTIFY_BAD;
>>> -               }
>>> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
>>>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
>>>                 if (!dst) {
>>>                         pr_err("can't allocate compressor buffer\n");
>>> -                       crypto_free_comp(tfm);
>>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>>>                         return NOTIFY_BAD;
>>>                 }
>>>                 per_cpu(zswap_dstmem, cpu) = dst;
>>>                 break;
>>>         case CPU_DEAD:
>>>         case CPU_UP_CANCELED:
>>> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
>>> -               if (tfm) {
>>> -                       crypto_free_comp(tfm);
>>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>>> -               }
>>>                 dst = per_cpu(zswap_dstmem, cpu);
>>>                 kfree(dst);
>>>                 per_cpu(zswap_dstmem, cpu) = NULL;
>>> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>>>         return NOTIFY_OK;
>>>  }
>>>
>>> -static int zswap_cpu_notifier(struct notifier_block *nb,
>>> -                               unsigned long action, void *pcpu)
>>> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
>>> +                                    unsigned long action, void *pcpu)
>>>  {
>>> -       unsigned long cpu = (unsigned long)pcpu;
>>> -       return __zswap_cpu_notifier(action, cpu);
>>> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
>>>  }
>>>
>>> -static struct notifier_block zswap_cpu_notifier_block = {
>>> -       .notifier_call = zswap_cpu_notifier
>>> +static struct notifier_block zswap_dstmem_notifier = {
>>> +       .notifier_call =        zswap_cpu_dstmem_notifier,
>>>  };
>>>
>>> -static int __init zswap_cpu_init(void)
>>> +static int __init zswap_cpu_dstmem_init(void)
>>>  {
>>>         unsigned long cpu;
>>>
>>>         cpu_notifier_register_begin();
>>>         for_each_online_cpu(cpu)
>>> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
>>> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
>>> +                   NOTIFY_BAD)
>>>                         goto cleanup;
>>> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
>>> +       __register_cpu_notifier(&zswap_dstmem_notifier);
>>>         cpu_notifier_register_done();
>>>         return 0;
>>>
>>>  cleanup:
>>>         for_each_online_cpu(cpu)
>>> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
>>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>>>         cpu_notifier_register_done();
>>>         return -ENOMEM;
>>>  }
>>>
>>> +static void zswap_cpu_dstmem_destroy(void)
>>> +{
>>> +       unsigned long cpu;
>>> +
>>> +       cpu_notifier_register_begin();
>>> +       for_each_online_cpu(cpu)
>>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>>> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
>>> +       cpu_notifier_register_done();
>>> +}
>>> +
>>> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
>>> +                                    unsigned long action, unsigned long cpu)
>>> +{
>>> +       struct crypto_comp *tfm;
>>> +
>>> +       switch (action) {
>>> +       case CPU_UP_PREPARE:
>>> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
>>> +                       break;
>>> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
>>> +               if (IS_ERR_OR_NULL(tfm)) {
>>> +                       pr_err("could not alloc crypto comp %s : %ld\n",
>>> +                              pool->tfm_name, PTR_ERR(tfm));
>>> +                       return NOTIFY_BAD;
>>> +               }
>>> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
>>> +               break;
>>> +       case CPU_DEAD:
>>> +       case CPU_UP_CANCELED:
>>> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
>>> +               if (!IS_ERR_OR_NULL(tfm))
>>> +                       crypto_free_comp(tfm);
>>> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
>>> +               break;
>>> +       default:
>>> +               break;
>>> +       }
>>> +       return NOTIFY_OK;
>>> +}
>>> +
>>> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
>>> +                                  unsigned long action, void *pcpu)
>>> +{
>>> +       unsigned long cpu = (unsigned long)pcpu;
>>> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
>>> +
>>> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
>>> +}
>>> +
>>> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
>>> +{
>>> +       unsigned long cpu;
>>> +
>>> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
>>> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
>>> +
>>> +       cpu_notifier_register_begin();
>>> +       for_each_online_cpu(cpu)
>>> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
>>> +                   NOTIFY_BAD)
>>> +                       goto cleanup;
>>> +       __register_cpu_notifier(&pool->notifier);
>>> +       cpu_notifier_register_done();
>>> +       return 0;
>>> +
>>> +cleanup:
>>> +       for_each_online_cpu(cpu)
>>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>>> +       cpu_notifier_register_done();
>>> +       return -ENOMEM;
>>> +}
>>> +
>>> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
>>> +{
>>> +       unsigned long cpu;
>>> +
>>> +       cpu_notifier_register_begin();
>>> +       for_each_online_cpu(cpu)
>>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>>> +       __unregister_cpu_notifier(&pool->notifier);
>>> +       cpu_notifier_register_done();
>>> +}
>>> +
>>>  /*********************************
>>> -* helpers
>>> +* pool functions
>>>  **********************************/
>>> -static bool zswap_is_full(void)
>>> +
>>> +static struct zswap_pool *__zswap_pool_current(void)
>>>  {
>>> -       return totalram_pages * zswap_max_pool_percent / 100 <
>>> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>>> +       struct zswap_pool *pool;
>>> +
>>> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
>>> +       WARN_ON(!pool);
>>> +
>>> +       return pool;
>>> +}
>>> +
>>> +static struct zswap_pool *zswap_pool_current(void)
>>> +{
>>> +       assert_spin_locked(&zswap_pools_lock);
>>> +
>>> +       return __zswap_pool_current();
>>> +}
>>> +
>>> +static struct zswap_pool *zswap_pool_current_get(void)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +
>>> +       rcu_read_lock();
>>> +
>>> +       pool = __zswap_pool_current();
>>> +       if (!pool || !zswap_pool_get(pool))
>>> +               pool = NULL;
>>> +
>>> +       rcu_read_unlock();
>>> +
>>> +       return pool;
>>> +}
>>> +
>>> +static struct zswap_pool *zswap_pool_last_get(void)
>>> +{
>>> +       struct zswap_pool *pool, *last = NULL;
>>> +
>>> +       rcu_read_lock();
>>> +
>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>>> +               last = pool;
>>> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
>>> +               last = NULL;
>>> +
>>> +       rcu_read_unlock();
>>> +
>>> +       return last;
>>> +}
>>> +
>>> +static struct zpool_ops zswap_zpool_ops = {
>>> +       .evict = zswap_writeback_entry
>>> +};
>>> +
>>> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +
>>> +       assert_spin_locked(&zswap_pools_lock);
>>> +
>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
>>> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
>>> +                       continue;
>>> +               if (strncmp(zpool_get_type(pool->zpool), type,
>>> +                           sizeof(zswap_zpool_type)))
>>> +                       continue;
>>> +               /* if we can't get it, it's about to be destroyed */
>>> +               if (!zswap_pool_get(pool))
>>> +                       continue;
>>> +               return pool;
>>> +       }
>>> +
>>> +       return NULL;
>>> +}
>>> +
>>> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>>> +
>>> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
>>> +       if (!pool) {
>>> +               pr_err("pool alloc failed\n");
>>> +               return NULL;
>>> +       }
>>> +
>>> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
>>> +       if (!pool->zpool) {
>>> +               pr_err("%s zpool not available\n", type);
>>> +               goto error;
>>> +       }
>>> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
>>> +
>>> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
>>> +       pool->tfm = alloc_percpu(struct crypto_comp *);
>>> +       if (!pool->tfm) {
>>> +               pr_err("percpu alloc failed\n");
>>> +               goto error;
>>> +       }
>>> +
>>> +       if (zswap_cpu_comp_init(pool))
>>> +               goto error;
>>> +       pr_debug("using %s compressor\n", pool->tfm_name);
>>> +
>>> +       /* being the current pool takes 1 ref; this func expects the
>>> +        * caller to always add the new pool as the current pool
>>> +        */
>>> +       kref_init(&pool->kref);
>>> +       INIT_LIST_HEAD(&pool->list);
>>> +
>>> +       zswap_pool_debug("created", pool);
>>> +
>>> +       return pool;
>>> +
>>> +error:
>>> +       free_percpu(pool->tfm);
>>> +       if (pool->zpool)
>>> +               zpool_destroy_pool(pool->zpool);
>>> +       kfree(pool);
>>> +       return NULL;
>>> +}
>>> +
>>> +static struct zswap_pool *__zswap_pool_create_fallback(void)
>>> +{
>>> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>>> +               pr_err("compressor %s not available, using default %s\n",
>>> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
>>> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
>>> +                       sizeof(zswap_compressor));
>>> +       }
>>> +       if (!zpool_has_pool(zswap_zpool_type)) {
>>> +               pr_err("zpool %s not available, using default %s\n",
>>> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
>>> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
>>> +                       sizeof(zswap_zpool_type));
>>> +       }
>>> +
>>> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
>>> +}
>>> +
>>> +static void zswap_pool_destroy(struct zswap_pool *pool)
>>> +{
>>> +       zswap_pool_debug("destroying", pool);
>>> +
>>> +       zswap_cpu_comp_destroy(pool);
>>> +       free_percpu(pool->tfm);
>>> +       zpool_destroy_pool(pool->zpool);
>>> +       kfree(pool);
>>> +}
>>> +
>>> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
>>> +{
>>> +       return kref_get_unless_zero(&pool->kref);
>>> +}
>>> +
>>> +static void __zswap_pool_release(struct rcu_head *head)
>>> +{
>>> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
>>> +
>>> +       /* nobody should have been able to get a kref... */
>>> +       WARN_ON(kref_get_unless_zero(&pool->kref));
>>> +
>>> +       /* pool is now off zswap_pools list and has no references. */
>>> +       zswap_pool_destroy(pool);
>>> +}
>>> +
>>> +static void __zswap_pool_empty(struct kref *kref)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +
>>> +       pool = container_of(kref, typeof(*pool), kref);
>>> +
>>> +       spin_lock(&zswap_pools_lock);
>>> +
>>> +       WARN_ON(pool == zswap_pool_current());
>>> +
>>> +       list_del_rcu(&pool->list);
>>> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
>>> +
>>> +       spin_unlock(&zswap_pools_lock);
>>> +}
>>> +
>>> +static void zswap_pool_put(struct zswap_pool *pool)
>>> +{
>>> +       kref_put(&pool->kref, __zswap_pool_empty);
>>> +}
>>> +
>>>  }
>>>
>>>  /*********************************
>>> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>>         pgoff_t offset;
>>>         struct zswap_entry *entry;
>>>         struct page *page;
>>> +       struct crypto_comp *tfm;
>>>         u8 *src, *dst;
>>>         unsigned int dlen;
>>>         int ret;
>>> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
>>>                 /* decompress */
>>>                 dlen = PAGE_SIZE;
>>> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>>> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>>                 dst = kmap_atomic(page);
>>> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
>>> -                               entry->length, dst, &dlen);
>>> +               tfm = *get_cpu_ptr(entry->pool->tfm);
>>> +               ret = crypto_comp_decompress(tfm, src, entry->length,
>>> +                                            dst, &dlen);
>>> +               put_cpu_ptr(entry->pool->tfm);
>>>                 kunmap_atomic(dst);
>>> -               zpool_unmap_handle(zswap_pool, entry->handle);
>>> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>>                 BUG_ON(ret);
>>>                 BUG_ON(dlen != PAGE_SIZE);
>>>
>>> @@ -633,6 +882,22 @@ end:
>>>         return ret;
>>>  }
>>>
>>> +static int zswap_shrink(void)
>>> +{
>>> +       struct zswap_pool *pool;
>>> +       int ret;
>>> +
>>> +       pool = zswap_pool_last_get();
>>> +       if (!pool)
>>> +               return -ENOENT;
>>> +
>>> +       ret = zpool_shrink(pool->zpool, 1, NULL);
>>> +
>>> +       zswap_pool_put(pool);
>>> +
>>> +       return ret;
>>> +}
>>> +
>>>  /*********************************
>>>  * frontswap hooks
>>>  **********************************/
>>> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>  {
>>>         struct zswap_tree *tree = zswap_trees[type];
>>>         struct zswap_entry *entry, *dupentry;
>>> +       struct crypto_comp *tfm;
>>>         int ret;
>>>         unsigned int dlen = PAGE_SIZE, len;
>>>         unsigned long handle;
>>> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>         /* reclaim space if needed */
>>>         if (zswap_is_full()) {
>>>                 zswap_pool_limit_hit++;
>>> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
>>> +               if (zswap_shrink()) {
>>>                         zswap_reject_reclaim_fail++;
>>>                         ret = -ENOMEM;
>>>                         goto reject;
>>> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>                 goto reject;
>>>         }
>>>
>>> +       /* if entry is successfully added, it keeps the reference */
>>> +       entry->pool = zswap_pool_current_get();
>>> +       if (!entry->pool) {
>>> +               ret = -EINVAL;
>>> +               goto freepage;
>>> +       }
>>> +
>>>         /* compress */
>>>         dst = get_cpu_var(zswap_dstmem);
>>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>>>         src = kmap_atomic(page);
>>> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
>>> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
>>>         kunmap_atomic(src);
>>> +       put_cpu_ptr(entry->pool->tfm);
>>>         if (ret) {
>>>                 ret = -EINVAL;
>>> -               goto freepage;
>>> +               goto put_dstmem;
>>>         }
>>>
>>>         /* store */
>>>         len = dlen + sizeof(struct zswap_header);
>>> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
>>> -               &handle);
>>> +       ret = zpool_malloc(entry->pool->zpool, len,
>>> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
>>>         if (ret == -ENOSPC) {
>>>                 zswap_reject_compress_poor++;
>>> -               goto freepage;
>>> +               goto put_dstmem;
>>>         }
>>>         if (ret) {
>>>                 zswap_reject_alloc_fail++;
>>> -               goto freepage;
>>> +               goto put_dstmem;
>>>         }
>>> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
>>> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
>>>         zhdr->swpentry = swp_entry(type, offset);
>>>         buf = (u8 *)(zhdr + 1);
>>>         memcpy(buf, dst, dlen);
>>> -       zpool_unmap_handle(zswap_pool, handle);
>>> +       zpool_unmap_handle(entry->pool->zpool, handle);
>>>         put_cpu_var(zswap_dstmem);
>>>
>>>         /* populate entry */
>>> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>
>>>         /* update stats */
>>>         atomic_inc(&zswap_stored_pages);
>>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>>> +       zswap_update_total_size();
>>>
>>>         return 0;
>>>
>>> -freepage:
>>> +put_dstmem:
>>>         put_cpu_var(zswap_dstmem);
>>> +       zswap_pool_put(entry->pool);
>>> +freepage:
>>>         zswap_entry_cache_free(entry);
>>>  reject:
>>>         return ret;
>>> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>>  {
>>>         struct zswap_tree *tree = zswap_trees[type];
>>>         struct zswap_entry *entry;
>>> +       struct crypto_comp *tfm;
>>>         u8 *src, *dst;
>>>         unsigned int dlen;
>>>         int ret;
>>> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>>
>>>         /* decompress */
>>>         dlen = PAGE_SIZE;
>>> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>>> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>>         dst = kmap_atomic(page);
>>> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
>>> -               dst, &dlen);
>>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>>> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
>>> +       put_cpu_ptr(entry->pool->tfm);
>>>         kunmap_atomic(dst);
>>> -       zpool_unmap_handle(zswap_pool, entry->handle);
>>> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>>         BUG_ON(ret);
>>>
>>>         spin_lock(&tree->lock);
>>> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
>>>         zswap_trees[type] = NULL;
>>>  }
>>>
>>> -static struct zpool_ops zswap_zpool_ops = {
>>> -       .evict = zswap_writeback_entry
>>> -};
>>> -
>>>  static void zswap_frontswap_init(unsigned type)
>>>  {
>>>         struct zswap_tree *tree;
>>> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
>>>  **********************************/
>>>  static int __init init_zswap(void)
>>>  {
>>> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>>> +       struct zswap_pool *pool;
>>>
>>> -       pr_info("loading zswap\n");
>>> -
>>> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>>> -                                       &zswap_zpool_ops);
>>> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
>>> -               pr_info("%s zpool not available\n", zswap_zpool_type);
>>> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
>>> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>>> -                                       &zswap_zpool_ops);
>>> -       }
>>> -       if (!zswap_pool) {
>>> -               pr_err("%s zpool not available\n", zswap_zpool_type);
>>> -               pr_err("zpool creation failed\n");
>>> -               goto error;
>>> -       }
>>> -       pr_info("using %s pool\n", zswap_zpool_type);
>>> +       pr_info("loading\n");
>>>
>>>         if (zswap_entry_cache_create()) {
>>>                 pr_err("entry cache creation failed\n");
>>> -               goto cachefail;
>>> +               goto cache_fail;
>>>         }
>>> -       if (zswap_comp_init()) {
>>> -               pr_err("compressor initialization failed\n");
>>> -               goto compfail;
>>> +
>>> +       if (zswap_cpu_dstmem_init()) {
>>> +               pr_err("dstmem alloc failed\n");
>>> +               goto dstmem_fail;
>>>         }
>>> -       if (zswap_cpu_init()) {
>>> -               pr_err("per-cpu initialization failed\n");
>>> -               goto pcpufail;
>>> +
>>> +       pool = __zswap_pool_create_fallback();
>>> +       if (!pool) {
>>> +               pr_err("pool creation failed\n");
>>> +               goto pool_fail;
>>>         }
>>> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
>>> +               zpool_get_type(pool->zpool));
>>> +
>>> +       list_add(&pool->list, &zswap_pools);
>>>
>>>         frontswap_register_ops(&zswap_frontswap_ops);
>>>         if (zswap_debugfs_init())
>>>                 pr_warn("debugfs initialization failed\n");
>>>         return 0;
>>> -pcpufail:
>>> -       zswap_comp_exit();
>>> -compfail:
>>> +
>>> +pool_fail:
>>> +       zswap_cpu_dstmem_destroy();
>>> +dstmem_fail:
>>>         zswap_entry_cache_destroy();
>>> -cachefail:
>>> -       zpool_destroy_pool(zswap_pool);
>>> -error:
>>> +cache_fail:
>>>         return -ENOMEM;
>>>  }
>>>  /* must be late so crypto has time to come up */
>>> --
>>> 2.1.0
>>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Seth Jennings June 18, 2015, 3:21 p.m. UTC | #5
On Wed, Jun 17, 2015 at 07:13:31PM -0400, Dan Streetman wrote:
> On Wed, Jun 10, 2015 at 4:54 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> > On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> >> On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> >>> Add dynamic creation of pools.  Move the static crypto compression
> >>> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
> >>> the pool it's in.
> >>
> >> Seth, as far as the design, from your previous comments I assume you
> >> were thinking of maintaining separate lists of zpools and compressors?
> >>  I do see how that will reduce duplication of zpools and compressors,
> >> but it also requires adding a new pointer to each zswap_entry, and
> >> increasing the amount of code to manage each list separately.  And the
> >> most common case in zswap will be just a single zpool and compressor,
> >> not repeatedly changing params.  What do you think?
> >
> > Any opinion on this patch?  If you want, I can break it up so there's
> > a list of zpools and a list of compressors.  Either the combined way
> > (this patch) or separate lists works for me, as long as the params are
> > changeable at runtime :-)
> 
> You on vacation Seth?  Let me know what direction you prefer for this...

I think it is as good as it can be.  Complicated, but I don't know of a
way to make it less so if we want to be able to do this (which is a
separate discussion).

Seth

> 
> >
> >
> >>
> >>>
> >>> This is required by a separate patch which enables changing the
> >>> zswap zpool and compressor params at runtime.
> >>>
> >>> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
> >>> ---
> >>>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
> >>>  1 file changed, 408 insertions(+), 142 deletions(-)
> >>>
> >>> diff --git a/mm/zswap.c b/mm/zswap.c
> >>> index 2d5727b..fc93770 100644
> >>> --- a/mm/zswap.c
> >>> +++ b/mm/zswap.c
> >>> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
> >>>  static struct zpool *zswap_pool;
> >>>
> >>>  /*********************************
> >>> -* compression functions
> >>> +* data structures
> >>>  **********************************/
> >>> -/* per-cpu compression transforms */
> >>> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
> >>>
> >>> -enum comp_op {
> >>> -       ZSWAP_COMPOP_COMPRESS,
> >>> -       ZSWAP_COMPOP_DECOMPRESS
> >>> +struct zswap_pool {
> >>> +       struct zpool *zpool;
> >>> +       struct kref kref;
> >>> +       struct list_head list;
> >>> +       struct rcu_head rcu_head;
> >>> +       struct notifier_block notifier;
> >>> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
> >>> +       struct crypto_comp * __percpu *tfm;
> >>>  };
> >>>
> >>> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
> >>> -                               u8 *dst, unsigned int *dlen)
> >>> -{
> >>> -       struct crypto_comp *tfm;
> >>> -       int ret;
> >>> -
> >>> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
> >>> -       switch (op) {
> >>> -       case ZSWAP_COMPOP_COMPRESS:
> >>> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
> >>> -               break;
> >>> -       case ZSWAP_COMPOP_DECOMPRESS:
> >>> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
> >>> -               break;
> >>> -       default:
> >>> -               ret = -EINVAL;
> >>> -       }
> >>> -
> >>> -       put_cpu();
> >>> -       return ret;
> >>> -}
> >>> -
> >>> -static int __init zswap_comp_init(void)
> >>> -{
> >>> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> >>> -               pr_info("%s compressor not available\n", zswap_compressor);
> >>> -               /* fall back to default compressor */
> >>> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
> >>> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
> >>> -                       /* can't even load the default compressor */
> >>> -                       return -ENODEV;
> >>> -       }
> >>> -       pr_info("using %s compressor\n", zswap_compressor);
> >>> -
> >>> -       /* alloc percpu transforms */
> >>> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
> >>> -       if (!zswap_comp_pcpu_tfms)
> >>> -               return -ENOMEM;
> >>> -       return 0;
> >>> -}
> >>> -
> >>> -static void __init zswap_comp_exit(void)
> >>> -{
> >>> -       /* free percpu transforms */
> >>> -       free_percpu(zswap_comp_pcpu_tfms);
> >>> -}
> >>> -
> >>> -/*********************************
> >>> -* data structures
> >>> -**********************************/
> >>>  /*
> >>>   * struct zswap_entry
> >>>   *
> >>> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
> >>>   * page within zswap.
> >>>   *
> >>>   * rbnode - links the entry into red-black tree for the appropriate swap type
> >>> + * offset - the swap offset for the entry.  Index into the red-black tree.
> >>>   * refcount - the number of outstanding reference to the entry. This is needed
> >>>   *            to protect against premature freeing of the entry by code
> >>>   *            concurrent calls to load, invalidate, and writeback.  The lock
> >>>   *            for the zswap_tree structure that contains the entry must
> >>>   *            be held while changing the refcount.  Since the lock must
> >>>   *            be held, there is no reason to also make refcount atomic.
> >>> - * offset - the swap offset for the entry.  Index into the red-black tree.
> >>> - * handle - zpool allocation handle that stores the compressed page data
> >>>   * length - the length in bytes of the compressed page data.  Needed during
> >>>   *          decompression
> >>> + * pool - the zswap_pool the entry's data is in
> >>> + * handle - zpool allocation handle that stores the compressed page data
> >>>   */
> >>>  struct zswap_entry {
> >>>         struct rb_node rbnode;
> >>>         pgoff_t offset;
> >>>         int refcount;
> >>>         unsigned int length;
> >>> +       struct zswap_pool *pool;
> >>>         unsigned long handle;
> >>>  };
> >>>
> >>> @@ -201,6 +156,44 @@ struct zswap_tree {
> >>>
> >>>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
> >>>
> >>> +/* RCU-protected iteration */
> >>> +static LIST_HEAD(zswap_pools);
> >>> +/* protects zswap_pools list modification */
> >>> +static DEFINE_SPINLOCK(zswap_pools_lock);
> >>> +
> >>> +/*********************************
> >>> +* helpers and fwd declarations
> >>> +**********************************/
> >>> +
> >>> +#define zswap_pool_debug(msg, p)                               \
> >>> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
> >>> +                zpool_get_type((p)->zpool))
> >>> +
> >>> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
> >>> +static int zswap_pool_get(struct zswap_pool *pool);
> >>> +static void zswap_pool_put(struct zswap_pool *pool);
> >>> +
> >>> +static bool zswap_is_full(void)
> >>> +{
> >>> +       return totalram_pages * zswap_max_pool_percent / 100 <
> >>> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> >>> +}
> >>> +
> >>> +static void zswap_update_total_size(void)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +       u64 total = 0;
> >>> +
> >>> +       rcu_read_lock();
> >>> +
> >>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> >>> +               total += zpool_get_total_size(pool->zpool);
> >>> +
> >>> +       rcu_read_unlock();
> >>> +
> >>> +       zswap_pool_total_size = total;
> >>> +}
> >>> +
> >>>  /*********************************
> >>>  * zswap entry functions
> >>>  **********************************/
> >>> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> >>>   */
> >>>  static void zswap_free_entry(struct zswap_entry *entry)
> >>>  {
> >>> -       zpool_free(zswap_pool, entry->handle);
> >>> +       zpool_free(entry->pool->zpool, entry->handle);
> >>> +       zswap_pool_put(entry->pool);
> >>>         zswap_entry_cache_free(entry);
> >>>         atomic_dec(&zswap_stored_pages);
> >>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> >>> +       zswap_update_total_size();
> >>>  }
> >>>
> >>>  /* caller must hold the tree lock */
> >>> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> >>>  **********************************/
> >>>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
> >>>
> >>> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> >>> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
> >>>  {
> >>> -       struct crypto_comp *tfm;
> >>>         u8 *dst;
> >>>
> >>>         switch (action) {
> >>>         case CPU_UP_PREPARE:
> >>> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
> >>> -               if (IS_ERR(tfm)) {
> >>> -                       pr_err("can't allocate compressor transform\n");
> >>> -                       return NOTIFY_BAD;
> >>> -               }
> >>> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
> >>>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
> >>>                 if (!dst) {
> >>>                         pr_err("can't allocate compressor buffer\n");
> >>> -                       crypto_free_comp(tfm);
> >>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> >>>                         return NOTIFY_BAD;
> >>>                 }
> >>>                 per_cpu(zswap_dstmem, cpu) = dst;
> >>>                 break;
> >>>         case CPU_DEAD:
> >>>         case CPU_UP_CANCELED:
> >>> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
> >>> -               if (tfm) {
> >>> -                       crypto_free_comp(tfm);
> >>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> >>> -               }
> >>>                 dst = per_cpu(zswap_dstmem, cpu);
> >>>                 kfree(dst);
> >>>                 per_cpu(zswap_dstmem, cpu) = NULL;
> >>> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> >>>         return NOTIFY_OK;
> >>>  }
> >>>
> >>> -static int zswap_cpu_notifier(struct notifier_block *nb,
> >>> -                               unsigned long action, void *pcpu)
> >>> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
> >>> +                                    unsigned long action, void *pcpu)
> >>>  {
> >>> -       unsigned long cpu = (unsigned long)pcpu;
> >>> -       return __zswap_cpu_notifier(action, cpu);
> >>> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
> >>>  }
> >>>
> >>> -static struct notifier_block zswap_cpu_notifier_block = {
> >>> -       .notifier_call = zswap_cpu_notifier
> >>> +static struct notifier_block zswap_dstmem_notifier = {
> >>> +       .notifier_call =        zswap_cpu_dstmem_notifier,
> >>>  };
> >>>
> >>> -static int __init zswap_cpu_init(void)
> >>> +static int __init zswap_cpu_dstmem_init(void)
> >>>  {
> >>>         unsigned long cpu;
> >>>
> >>>         cpu_notifier_register_begin();
> >>>         for_each_online_cpu(cpu)
> >>> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
> >>> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
> >>> +                   NOTIFY_BAD)
> >>>                         goto cleanup;
> >>> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
> >>> +       __register_cpu_notifier(&zswap_dstmem_notifier);
> >>>         cpu_notifier_register_done();
> >>>         return 0;
> >>>
> >>>  cleanup:
> >>>         for_each_online_cpu(cpu)
> >>> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
> >>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> >>>         cpu_notifier_register_done();
> >>>         return -ENOMEM;
> >>>  }
> >>>
> >>> +static void zswap_cpu_dstmem_destroy(void)
> >>> +{
> >>> +       unsigned long cpu;
> >>> +
> >>> +       cpu_notifier_register_begin();
> >>> +       for_each_online_cpu(cpu)
> >>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> >>> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
> >>> +       cpu_notifier_register_done();
> >>> +}
> >>> +
> >>> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
> >>> +                                    unsigned long action, unsigned long cpu)
> >>> +{
> >>> +       struct crypto_comp *tfm;
> >>> +
> >>> +       switch (action) {
> >>> +       case CPU_UP_PREPARE:
> >>> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
> >>> +                       break;
> >>> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
> >>> +               if (IS_ERR_OR_NULL(tfm)) {
> >>> +                       pr_err("could not alloc crypto comp %s : %ld\n",
> >>> +                              pool->tfm_name, PTR_ERR(tfm));
> >>> +                       return NOTIFY_BAD;
> >>> +               }
> >>> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
> >>> +               break;
> >>> +       case CPU_DEAD:
> >>> +       case CPU_UP_CANCELED:
> >>> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
> >>> +               if (!IS_ERR_OR_NULL(tfm))
> >>> +                       crypto_free_comp(tfm);
> >>> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
> >>> +               break;
> >>> +       default:
> >>> +               break;
> >>> +       }
> >>> +       return NOTIFY_OK;
> >>> +}
> >>> +
> >>> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
> >>> +                                  unsigned long action, void *pcpu)
> >>> +{
> >>> +       unsigned long cpu = (unsigned long)pcpu;
> >>> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
> >>> +
> >>> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
> >>> +}
> >>> +
> >>> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
> >>> +{
> >>> +       unsigned long cpu;
> >>> +
> >>> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
> >>> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
> >>> +
> >>> +       cpu_notifier_register_begin();
> >>> +       for_each_online_cpu(cpu)
> >>> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
> >>> +                   NOTIFY_BAD)
> >>> +                       goto cleanup;
> >>> +       __register_cpu_notifier(&pool->notifier);
> >>> +       cpu_notifier_register_done();
> >>> +       return 0;
> >>> +
> >>> +cleanup:
> >>> +       for_each_online_cpu(cpu)
> >>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> >>> +       cpu_notifier_register_done();
> >>> +       return -ENOMEM;
> >>> +}
> >>> +
> >>> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
> >>> +{
> >>> +       unsigned long cpu;
> >>> +
> >>> +       cpu_notifier_register_begin();
> >>> +       for_each_online_cpu(cpu)
> >>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> >>> +       __unregister_cpu_notifier(&pool->notifier);
> >>> +       cpu_notifier_register_done();
> >>> +}
> >>> +
> >>>  /*********************************
> >>> -* helpers
> >>> +* pool functions
> >>>  **********************************/
> >>> -static bool zswap_is_full(void)
> >>> +
> >>> +static struct zswap_pool *__zswap_pool_current(void)
> >>>  {
> >>> -       return totalram_pages * zswap_max_pool_percent / 100 <
> >>> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> >>> +       struct zswap_pool *pool;
> >>> +
> >>> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
> >>> +       WARN_ON(!pool);
> >>> +
> >>> +       return pool;
> >>> +}
> >>> +
> >>> +static struct zswap_pool *zswap_pool_current(void)
> >>> +{
> >>> +       assert_spin_locked(&zswap_pools_lock);
> >>> +
> >>> +       return __zswap_pool_current();
> >>> +}
> >>> +
> >>> +static struct zswap_pool *zswap_pool_current_get(void)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +
> >>> +       rcu_read_lock();
> >>> +
> >>> +       pool = __zswap_pool_current();
> >>> +       if (!pool || !zswap_pool_get(pool))
> >>> +               pool = NULL;
> >>> +
> >>> +       rcu_read_unlock();
> >>> +
> >>> +       return pool;
> >>> +}
> >>> +
> >>> +static struct zswap_pool *zswap_pool_last_get(void)
> >>> +{
> >>> +       struct zswap_pool *pool, *last = NULL;
> >>> +
> >>> +       rcu_read_lock();
> >>> +
> >>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> >>> +               last = pool;
> >>> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
> >>> +               last = NULL;
> >>> +
> >>> +       rcu_read_unlock();
> >>> +
> >>> +       return last;
> >>> +}
> >>> +
> >>> +static struct zpool_ops zswap_zpool_ops = {
> >>> +       .evict = zswap_writeback_entry
> >>> +};
> >>> +
> >>> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +
> >>> +       assert_spin_locked(&zswap_pools_lock);
> >>> +
> >>> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
> >>> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
> >>> +                       continue;
> >>> +               if (strncmp(zpool_get_type(pool->zpool), type,
> >>> +                           sizeof(zswap_zpool_type)))
> >>> +                       continue;
> >>> +               /* if we can't get it, it's about to be destroyed */
> >>> +               if (!zswap_pool_get(pool))
> >>> +                       continue;
> >>> +               return pool;
> >>> +       }
> >>> +
> >>> +       return NULL;
> >>> +}
> >>> +
> >>> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> >>> +
> >>> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> >>> +       if (!pool) {
> >>> +               pr_err("pool alloc failed\n");
> >>> +               return NULL;
> >>> +       }
> >>> +
> >>> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
> >>> +       if (!pool->zpool) {
> >>> +               pr_err("%s zpool not available\n", type);
> >>> +               goto error;
> >>> +       }
> >>> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> >>> +
> >>> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> >>> +       pool->tfm = alloc_percpu(struct crypto_comp *);
> >>> +       if (!pool->tfm) {
> >>> +               pr_err("percpu alloc failed\n");
> >>> +               goto error;
> >>> +       }
> >>> +
> >>> +       if (zswap_cpu_comp_init(pool))
> >>> +               goto error;
> >>> +       pr_debug("using %s compressor\n", pool->tfm_name);
> >>> +
> >>> +       /* being the current pool takes 1 ref; this func expects the
> >>> +        * caller to always add the new pool as the current pool
> >>> +        */
> >>> +       kref_init(&pool->kref);
> >>> +       INIT_LIST_HEAD(&pool->list);
> >>> +
> >>> +       zswap_pool_debug("created", pool);
> >>> +
> >>> +       return pool;
> >>> +
> >>> +error:
> >>> +       free_percpu(pool->tfm);
> >>> +       if (pool->zpool)
> >>> +               zpool_destroy_pool(pool->zpool);
> >>> +       kfree(pool);
> >>> +       return NULL;
> >>> +}
> >>> +
> >>> +static struct zswap_pool *__zswap_pool_create_fallback(void)
> >>> +{
> >>> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> >>> +               pr_err("compressor %s not available, using default %s\n",
> >>> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
> >>> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
> >>> +                       sizeof(zswap_compressor));
> >>> +       }
> >>> +       if (!zpool_has_pool(zswap_zpool_type)) {
> >>> +               pr_err("zpool %s not available, using default %s\n",
> >>> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
> >>> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
> >>> +                       sizeof(zswap_zpool_type));
> >>> +       }
> >>> +
> >>> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> >>> +}
> >>> +
> >>> +static void zswap_pool_destroy(struct zswap_pool *pool)
> >>> +{
> >>> +       zswap_pool_debug("destroying", pool);
> >>> +
> >>> +       zswap_cpu_comp_destroy(pool);
> >>> +       free_percpu(pool->tfm);
> >>> +       zpool_destroy_pool(pool->zpool);
> >>> +       kfree(pool);
> >>> +}
> >>> +
> >>> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
> >>> +{
> >>> +       return kref_get_unless_zero(&pool->kref);
> >>> +}
> >>> +
> >>> +static void __zswap_pool_release(struct rcu_head *head)
> >>> +{
> >>> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
> >>> +
> >>> +       /* nobody should have been able to get a kref... */
> >>> +       WARN_ON(kref_get_unless_zero(&pool->kref));
> >>> +
> >>> +       /* pool is now off zswap_pools list and has no references. */
> >>> +       zswap_pool_destroy(pool);
> >>> +}
> >>> +
> >>> +static void __zswap_pool_empty(struct kref *kref)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +
> >>> +       pool = container_of(kref, typeof(*pool), kref);
> >>> +
> >>> +       spin_lock(&zswap_pools_lock);
> >>> +
> >>> +       WARN_ON(pool == zswap_pool_current());
> >>> +
> >>> +       list_del_rcu(&pool->list);
> >>> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
> >>> +
> >>> +       spin_unlock(&zswap_pools_lock);
> >>> +}
> >>> +
> >>> +static void zswap_pool_put(struct zswap_pool *pool)
> >>> +{
> >>> +       kref_put(&pool->kref, __zswap_pool_empty);
> >>> +}
> >>> +
> >>>  }
> >>>
> >>>  /*********************************
> >>> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> >>>         pgoff_t offset;
> >>>         struct zswap_entry *entry;
> >>>         struct page *page;
> >>> +       struct crypto_comp *tfm;
> >>>         u8 *src, *dst;
> >>>         unsigned int dlen;
> >>>         int ret;
> >>> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> >>>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
> >>>                 /* decompress */
> >>>                 dlen = PAGE_SIZE;
> >>> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> >>> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> >>>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
> >>>                 dst = kmap_atomic(page);
> >>> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
> >>> -                               entry->length, dst, &dlen);
> >>> +               tfm = *get_cpu_ptr(entry->pool->tfm);
> >>> +               ret = crypto_comp_decompress(tfm, src, entry->length,
> >>> +                                            dst, &dlen);
> >>> +               put_cpu_ptr(entry->pool->tfm);
> >>>                 kunmap_atomic(dst);
> >>> -               zpool_unmap_handle(zswap_pool, entry->handle);
> >>> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
> >>>                 BUG_ON(ret);
> >>>                 BUG_ON(dlen != PAGE_SIZE);
> >>>
> >>> @@ -633,6 +882,22 @@ end:
> >>>         return ret;
> >>>  }
> >>>
> >>> +static int zswap_shrink(void)
> >>> +{
> >>> +       struct zswap_pool *pool;
> >>> +       int ret;
> >>> +
> >>> +       pool = zswap_pool_last_get();
> >>> +       if (!pool)
> >>> +               return -ENOENT;
> >>> +
> >>> +       ret = zpool_shrink(pool->zpool, 1, NULL);
> >>> +
> >>> +       zswap_pool_put(pool);
> >>> +
> >>> +       return ret;
> >>> +}
> >>> +
> >>>  /*********************************
> >>>  * frontswap hooks
> >>>  **********************************/
> >>> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>>  {
> >>>         struct zswap_tree *tree = zswap_trees[type];
> >>>         struct zswap_entry *entry, *dupentry;
> >>> +       struct crypto_comp *tfm;
> >>>         int ret;
> >>>         unsigned int dlen = PAGE_SIZE, len;
> >>>         unsigned long handle;
> >>> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>>         /* reclaim space if needed */
> >>>         if (zswap_is_full()) {
> >>>                 zswap_pool_limit_hit++;
> >>> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
> >>> +               if (zswap_shrink()) {
> >>>                         zswap_reject_reclaim_fail++;
> >>>                         ret = -ENOMEM;
> >>>                         goto reject;
> >>> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>>                 goto reject;
> >>>         }
> >>>
> >>> +       /* if entry is successfully added, it keeps the reference */
> >>> +       entry->pool = zswap_pool_current_get();
> >>> +       if (!entry->pool) {
> >>> +               ret = -EINVAL;
> >>> +               goto freepage;
> >>> +       }
> >>> +
> >>>         /* compress */
> >>>         dst = get_cpu_var(zswap_dstmem);
> >>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> >>>         src = kmap_atomic(page);
> >>> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
> >>> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
> >>>         kunmap_atomic(src);
> >>> +       put_cpu_ptr(entry->pool->tfm);
> >>>         if (ret) {
> >>>                 ret = -EINVAL;
> >>> -               goto freepage;
> >>> +               goto put_dstmem;
> >>>         }
> >>>
> >>>         /* store */
> >>>         len = dlen + sizeof(struct zswap_header);
> >>> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
> >>> -               &handle);
> >>> +       ret = zpool_malloc(entry->pool->zpool, len,
> >>> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
> >>>         if (ret == -ENOSPC) {
> >>>                 zswap_reject_compress_poor++;
> >>> -               goto freepage;
> >>> +               goto put_dstmem;
> >>>         }
> >>>         if (ret) {
> >>>                 zswap_reject_alloc_fail++;
> >>> -               goto freepage;
> >>> +               goto put_dstmem;
> >>>         }
> >>> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
> >>> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
> >>>         zhdr->swpentry = swp_entry(type, offset);
> >>>         buf = (u8 *)(zhdr + 1);
> >>>         memcpy(buf, dst, dlen);
> >>> -       zpool_unmap_handle(zswap_pool, handle);
> >>> +       zpool_unmap_handle(entry->pool->zpool, handle);
> >>>         put_cpu_var(zswap_dstmem);
> >>>
> >>>         /* populate entry */
> >>> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >>>
> >>>         /* update stats */
> >>>         atomic_inc(&zswap_stored_pages);
> >>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> >>> +       zswap_update_total_size();
> >>>
> >>>         return 0;
> >>>
> >>> -freepage:
> >>> +put_dstmem:
> >>>         put_cpu_var(zswap_dstmem);
> >>> +       zswap_pool_put(entry->pool);
> >>> +freepage:
> >>>         zswap_entry_cache_free(entry);
> >>>  reject:
> >>>         return ret;
> >>> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> >>>  {
> >>>         struct zswap_tree *tree = zswap_trees[type];
> >>>         struct zswap_entry *entry;
> >>> +       struct crypto_comp *tfm;
> >>>         u8 *src, *dst;
> >>>         unsigned int dlen;
> >>>         int ret;
> >>> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> >>>
> >>>         /* decompress */
> >>>         dlen = PAGE_SIZE;
> >>> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> >>> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> >>>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
> >>>         dst = kmap_atomic(page);
> >>> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
> >>> -               dst, &dlen);
> >>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> >>> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
> >>> +       put_cpu_ptr(entry->pool->tfm);
> >>>         kunmap_atomic(dst);
> >>> -       zpool_unmap_handle(zswap_pool, entry->handle);
> >>> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
> >>>         BUG_ON(ret);
> >>>
> >>>         spin_lock(&tree->lock);
> >>> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
> >>>         zswap_trees[type] = NULL;
> >>>  }
> >>>
> >>> -static struct zpool_ops zswap_zpool_ops = {
> >>> -       .evict = zswap_writeback_entry
> >>> -};
> >>> -
> >>>  static void zswap_frontswap_init(unsigned type)
> >>>  {
> >>>         struct zswap_tree *tree;
> >>> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
> >>>  **********************************/
> >>>  static int __init init_zswap(void)
> >>>  {
> >>> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> >>> +       struct zswap_pool *pool;
> >>>
> >>> -       pr_info("loading zswap\n");
> >>> -
> >>> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> >>> -                                       &zswap_zpool_ops);
> >>> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
> >>> -               pr_info("%s zpool not available\n", zswap_zpool_type);
> >>> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
> >>> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> >>> -                                       &zswap_zpool_ops);
> >>> -       }
> >>> -       if (!zswap_pool) {
> >>> -               pr_err("%s zpool not available\n", zswap_zpool_type);
> >>> -               pr_err("zpool creation failed\n");
> >>> -               goto error;
> >>> -       }
> >>> -       pr_info("using %s pool\n", zswap_zpool_type);
> >>> +       pr_info("loading\n");
> >>>
> >>>         if (zswap_entry_cache_create()) {
> >>>                 pr_err("entry cache creation failed\n");
> >>> -               goto cachefail;
> >>> +               goto cache_fail;
> >>>         }
> >>> -       if (zswap_comp_init()) {
> >>> -               pr_err("compressor initialization failed\n");
> >>> -               goto compfail;
> >>> +
> >>> +       if (zswap_cpu_dstmem_init()) {
> >>> +               pr_err("dstmem alloc failed\n");
> >>> +               goto dstmem_fail;
> >>>         }
> >>> -       if (zswap_cpu_init()) {
> >>> -               pr_err("per-cpu initialization failed\n");
> >>> -               goto pcpufail;
> >>> +
> >>> +       pool = __zswap_pool_create_fallback();
> >>> +       if (!pool) {
> >>> +               pr_err("pool creation failed\n");
> >>> +               goto pool_fail;
> >>>         }
> >>> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> >>> +               zpool_get_type(pool->zpool));
> >>> +
> >>> +       list_add(&pool->list, &zswap_pools);
> >>>
> >>>         frontswap_register_ops(&zswap_frontswap_ops);
> >>>         if (zswap_debugfs_init())
> >>>                 pr_warn("debugfs initialization failed\n");
> >>>         return 0;
> >>> -pcpufail:
> >>> -       zswap_comp_exit();
> >>> -compfail:
> >>> +
> >>> +pool_fail:
> >>> +       zswap_cpu_dstmem_destroy();
> >>> +dstmem_fail:
> >>>         zswap_entry_cache_destroy();
> >>> -cachefail:
> >>> -       zpool_destroy_pool(zswap_pool);
> >>> -error:
> >>> +cache_fail:
> >>>         return -ENOMEM;
> >>>  }
> >>>  /* must be late so crypto has time to come up */
> >>> --
> >>> 2.1.0
> >>>
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Seth Jennings June 18, 2015, 3:57 p.m. UTC | #6
On Thu, Jun 11, 2015 at 01:51:45PM -0500, Seth Jennings wrote:
> On Wed, Jun 10, 2015 at 04:54:24PM -0400, Dan Streetman wrote:
> > On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> > > On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
> > >> Add dynamic creation of pools.  Move the static crypto compression
> > >> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
> > >> the pool it's in.
> > >
> > > Seth, as far as the design, from your previous comments I assume you
> > > were thinking of maintaining separate lists of zpools and compressors?
> > >  I do see how that will reduce duplication of zpools and compressors,
> > > but it also requires adding a new pointer to each zswap_entry, and
> > > increasing the amount of code to manage each list separately.  And the
> > > most common case in zswap will be just a single zpool and compressor,
> > > not repeatedly changing params.  What do you think?
> > 
> > Any opinion on this patch?  If you want, I can break it up so there's
> > a list of zpools and a list of compressors.  Either the combined way
> > (this patch) or separate lists works for me, as long as the params are
> > changeable at runtime :-)
> 
> I'm still reviewing the code.  I was going to test it too but it doesn't
> compile for me:
> 
>   CC      mm/zswap.o
> mm/zswap.c: In function ‘__zswap_pool_create_fallback’:
> mm/zswap.c:605:10: warning: argument to ‘sizeof’ in ‘strncpy’ call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess]
>     sizeof(zswap_compressor));
>           ^
> mm/zswap.c:607:7: error: implicit declaration of function ‘zpool_has_pool’ [-Werror=implicit-function-declaration]
>   if (!zpool_has_pool(zswap_zpool_type)) {
>        ^
> mm/zswap.c:611:10: warning: argument to ‘sizeof’ in ‘strncpy’ call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess]
>     sizeof(zswap_zpool_type));
>           ^
> mm/zswap.c: At top level:
> mm/zswap.c:664:1: error: expected identifier or ‘(’ before ‘}’ token
>  }
>  ^
> mm/zswap.c:99:22: warning: ‘zswap_pool’ defined but not used [-Wunused-variable]
>  static struct zpool *zswap_pool;
>                       ^
> mm/zswap.c:531:27: warning: ‘zswap_pool_find_get’ defined but not used [-Wunused-function]
>  static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
>                            ^

Dan, I never heard back from you on this, but I figured it out.  PATCH
1/5 from your original patchset wasn't pulled in, but you didn't roll it
into this new patch either.

Seth


> 
> Seth
> 
> > 
> > 
> > >
> > >>
> > >> This is required by a separate patch which enables changing the
> > >> zswap zpool and compressor params at runtime.
> > >>
> > >> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
> > >> ---
> > >>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
> > >>  1 file changed, 408 insertions(+), 142 deletions(-)
> > >>
> > >> diff --git a/mm/zswap.c b/mm/zswap.c
> > >> index 2d5727b..fc93770 100644
> > >> --- a/mm/zswap.c
> > >> +++ b/mm/zswap.c
> > >> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
> > >>  static struct zpool *zswap_pool;
> > >>
> > >>  /*********************************
> > >> -* compression functions
> > >> +* data structures
> > >>  **********************************/
> > >> -/* per-cpu compression transforms */
> > >> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
> > >>
> > >> -enum comp_op {
> > >> -       ZSWAP_COMPOP_COMPRESS,
> > >> -       ZSWAP_COMPOP_DECOMPRESS
> > >> +struct zswap_pool {
> > >> +       struct zpool *zpool;
> > >> +       struct kref kref;
> > >> +       struct list_head list;
> > >> +       struct rcu_head rcu_head;
> > >> +       struct notifier_block notifier;
> > >> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
> > >> +       struct crypto_comp * __percpu *tfm;
> > >>  };
> > >>
> > >> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
> > >> -                               u8 *dst, unsigned int *dlen)
> > >> -{
> > >> -       struct crypto_comp *tfm;
> > >> -       int ret;
> > >> -
> > >> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
> > >> -       switch (op) {
> > >> -       case ZSWAP_COMPOP_COMPRESS:
> > >> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
> > >> -               break;
> > >> -       case ZSWAP_COMPOP_DECOMPRESS:
> > >> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
> > >> -               break;
> > >> -       default:
> > >> -               ret = -EINVAL;
> > >> -       }
> > >> -
> > >> -       put_cpu();
> > >> -       return ret;
> > >> -}
> > >> -
> > >> -static int __init zswap_comp_init(void)
> > >> -{
> > >> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> > >> -               pr_info("%s compressor not available\n", zswap_compressor);
> > >> -               /* fall back to default compressor */
> > >> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
> > >> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
> > >> -                       /* can't even load the default compressor */
> > >> -                       return -ENODEV;
> > >> -       }
> > >> -       pr_info("using %s compressor\n", zswap_compressor);
> > >> -
> > >> -       /* alloc percpu transforms */
> > >> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
> > >> -       if (!zswap_comp_pcpu_tfms)
> > >> -               return -ENOMEM;
> > >> -       return 0;
> > >> -}
> > >> -
> > >> -static void __init zswap_comp_exit(void)
> > >> -{
> > >> -       /* free percpu transforms */
> > >> -       free_percpu(zswap_comp_pcpu_tfms);
> > >> -}
> > >> -
> > >> -/*********************************
> > >> -* data structures
> > >> -**********************************/
> > >>  /*
> > >>   * struct zswap_entry
> > >>   *
> > >> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
> > >>   * page within zswap.
> > >>   *
> > >>   * rbnode - links the entry into red-black tree for the appropriate swap type
> > >> + * offset - the swap offset for the entry.  Index into the red-black tree.
> > >>   * refcount - the number of outstanding reference to the entry. This is needed
> > >>   *            to protect against premature freeing of the entry by code
> > >>   *            concurrent calls to load, invalidate, and writeback.  The lock
> > >>   *            for the zswap_tree structure that contains the entry must
> > >>   *            be held while changing the refcount.  Since the lock must
> > >>   *            be held, there is no reason to also make refcount atomic.
> > >> - * offset - the swap offset for the entry.  Index into the red-black tree.
> > >> - * handle - zpool allocation handle that stores the compressed page data
> > >>   * length - the length in bytes of the compressed page data.  Needed during
> > >>   *          decompression
> > >> + * pool - the zswap_pool the entry's data is in
> > >> + * handle - zpool allocation handle that stores the compressed page data
> > >>   */
> > >>  struct zswap_entry {
> > >>         struct rb_node rbnode;
> > >>         pgoff_t offset;
> > >>         int refcount;
> > >>         unsigned int length;
> > >> +       struct zswap_pool *pool;
> > >>         unsigned long handle;
> > >>  };
> > >>
> > >> @@ -201,6 +156,44 @@ struct zswap_tree {
> > >>
> > >>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
> > >>
> > >> +/* RCU-protected iteration */
> > >> +static LIST_HEAD(zswap_pools);
> > >> +/* protects zswap_pools list modification */
> > >> +static DEFINE_SPINLOCK(zswap_pools_lock);
> > >> +
> > >> +/*********************************
> > >> +* helpers and fwd declarations
> > >> +**********************************/
> > >> +
> > >> +#define zswap_pool_debug(msg, p)                               \
> > >> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
> > >> +                zpool_get_type((p)->zpool))
> > >> +
> > >> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
> > >> +static int zswap_pool_get(struct zswap_pool *pool);
> > >> +static void zswap_pool_put(struct zswap_pool *pool);
> > >> +
> > >> +static bool zswap_is_full(void)
> > >> +{
> > >> +       return totalram_pages * zswap_max_pool_percent / 100 <
> > >> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> > >> +}
> > >> +
> > >> +static void zswap_update_total_size(void)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +       u64 total = 0;
> > >> +
> > >> +       rcu_read_lock();
> > >> +
> > >> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> > >> +               total += zpool_get_total_size(pool->zpool);
> > >> +
> > >> +       rcu_read_unlock();
> > >> +
> > >> +       zswap_pool_total_size = total;
> > >> +}
> > >> +
> > >>  /*********************************
> > >>  * zswap entry functions
> > >>  **********************************/
> > >> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> > >>   */
> > >>  static void zswap_free_entry(struct zswap_entry *entry)
> > >>  {
> > >> -       zpool_free(zswap_pool, entry->handle);
> > >> +       zpool_free(entry->pool->zpool, entry->handle);
> > >> +       zswap_pool_put(entry->pool);
> > >>         zswap_entry_cache_free(entry);
> > >>         atomic_dec(&zswap_stored_pages);
> > >> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> > >> +       zswap_update_total_size();
> > >>  }
> > >>
> > >>  /* caller must hold the tree lock */
> > >> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> > >>  **********************************/
> > >>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
> > >>
> > >> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> > >> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
> > >>  {
> > >> -       struct crypto_comp *tfm;
> > >>         u8 *dst;
> > >>
> > >>         switch (action) {
> > >>         case CPU_UP_PREPARE:
> > >> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
> > >> -               if (IS_ERR(tfm)) {
> > >> -                       pr_err("can't allocate compressor transform\n");
> > >> -                       return NOTIFY_BAD;
> > >> -               }
> > >> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
> > >>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
> > >>                 if (!dst) {
> > >>                         pr_err("can't allocate compressor buffer\n");
> > >> -                       crypto_free_comp(tfm);
> > >> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> > >>                         return NOTIFY_BAD;
> > >>                 }
> > >>                 per_cpu(zswap_dstmem, cpu) = dst;
> > >>                 break;
> > >>         case CPU_DEAD:
> > >>         case CPU_UP_CANCELED:
> > >> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
> > >> -               if (tfm) {
> > >> -                       crypto_free_comp(tfm);
> > >> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
> > >> -               }
> > >>                 dst = per_cpu(zswap_dstmem, cpu);
> > >>                 kfree(dst);
> > >>                 per_cpu(zswap_dstmem, cpu) = NULL;
> > >> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
> > >>         return NOTIFY_OK;
> > >>  }
> > >>
> > >> -static int zswap_cpu_notifier(struct notifier_block *nb,
> > >> -                               unsigned long action, void *pcpu)
> > >> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
> > >> +                                    unsigned long action, void *pcpu)
> > >>  {
> > >> -       unsigned long cpu = (unsigned long)pcpu;
> > >> -       return __zswap_cpu_notifier(action, cpu);
> > >> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
> > >>  }
> > >>
> > >> -static struct notifier_block zswap_cpu_notifier_block = {
> > >> -       .notifier_call = zswap_cpu_notifier
> > >> +static struct notifier_block zswap_dstmem_notifier = {
> > >> +       .notifier_call =        zswap_cpu_dstmem_notifier,
> > >>  };
> > >>
> > >> -static int __init zswap_cpu_init(void)
> > >> +static int __init zswap_cpu_dstmem_init(void)
> > >>  {
> > >>         unsigned long cpu;
> > >>
> > >>         cpu_notifier_register_begin();
> > >>         for_each_online_cpu(cpu)
> > >> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
> > >> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
> > >> +                   NOTIFY_BAD)
> > >>                         goto cleanup;
> > >> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
> > >> +       __register_cpu_notifier(&zswap_dstmem_notifier);
> > >>         cpu_notifier_register_done();
> > >>         return 0;
> > >>
> > >>  cleanup:
> > >>         for_each_online_cpu(cpu)
> > >> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
> > >> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> > >>         cpu_notifier_register_done();
> > >>         return -ENOMEM;
> > >>  }
> > >>
> > >> +static void zswap_cpu_dstmem_destroy(void)
> > >> +{
> > >> +       unsigned long cpu;
> > >> +
> > >> +       cpu_notifier_register_begin();
> > >> +       for_each_online_cpu(cpu)
> > >> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
> > >> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
> > >> +       cpu_notifier_register_done();
> > >> +}
> > >> +
> > >> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
> > >> +                                    unsigned long action, unsigned long cpu)
> > >> +{
> > >> +       struct crypto_comp *tfm;
> > >> +
> > >> +       switch (action) {
> > >> +       case CPU_UP_PREPARE:
> > >> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
> > >> +                       break;
> > >> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
> > >> +               if (IS_ERR_OR_NULL(tfm)) {
> > >> +                       pr_err("could not alloc crypto comp %s : %ld\n",
> > >> +                              pool->tfm_name, PTR_ERR(tfm));
> > >> +                       return NOTIFY_BAD;
> > >> +               }
> > >> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
> > >> +               break;
> > >> +       case CPU_DEAD:
> > >> +       case CPU_UP_CANCELED:
> > >> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
> > >> +               if (!IS_ERR_OR_NULL(tfm))
> > >> +                       crypto_free_comp(tfm);
> > >> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
> > >> +               break;
> > >> +       default:
> > >> +               break;
> > >> +       }
> > >> +       return NOTIFY_OK;
> > >> +}
> > >> +
> > >> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
> > >> +                                  unsigned long action, void *pcpu)
> > >> +{
> > >> +       unsigned long cpu = (unsigned long)pcpu;
> > >> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
> > >> +
> > >> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
> > >> +}
> > >> +
> > >> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
> > >> +{
> > >> +       unsigned long cpu;
> > >> +
> > >> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
> > >> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
> > >> +
> > >> +       cpu_notifier_register_begin();
> > >> +       for_each_online_cpu(cpu)
> > >> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
> > >> +                   NOTIFY_BAD)
> > >> +                       goto cleanup;
> > >> +       __register_cpu_notifier(&pool->notifier);
> > >> +       cpu_notifier_register_done();
> > >> +       return 0;
> > >> +
> > >> +cleanup:
> > >> +       for_each_online_cpu(cpu)
> > >> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> > >> +       cpu_notifier_register_done();
> > >> +       return -ENOMEM;
> > >> +}
> > >> +
> > >> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
> > >> +{
> > >> +       unsigned long cpu;
> > >> +
> > >> +       cpu_notifier_register_begin();
> > >> +       for_each_online_cpu(cpu)
> > >> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
> > >> +       __unregister_cpu_notifier(&pool->notifier);
> > >> +       cpu_notifier_register_done();
> > >> +}
> > >> +
> > >>  /*********************************
> > >> -* helpers
> > >> +* pool functions
> > >>  **********************************/
> > >> -static bool zswap_is_full(void)
> > >> +
> > >> +static struct zswap_pool *__zswap_pool_current(void)
> > >>  {
> > >> -       return totalram_pages * zswap_max_pool_percent / 100 <
> > >> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
> > >> +       struct zswap_pool *pool;
> > >> +
> > >> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
> > >> +       WARN_ON(!pool);
> > >> +
> > >> +       return pool;
> > >> +}
> > >> +
> > >> +static struct zswap_pool *zswap_pool_current(void)
> > >> +{
> > >> +       assert_spin_locked(&zswap_pools_lock);
> > >> +
> > >> +       return __zswap_pool_current();
> > >> +}
> > >> +
> > >> +static struct zswap_pool *zswap_pool_current_get(void)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +
> > >> +       rcu_read_lock();
> > >> +
> > >> +       pool = __zswap_pool_current();
> > >> +       if (!pool || !zswap_pool_get(pool))
> > >> +               pool = NULL;
> > >> +
> > >> +       rcu_read_unlock();
> > >> +
> > >> +       return pool;
> > >> +}
> > >> +
> > >> +static struct zswap_pool *zswap_pool_last_get(void)
> > >> +{
> > >> +       struct zswap_pool *pool, *last = NULL;
> > >> +
> > >> +       rcu_read_lock();
> > >> +
> > >> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> > >> +               last = pool;
> > >> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
> > >> +               last = NULL;
> > >> +
> > >> +       rcu_read_unlock();
> > >> +
> > >> +       return last;
> > >> +}
> > >> +
> > >> +static struct zpool_ops zswap_zpool_ops = {
> > >> +       .evict = zswap_writeback_entry
> > >> +};
> > >> +
> > >> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +
> > >> +       assert_spin_locked(&zswap_pools_lock);
> > >> +
> > >> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
> > >> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
> > >> +                       continue;
> > >> +               if (strncmp(zpool_get_type(pool->zpool), type,
> > >> +                           sizeof(zswap_zpool_type)))
> > >> +                       continue;
> > >> +               /* if we can't get it, it's about to be destroyed */
> > >> +               if (!zswap_pool_get(pool))
> > >> +                       continue;
> > >> +               return pool;
> > >> +       }
> > >> +
> > >> +       return NULL;
> > >> +}
> > >> +
> > >> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> > >> +
> > >> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
> > >> +       if (!pool) {
> > >> +               pr_err("pool alloc failed\n");
> > >> +               return NULL;
> > >> +       }
> > >> +
> > >> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
> > >> +       if (!pool->zpool) {
> > >> +               pr_err("%s zpool not available\n", type);
> > >> +               goto error;
> > >> +       }
> > >> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> > >> +
> > >> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> > >> +       pool->tfm = alloc_percpu(struct crypto_comp *);
> > >> +       if (!pool->tfm) {
> > >> +               pr_err("percpu alloc failed\n");
> > >> +               goto error;
> > >> +       }
> > >> +
> > >> +       if (zswap_cpu_comp_init(pool))
> > >> +               goto error;
> > >> +       pr_debug("using %s compressor\n", pool->tfm_name);
> > >> +
> > >> +       /* being the current pool takes 1 ref; this func expects the
> > >> +        * caller to always add the new pool as the current pool
> > >> +        */
> > >> +       kref_init(&pool->kref);
> > >> +       INIT_LIST_HEAD(&pool->list);
> > >> +
> > >> +       zswap_pool_debug("created", pool);
> > >> +
> > >> +       return pool;
> > >> +
> > >> +error:
> > >> +       free_percpu(pool->tfm);
> > >> +       if (pool->zpool)
> > >> +               zpool_destroy_pool(pool->zpool);
> > >> +       kfree(pool);
> > >> +       return NULL;
> > >> +}
> > >> +
> > >> +static struct zswap_pool *__zswap_pool_create_fallback(void)
> > >> +{
> > >> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
> > >> +               pr_err("compressor %s not available, using default %s\n",
> > >> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
> > >> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
> > >> +                       sizeof(zswap_compressor));
> > >> +       }
> > >> +       if (!zpool_has_pool(zswap_zpool_type)) {
> > >> +               pr_err("zpool %s not available, using default %s\n",
> > >> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
> > >> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
> > >> +                       sizeof(zswap_zpool_type));
> > >> +       }
> > >> +
> > >> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
> > >> +}
> > >> +
> > >> +static void zswap_pool_destroy(struct zswap_pool *pool)
> > >> +{
> > >> +       zswap_pool_debug("destroying", pool);
> > >> +
> > >> +       zswap_cpu_comp_destroy(pool);
> > >> +       free_percpu(pool->tfm);
> > >> +       zpool_destroy_pool(pool->zpool);
> > >> +       kfree(pool);
> > >> +}
> > >> +
> > >> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
> > >> +{
> > >> +       return kref_get_unless_zero(&pool->kref);
> > >> +}
> > >> +
> > >> +static void __zswap_pool_release(struct rcu_head *head)
> > >> +{
> > >> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
> > >> +
> > >> +       /* nobody should have been able to get a kref... */
> > >> +       WARN_ON(kref_get_unless_zero(&pool->kref));
> > >> +
> > >> +       /* pool is now off zswap_pools list and has no references. */
> > >> +       zswap_pool_destroy(pool);
> > >> +}
> > >> +
> > >> +static void __zswap_pool_empty(struct kref *kref)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +
> > >> +       pool = container_of(kref, typeof(*pool), kref);
> > >> +
> > >> +       spin_lock(&zswap_pools_lock);
> > >> +
> > >> +       WARN_ON(pool == zswap_pool_current());
> > >> +
> > >> +       list_del_rcu(&pool->list);
> > >> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
> > >> +
> > >> +       spin_unlock(&zswap_pools_lock);
> > >> +}
> > >> +
> > >> +static void zswap_pool_put(struct zswap_pool *pool)
> > >> +{
> > >> +       kref_put(&pool->kref, __zswap_pool_empty);
> > >> +}
> > >> +
> > >>  }
> > >>
> > >>  /*********************************
> > >> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > >>         pgoff_t offset;
> > >>         struct zswap_entry *entry;
> > >>         struct page *page;
> > >> +       struct crypto_comp *tfm;
> > >>         u8 *src, *dst;
> > >>         unsigned int dlen;
> > >>         int ret;
> > >> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
> > >>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
> > >>                 /* decompress */
> > >>                 dlen = PAGE_SIZE;
> > >> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> > >> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> > >>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
> > >>                 dst = kmap_atomic(page);
> > >> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
> > >> -                               entry->length, dst, &dlen);
> > >> +               tfm = *get_cpu_ptr(entry->pool->tfm);
> > >> +               ret = crypto_comp_decompress(tfm, src, entry->length,
> > >> +                                            dst, &dlen);
> > >> +               put_cpu_ptr(entry->pool->tfm);
> > >>                 kunmap_atomic(dst);
> > >> -               zpool_unmap_handle(zswap_pool, entry->handle);
> > >> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
> > >>                 BUG_ON(ret);
> > >>                 BUG_ON(dlen != PAGE_SIZE);
> > >>
> > >> @@ -633,6 +882,22 @@ end:
> > >>         return ret;
> > >>  }
> > >>
> > >> +static int zswap_shrink(void)
> > >> +{
> > >> +       struct zswap_pool *pool;
> > >> +       int ret;
> > >> +
> > >> +       pool = zswap_pool_last_get();
> > >> +       if (!pool)
> > >> +               return -ENOENT;
> > >> +
> > >> +       ret = zpool_shrink(pool->zpool, 1, NULL);
> > >> +
> > >> +       zswap_pool_put(pool);
> > >> +
> > >> +       return ret;
> > >> +}
> > >> +
> > >>  /*********************************
> > >>  * frontswap hooks
> > >>  **********************************/
> > >> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> > >>  {
> > >>         struct zswap_tree *tree = zswap_trees[type];
> > >>         struct zswap_entry *entry, *dupentry;
> > >> +       struct crypto_comp *tfm;
> > >>         int ret;
> > >>         unsigned int dlen = PAGE_SIZE, len;
> > >>         unsigned long handle;
> > >> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> > >>         /* reclaim space if needed */
> > >>         if (zswap_is_full()) {
> > >>                 zswap_pool_limit_hit++;
> > >> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
> > >> +               if (zswap_shrink()) {
> > >>                         zswap_reject_reclaim_fail++;
> > >>                         ret = -ENOMEM;
> > >>                         goto reject;
> > >> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> > >>                 goto reject;
> > >>         }
> > >>
> > >> +       /* if entry is successfully added, it keeps the reference */
> > >> +       entry->pool = zswap_pool_current_get();
> > >> +       if (!entry->pool) {
> > >> +               ret = -EINVAL;
> > >> +               goto freepage;
> > >> +       }
> > >> +
> > >>         /* compress */
> > >>         dst = get_cpu_var(zswap_dstmem);
> > >> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> > >>         src = kmap_atomic(page);
> > >> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
> > >> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
> > >>         kunmap_atomic(src);
> > >> +       put_cpu_ptr(entry->pool->tfm);
> > >>         if (ret) {
> > >>                 ret = -EINVAL;
> > >> -               goto freepage;
> > >> +               goto put_dstmem;
> > >>         }
> > >>
> > >>         /* store */
> > >>         len = dlen + sizeof(struct zswap_header);
> > >> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
> > >> -               &handle);
> > >> +       ret = zpool_malloc(entry->pool->zpool, len,
> > >> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
> > >>         if (ret == -ENOSPC) {
> > >>                 zswap_reject_compress_poor++;
> > >> -               goto freepage;
> > >> +               goto put_dstmem;
> > >>         }
> > >>         if (ret) {
> > >>                 zswap_reject_alloc_fail++;
> > >> -               goto freepage;
> > >> +               goto put_dstmem;
> > >>         }
> > >> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
> > >> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
> > >>         zhdr->swpentry = swp_entry(type, offset);
> > >>         buf = (u8 *)(zhdr + 1);
> > >>         memcpy(buf, dst, dlen);
> > >> -       zpool_unmap_handle(zswap_pool, handle);
> > >> +       zpool_unmap_handle(entry->pool->zpool, handle);
> > >>         put_cpu_var(zswap_dstmem);
> > >>
> > >>         /* populate entry */
> > >> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> > >>
> > >>         /* update stats */
> > >>         atomic_inc(&zswap_stored_pages);
> > >> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
> > >> +       zswap_update_total_size();
> > >>
> > >>         return 0;
> > >>
> > >> -freepage:
> > >> +put_dstmem:
> > >>         put_cpu_var(zswap_dstmem);
> > >> +       zswap_pool_put(entry->pool);
> > >> +freepage:
> > >>         zswap_entry_cache_free(entry);
> > >>  reject:
> > >>         return ret;
> > >> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> > >>  {
> > >>         struct zswap_tree *tree = zswap_trees[type];
> > >>         struct zswap_entry *entry;
> > >> +       struct crypto_comp *tfm;
> > >>         u8 *src, *dst;
> > >>         unsigned int dlen;
> > >>         int ret;
> > >> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
> > >>
> > >>         /* decompress */
> > >>         dlen = PAGE_SIZE;
> > >> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
> > >> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
> > >>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
> > >>         dst = kmap_atomic(page);
> > >> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
> > >> -               dst, &dlen);
> > >> +       tfm = *get_cpu_ptr(entry->pool->tfm);
> > >> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
> > >> +       put_cpu_ptr(entry->pool->tfm);
> > >>         kunmap_atomic(dst);
> > >> -       zpool_unmap_handle(zswap_pool, entry->handle);
> > >> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
> > >>         BUG_ON(ret);
> > >>
> > >>         spin_lock(&tree->lock);
> > >> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
> > >>         zswap_trees[type] = NULL;
> > >>  }
> > >>
> > >> -static struct zpool_ops zswap_zpool_ops = {
> > >> -       .evict = zswap_writeback_entry
> > >> -};
> > >> -
> > >>  static void zswap_frontswap_init(unsigned type)
> > >>  {
> > >>         struct zswap_tree *tree;
> > >> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
> > >>  **********************************/
> > >>  static int __init init_zswap(void)
> > >>  {
> > >> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
> > >> +       struct zswap_pool *pool;
> > >>
> > >> -       pr_info("loading zswap\n");
> > >> -
> > >> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> > >> -                                       &zswap_zpool_ops);
> > >> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
> > >> -               pr_info("%s zpool not available\n", zswap_zpool_type);
> > >> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
> > >> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
> > >> -                                       &zswap_zpool_ops);
> > >> -       }
> > >> -       if (!zswap_pool) {
> > >> -               pr_err("%s zpool not available\n", zswap_zpool_type);
> > >> -               pr_err("zpool creation failed\n");
> > >> -               goto error;
> > >> -       }
> > >> -       pr_info("using %s pool\n", zswap_zpool_type);
> > >> +       pr_info("loading\n");
> > >>
> > >>         if (zswap_entry_cache_create()) {
> > >>                 pr_err("entry cache creation failed\n");
> > >> -               goto cachefail;
> > >> +               goto cache_fail;
> > >>         }
> > >> -       if (zswap_comp_init()) {
> > >> -               pr_err("compressor initialization failed\n");
> > >> -               goto compfail;
> > >> +
> > >> +       if (zswap_cpu_dstmem_init()) {
> > >> +               pr_err("dstmem alloc failed\n");
> > >> +               goto dstmem_fail;
> > >>         }
> > >> -       if (zswap_cpu_init()) {
> > >> -               pr_err("per-cpu initialization failed\n");
> > >> -               goto pcpufail;
> > >> +
> > >> +       pool = __zswap_pool_create_fallback();
> > >> +       if (!pool) {
> > >> +               pr_err("pool creation failed\n");
> > >> +               goto pool_fail;
> > >>         }
> > >> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> > >> +               zpool_get_type(pool->zpool));
> > >> +
> > >> +       list_add(&pool->list, &zswap_pools);
> > >>
> > >>         frontswap_register_ops(&zswap_frontswap_ops);
> > >>         if (zswap_debugfs_init())
> > >>                 pr_warn("debugfs initialization failed\n");
> > >>         return 0;
> > >> -pcpufail:
> > >> -       zswap_comp_exit();
> > >> -compfail:
> > >> +
> > >> +pool_fail:
> > >> +       zswap_cpu_dstmem_destroy();
> > >> +dstmem_fail:
> > >>         zswap_entry_cache_destroy();
> > >> -cachefail:
> > >> -       zpool_destroy_pool(zswap_pool);
> > >> -error:
> > >> +cache_fail:
> > >>         return -ENOMEM;
> > >>  }
> > >>  /* must be late so crypto has time to come up */
> > >> --
> > >> 2.1.0
> > >>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dan Streetman June 25, 2015, 8:22 p.m. UTC | #7
On Wed, Jun 17, 2015 at 7:13 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Wed, Jun 10, 2015 at 4:54 PM, Dan Streetman <ddstreet@ieee.org> wrote:
>> On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman <ddstreet@ieee.org> wrote:
>>> On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman <ddstreet@ieee.org> wrote:
>>>> Add dynamic creation of pools.  Move the static crypto compression
>>>> per-cpu transforms into each pool.  Add a pointer to zswap_entry to
>>>> the pool it's in.
>>>
>>> Seth, as far as the design, from your previous comments I assume you
>>> were thinking of maintaining separate lists of zpools and compressors?
>>>  I do see how that will reduce duplication of zpools and compressors,
>>> but it also requires adding a new pointer to each zswap_entry, and
>>> increasing the amount of code to manage each list separately.  And the
>>> most common case in zswap will be just a single zpool and compressor,
>>> not repeatedly changing params.  What do you think?
>>
>> Any opinion on this patch?  If you want, I can break it up so there's
>> a list of zpools and a list of compressors.  Either the combined way
>> (this patch) or separate lists works for me, as long as the params are
>> changeable at runtime :-)
>
> You on vacation Seth?  Let me know what direction you prefer for this...

So...

Andrew (or anyone else) do you have any objection to this patch?  I
assume Seth is on an extended vacation; maybe you could add this to
your mmotm?  If Seth prefers changes to it he still has a couple
months-ish until the next merge window.  I could then send the
follow-on patches, that allow zswap params to be set at runtime.  I
can resend them all as a series, if you prefer that.

I think the main issue Seth had was keeping separate lists of zpools
and compressors, instead of this patch's approach of a single list
with each element containing a zpool and compressor together.  My
preference is a single list, for the reasons I gave above - mainly,
that 99+% of the time, zswap will only be using a single zpool and
compressor; users will just set the zpool and compressor immediately
after boot, instead of having to add boot parameters, and never change
them again.  So keeping 2 lists only adds complexity for little to no
benefit in the common case.

>
>>
>>
>>>
>>>>
>>>> This is required by a separate patch which enables changing the
>>>> zswap zpool and compressor params at runtime.
>>>>
>>>> Signed-off-by: Dan Streetman <ddstreet@ieee.org>
>>>> ---
>>>>  mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++----------------
>>>>  1 file changed, 408 insertions(+), 142 deletions(-)
>>>>
>>>> diff --git a/mm/zswap.c b/mm/zswap.c
>>>> index 2d5727b..fc93770 100644
>>>> --- a/mm/zswap.c
>>>> +++ b/mm/zswap.c
>>>> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444);
>>>>  static struct zpool *zswap_pool;
>>>>
>>>>  /*********************************
>>>> -* compression functions
>>>> +* data structures
>>>>  **********************************/
>>>> -/* per-cpu compression transforms */
>>>> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
>>>>
>>>> -enum comp_op {
>>>> -       ZSWAP_COMPOP_COMPRESS,
>>>> -       ZSWAP_COMPOP_DECOMPRESS
>>>> +struct zswap_pool {
>>>> +       struct zpool *zpool;
>>>> +       struct kref kref;
>>>> +       struct list_head list;
>>>> +       struct rcu_head rcu_head;
>>>> +       struct notifier_block notifier;
>>>> +       char tfm_name[CRYPTO_MAX_ALG_NAME];
>>>> +       struct crypto_comp * __percpu *tfm;
>>>>  };
>>>>
>>>> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
>>>> -                               u8 *dst, unsigned int *dlen)
>>>> -{
>>>> -       struct crypto_comp *tfm;
>>>> -       int ret;
>>>> -
>>>> -       tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
>>>> -       switch (op) {
>>>> -       case ZSWAP_COMPOP_COMPRESS:
>>>> -               ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
>>>> -               break;
>>>> -       case ZSWAP_COMPOP_DECOMPRESS:
>>>> -               ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
>>>> -               break;
>>>> -       default:
>>>> -               ret = -EINVAL;
>>>> -       }
>>>> -
>>>> -       put_cpu();
>>>> -       return ret;
>>>> -}
>>>> -
>>>> -static int __init zswap_comp_init(void)
>>>> -{
>>>> -       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>>>> -               pr_info("%s compressor not available\n", zswap_compressor);
>>>> -               /* fall back to default compressor */
>>>> -               zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
>>>> -               if (!crypto_has_comp(zswap_compressor, 0, 0))
>>>> -                       /* can't even load the default compressor */
>>>> -                       return -ENODEV;
>>>> -       }
>>>> -       pr_info("using %s compressor\n", zswap_compressor);
>>>> -
>>>> -       /* alloc percpu transforms */
>>>> -       zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
>>>> -       if (!zswap_comp_pcpu_tfms)
>>>> -               return -ENOMEM;
>>>> -       return 0;
>>>> -}
>>>> -
>>>> -static void __init zswap_comp_exit(void)
>>>> -{
>>>> -       /* free percpu transforms */
>>>> -       free_percpu(zswap_comp_pcpu_tfms);
>>>> -}
>>>> -
>>>> -/*********************************
>>>> -* data structures
>>>> -**********************************/
>>>>  /*
>>>>   * struct zswap_entry
>>>>   *
>>>> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void)
>>>>   * page within zswap.
>>>>   *
>>>>   * rbnode - links the entry into red-black tree for the appropriate swap type
>>>> + * offset - the swap offset for the entry.  Index into the red-black tree.
>>>>   * refcount - the number of outstanding reference to the entry. This is needed
>>>>   *            to protect against premature freeing of the entry by code
>>>>   *            concurrent calls to load, invalidate, and writeback.  The lock
>>>>   *            for the zswap_tree structure that contains the entry must
>>>>   *            be held while changing the refcount.  Since the lock must
>>>>   *            be held, there is no reason to also make refcount atomic.
>>>> - * offset - the swap offset for the entry.  Index into the red-black tree.
>>>> - * handle - zpool allocation handle that stores the compressed page data
>>>>   * length - the length in bytes of the compressed page data.  Needed during
>>>>   *          decompression
>>>> + * pool - the zswap_pool the entry's data is in
>>>> + * handle - zpool allocation handle that stores the compressed page data
>>>>   */
>>>>  struct zswap_entry {
>>>>         struct rb_node rbnode;
>>>>         pgoff_t offset;
>>>>         int refcount;
>>>>         unsigned int length;
>>>> +       struct zswap_pool *pool;
>>>>         unsigned long handle;
>>>>  };
>>>>
>>>> @@ -201,6 +156,44 @@ struct zswap_tree {
>>>>
>>>>  static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
>>>>
>>>> +/* RCU-protected iteration */
>>>> +static LIST_HEAD(zswap_pools);
>>>> +/* protects zswap_pools list modification */
>>>> +static DEFINE_SPINLOCK(zswap_pools_lock);
>>>> +
>>>> +/*********************************
>>>> +* helpers and fwd declarations
>>>> +**********************************/
>>>> +
>>>> +#define zswap_pool_debug(msg, p)                               \
>>>> +       pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
>>>> +                zpool_get_type((p)->zpool))
>>>> +
>>>> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
>>>> +static int zswap_pool_get(struct zswap_pool *pool);
>>>> +static void zswap_pool_put(struct zswap_pool *pool);
>>>> +
>>>> +static bool zswap_is_full(void)
>>>> +{
>>>> +       return totalram_pages * zswap_max_pool_percent / 100 <
>>>> +               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>>>> +}
>>>> +
>>>> +static void zswap_update_total_size(void)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +       u64 total = 0;
>>>> +
>>>> +       rcu_read_lock();
>>>> +
>>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>>>> +               total += zpool_get_total_size(pool->zpool);
>>>> +
>>>> +       rcu_read_unlock();
>>>> +
>>>> +       zswap_pool_total_size = total;
>>>> +}
>>>> +
>>>>  /*********************************
>>>>  * zswap entry functions
>>>>  **********************************/
>>>> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
>>>>   */
>>>>  static void zswap_free_entry(struct zswap_entry *entry)
>>>>  {
>>>> -       zpool_free(zswap_pool, entry->handle);
>>>> +       zpool_free(entry->pool->zpool, entry->handle);
>>>> +       zswap_pool_put(entry->pool);
>>>>         zswap_entry_cache_free(entry);
>>>>         atomic_dec(&zswap_stored_pages);
>>>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>>>> +       zswap_update_total_size();
>>>>  }
>>>>
>>>>  /* caller must hold the tree lock */
>>>> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
>>>>  **********************************/
>>>>  static DEFINE_PER_CPU(u8 *, zswap_dstmem);
>>>>
>>>> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>>>> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
>>>>  {
>>>> -       struct crypto_comp *tfm;
>>>>         u8 *dst;
>>>>
>>>>         switch (action) {
>>>>         case CPU_UP_PREPARE:
>>>> -               tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
>>>> -               if (IS_ERR(tfm)) {
>>>> -                       pr_err("can't allocate compressor transform\n");
>>>> -                       return NOTIFY_BAD;
>>>> -               }
>>>> -               *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
>>>>                 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
>>>>                 if (!dst) {
>>>>                         pr_err("can't allocate compressor buffer\n");
>>>> -                       crypto_free_comp(tfm);
>>>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>>>>                         return NOTIFY_BAD;
>>>>                 }
>>>>                 per_cpu(zswap_dstmem, cpu) = dst;
>>>>                 break;
>>>>         case CPU_DEAD:
>>>>         case CPU_UP_CANCELED:
>>>> -               tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
>>>> -               if (tfm) {
>>>> -                       crypto_free_comp(tfm);
>>>> -                       *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
>>>> -               }
>>>>                 dst = per_cpu(zswap_dstmem, cpu);
>>>>                 kfree(dst);
>>>>                 per_cpu(zswap_dstmem, cpu) = NULL;
>>>> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
>>>>         return NOTIFY_OK;
>>>>  }
>>>>
>>>> -static int zswap_cpu_notifier(struct notifier_block *nb,
>>>> -                               unsigned long action, void *pcpu)
>>>> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
>>>> +                                    unsigned long action, void *pcpu)
>>>>  {
>>>> -       unsigned long cpu = (unsigned long)pcpu;
>>>> -       return __zswap_cpu_notifier(action, cpu);
>>>> +       return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
>>>>  }
>>>>
>>>> -static struct notifier_block zswap_cpu_notifier_block = {
>>>> -       .notifier_call = zswap_cpu_notifier
>>>> +static struct notifier_block zswap_dstmem_notifier = {
>>>> +       .notifier_call =        zswap_cpu_dstmem_notifier,
>>>>  };
>>>>
>>>> -static int __init zswap_cpu_init(void)
>>>> +static int __init zswap_cpu_dstmem_init(void)
>>>>  {
>>>>         unsigned long cpu;
>>>>
>>>>         cpu_notifier_register_begin();
>>>>         for_each_online_cpu(cpu)
>>>> -               if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
>>>> +               if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
>>>> +                   NOTIFY_BAD)
>>>>                         goto cleanup;
>>>> -       __register_cpu_notifier(&zswap_cpu_notifier_block);
>>>> +       __register_cpu_notifier(&zswap_dstmem_notifier);
>>>>         cpu_notifier_register_done();
>>>>         return 0;
>>>>
>>>>  cleanup:
>>>>         for_each_online_cpu(cpu)
>>>> -               __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
>>>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>>>>         cpu_notifier_register_done();
>>>>         return -ENOMEM;
>>>>  }
>>>>
>>>> +static void zswap_cpu_dstmem_destroy(void)
>>>> +{
>>>> +       unsigned long cpu;
>>>> +
>>>> +       cpu_notifier_register_begin();
>>>> +       for_each_online_cpu(cpu)
>>>> +               __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
>>>> +       __unregister_cpu_notifier(&zswap_dstmem_notifier);
>>>> +       cpu_notifier_register_done();
>>>> +}
>>>> +
>>>> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
>>>> +                                    unsigned long action, unsigned long cpu)
>>>> +{
>>>> +       struct crypto_comp *tfm;
>>>> +
>>>> +       switch (action) {
>>>> +       case CPU_UP_PREPARE:
>>>> +               if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
>>>> +                       break;
>>>> +               tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
>>>> +               if (IS_ERR_OR_NULL(tfm)) {
>>>> +                       pr_err("could not alloc crypto comp %s : %ld\n",
>>>> +                              pool->tfm_name, PTR_ERR(tfm));
>>>> +                       return NOTIFY_BAD;
>>>> +               }
>>>> +               *per_cpu_ptr(pool->tfm, cpu) = tfm;
>>>> +               break;
>>>> +       case CPU_DEAD:
>>>> +       case CPU_UP_CANCELED:
>>>> +               tfm = *per_cpu_ptr(pool->tfm, cpu);
>>>> +               if (!IS_ERR_OR_NULL(tfm))
>>>> +                       crypto_free_comp(tfm);
>>>> +               *per_cpu_ptr(pool->tfm, cpu) = NULL;
>>>> +               break;
>>>> +       default:
>>>> +               break;
>>>> +       }
>>>> +       return NOTIFY_OK;
>>>> +}
>>>> +
>>>> +static int zswap_cpu_comp_notifier(struct notifier_block *nb,
>>>> +                                  unsigned long action, void *pcpu)
>>>> +{
>>>> +       unsigned long cpu = (unsigned long)pcpu;
>>>> +       struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
>>>> +
>>>> +       return __zswap_cpu_comp_notifier(pool, action, cpu);
>>>> +}
>>>> +
>>>> +static int zswap_cpu_comp_init(struct zswap_pool *pool)
>>>> +{
>>>> +       unsigned long cpu;
>>>> +
>>>> +       memset(&pool->notifier, 0, sizeof(pool->notifier));
>>>> +       pool->notifier.notifier_call = zswap_cpu_comp_notifier;
>>>> +
>>>> +       cpu_notifier_register_begin();
>>>> +       for_each_online_cpu(cpu)
>>>> +               if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
>>>> +                   NOTIFY_BAD)
>>>> +                       goto cleanup;
>>>> +       __register_cpu_notifier(&pool->notifier);
>>>> +       cpu_notifier_register_done();
>>>> +       return 0;
>>>> +
>>>> +cleanup:
>>>> +       for_each_online_cpu(cpu)
>>>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>>>> +       cpu_notifier_register_done();
>>>> +       return -ENOMEM;
>>>> +}
>>>> +
>>>> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
>>>> +{
>>>> +       unsigned long cpu;
>>>> +
>>>> +       cpu_notifier_register_begin();
>>>> +       for_each_online_cpu(cpu)
>>>> +               __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
>>>> +       __unregister_cpu_notifier(&pool->notifier);
>>>> +       cpu_notifier_register_done();
>>>> +}
>>>> +
>>>>  /*********************************
>>>> -* helpers
>>>> +* pool functions
>>>>  **********************************/
>>>> -static bool zswap_is_full(void)
>>>> +
>>>> +static struct zswap_pool *__zswap_pool_current(void)
>>>>  {
>>>> -       return totalram_pages * zswap_max_pool_percent / 100 <
>>>> -               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
>>>> +       struct zswap_pool *pool;
>>>> +
>>>> +       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
>>>> +       WARN_ON(!pool);
>>>> +
>>>> +       return pool;
>>>> +}
>>>> +
>>>> +static struct zswap_pool *zswap_pool_current(void)
>>>> +{
>>>> +       assert_spin_locked(&zswap_pools_lock);
>>>> +
>>>> +       return __zswap_pool_current();
>>>> +}
>>>> +
>>>> +static struct zswap_pool *zswap_pool_current_get(void)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +
>>>> +       rcu_read_lock();
>>>> +
>>>> +       pool = __zswap_pool_current();
>>>> +       if (!pool || !zswap_pool_get(pool))
>>>> +               pool = NULL;
>>>> +
>>>> +       rcu_read_unlock();
>>>> +
>>>> +       return pool;
>>>> +}
>>>> +
>>>> +static struct zswap_pool *zswap_pool_last_get(void)
>>>> +{
>>>> +       struct zswap_pool *pool, *last = NULL;
>>>> +
>>>> +       rcu_read_lock();
>>>> +
>>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list)
>>>> +               last = pool;
>>>> +       if (!WARN_ON(!last) && !zswap_pool_get(last))
>>>> +               last = NULL;
>>>> +
>>>> +       rcu_read_unlock();
>>>> +
>>>> +       return last;
>>>> +}
>>>> +
>>>> +static struct zpool_ops zswap_zpool_ops = {
>>>> +       .evict = zswap_writeback_entry
>>>> +};
>>>> +
>>>> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +
>>>> +       assert_spin_locked(&zswap_pools_lock);
>>>> +
>>>> +       list_for_each_entry_rcu(pool, &zswap_pools, list) {
>>>> +               if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
>>>> +                       continue;
>>>> +               if (strncmp(zpool_get_type(pool->zpool), type,
>>>> +                           sizeof(zswap_zpool_type)))
>>>> +                       continue;
>>>> +               /* if we can't get it, it's about to be destroyed */
>>>> +               if (!zswap_pool_get(pool))
>>>> +                       continue;
>>>> +               return pool;
>>>> +       }
>>>> +
>>>> +       return NULL;
>>>> +}
>>>> +
>>>> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>>>> +
>>>> +       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
>>>> +       if (!pool) {
>>>> +               pr_err("pool alloc failed\n");
>>>> +               return NULL;
>>>> +       }
>>>> +
>>>> +       pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
>>>> +       if (!pool->zpool) {
>>>> +               pr_err("%s zpool not available\n", type);
>>>> +               goto error;
>>>> +       }
>>>> +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
>>>> +
>>>> +       strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
>>>> +       pool->tfm = alloc_percpu(struct crypto_comp *);
>>>> +       if (!pool->tfm) {
>>>> +               pr_err("percpu alloc failed\n");
>>>> +               goto error;
>>>> +       }
>>>> +
>>>> +       if (zswap_cpu_comp_init(pool))
>>>> +               goto error;
>>>> +       pr_debug("using %s compressor\n", pool->tfm_name);
>>>> +
>>>> +       /* being the current pool takes 1 ref; this func expects the
>>>> +        * caller to always add the new pool as the current pool
>>>> +        */
>>>> +       kref_init(&pool->kref);
>>>> +       INIT_LIST_HEAD(&pool->list);
>>>> +
>>>> +       zswap_pool_debug("created", pool);
>>>> +
>>>> +       return pool;
>>>> +
>>>> +error:
>>>> +       free_percpu(pool->tfm);
>>>> +       if (pool->zpool)
>>>> +               zpool_destroy_pool(pool->zpool);
>>>> +       kfree(pool);
>>>> +       return NULL;
>>>> +}
>>>> +
>>>> +static struct zswap_pool *__zswap_pool_create_fallback(void)
>>>> +{
>>>> +       if (!crypto_has_comp(zswap_compressor, 0, 0)) {
>>>> +               pr_err("compressor %s not available, using default %s\n",
>>>> +                      zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
>>>> +               strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
>>>> +                       sizeof(zswap_compressor));
>>>> +       }
>>>> +       if (!zpool_has_pool(zswap_zpool_type)) {
>>>> +               pr_err("zpool %s not available, using default %s\n",
>>>> +                      zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
>>>> +               strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
>>>> +                       sizeof(zswap_zpool_type));
>>>> +       }
>>>> +
>>>> +       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
>>>> +}
>>>> +
>>>> +static void zswap_pool_destroy(struct zswap_pool *pool)
>>>> +{
>>>> +       zswap_pool_debug("destroying", pool);
>>>> +
>>>> +       zswap_cpu_comp_destroy(pool);
>>>> +       free_percpu(pool->tfm);
>>>> +       zpool_destroy_pool(pool->zpool);
>>>> +       kfree(pool);
>>>> +}
>>>> +
>>>> +static int __must_check zswap_pool_get(struct zswap_pool *pool)
>>>> +{
>>>> +       return kref_get_unless_zero(&pool->kref);
>>>> +}
>>>> +
>>>> +static void __zswap_pool_release(struct rcu_head *head)
>>>> +{
>>>> +       struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
>>>> +
>>>> +       /* nobody should have been able to get a kref... */
>>>> +       WARN_ON(kref_get_unless_zero(&pool->kref));
>>>> +
>>>> +       /* pool is now off zswap_pools list and has no references. */
>>>> +       zswap_pool_destroy(pool);
>>>> +}
>>>> +
>>>> +static void __zswap_pool_empty(struct kref *kref)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +
>>>> +       pool = container_of(kref, typeof(*pool), kref);
>>>> +
>>>> +       spin_lock(&zswap_pools_lock);
>>>> +
>>>> +       WARN_ON(pool == zswap_pool_current());
>>>> +
>>>> +       list_del_rcu(&pool->list);
>>>> +       call_rcu(&pool->rcu_head, __zswap_pool_release);
>>>> +
>>>> +       spin_unlock(&zswap_pools_lock);
>>>> +}
>>>> +
>>>> +static void zswap_pool_put(struct zswap_pool *pool)
>>>> +{
>>>> +       kref_put(&pool->kref, __zswap_pool_empty);
>>>> +}
>>>> +
>>>>  }
>>>>
>>>>  /*********************************
>>>> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>>>         pgoff_t offset;
>>>>         struct zswap_entry *entry;
>>>>         struct page *page;
>>>> +       struct crypto_comp *tfm;
>>>>         u8 *src, *dst;
>>>>         unsigned int dlen;
>>>>         int ret;
>>>> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
>>>>         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
>>>>                 /* decompress */
>>>>                 dlen = PAGE_SIZE;
>>>> -               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>>>> +               src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>>>                                 ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>>>                 dst = kmap_atomic(page);
>>>> -               ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
>>>> -                               entry->length, dst, &dlen);
>>>> +               tfm = *get_cpu_ptr(entry->pool->tfm);
>>>> +               ret = crypto_comp_decompress(tfm, src, entry->length,
>>>> +                                            dst, &dlen);
>>>> +               put_cpu_ptr(entry->pool->tfm);
>>>>                 kunmap_atomic(dst);
>>>> -               zpool_unmap_handle(zswap_pool, entry->handle);
>>>> +               zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>>>                 BUG_ON(ret);
>>>>                 BUG_ON(dlen != PAGE_SIZE);
>>>>
>>>> @@ -633,6 +882,22 @@ end:
>>>>         return ret;
>>>>  }
>>>>
>>>> +static int zswap_shrink(void)
>>>> +{
>>>> +       struct zswap_pool *pool;
>>>> +       int ret;
>>>> +
>>>> +       pool = zswap_pool_last_get();
>>>> +       if (!pool)
>>>> +               return -ENOENT;
>>>> +
>>>> +       ret = zpool_shrink(pool->zpool, 1, NULL);
>>>> +
>>>> +       zswap_pool_put(pool);
>>>> +
>>>> +       return ret;
>>>> +}
>>>> +
>>>>  /*********************************
>>>>  * frontswap hooks
>>>>  **********************************/
>>>> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>>  {
>>>>         struct zswap_tree *tree = zswap_trees[type];
>>>>         struct zswap_entry *entry, *dupentry;
>>>> +       struct crypto_comp *tfm;
>>>>         int ret;
>>>>         unsigned int dlen = PAGE_SIZE, len;
>>>>         unsigned long handle;
>>>> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>>         /* reclaim space if needed */
>>>>         if (zswap_is_full()) {
>>>>                 zswap_pool_limit_hit++;
>>>> -               if (zpool_shrink(zswap_pool, 1, NULL)) {
>>>> +               if (zswap_shrink()) {
>>>>                         zswap_reject_reclaim_fail++;
>>>>                         ret = -ENOMEM;
>>>>                         goto reject;
>>>> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>>                 goto reject;
>>>>         }
>>>>
>>>> +       /* if entry is successfully added, it keeps the reference */
>>>> +       entry->pool = zswap_pool_current_get();
>>>> +       if (!entry->pool) {
>>>> +               ret = -EINVAL;
>>>> +               goto freepage;
>>>> +       }
>>>> +
>>>>         /* compress */
>>>>         dst = get_cpu_var(zswap_dstmem);
>>>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>>>>         src = kmap_atomic(page);
>>>> -       ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
>>>> +       ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
>>>>         kunmap_atomic(src);
>>>> +       put_cpu_ptr(entry->pool->tfm);
>>>>         if (ret) {
>>>>                 ret = -EINVAL;
>>>> -               goto freepage;
>>>> +               goto put_dstmem;
>>>>         }
>>>>
>>>>         /* store */
>>>>         len = dlen + sizeof(struct zswap_header);
>>>> -       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
>>>> -               &handle);
>>>> +       ret = zpool_malloc(entry->pool->zpool, len,
>>>> +                          __GFP_NORETRY | __GFP_NOWARN, &handle);
>>>>         if (ret == -ENOSPC) {
>>>>                 zswap_reject_compress_poor++;
>>>> -               goto freepage;
>>>> +               goto put_dstmem;
>>>>         }
>>>>         if (ret) {
>>>>                 zswap_reject_alloc_fail++;
>>>> -               goto freepage;
>>>> +               goto put_dstmem;
>>>>         }
>>>> -       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
>>>> +       zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
>>>>         zhdr->swpentry = swp_entry(type, offset);
>>>>         buf = (u8 *)(zhdr + 1);
>>>>         memcpy(buf, dst, dlen);
>>>> -       zpool_unmap_handle(zswap_pool, handle);
>>>> +       zpool_unmap_handle(entry->pool->zpool, handle);
>>>>         put_cpu_var(zswap_dstmem);
>>>>
>>>>         /* populate entry */
>>>> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>>>>
>>>>         /* update stats */
>>>>         atomic_inc(&zswap_stored_pages);
>>>> -       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
>>>> +       zswap_update_total_size();
>>>>
>>>>         return 0;
>>>>
>>>> -freepage:
>>>> +put_dstmem:
>>>>         put_cpu_var(zswap_dstmem);
>>>> +       zswap_pool_put(entry->pool);
>>>> +freepage:
>>>>         zswap_entry_cache_free(entry);
>>>>  reject:
>>>>         return ret;
>>>> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>>>  {
>>>>         struct zswap_tree *tree = zswap_trees[type];
>>>>         struct zswap_entry *entry;
>>>> +       struct crypto_comp *tfm;
>>>>         u8 *src, *dst;
>>>>         unsigned int dlen;
>>>>         int ret;
>>>> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>>>>
>>>>         /* decompress */
>>>>         dlen = PAGE_SIZE;
>>>> -       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
>>>> +       src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
>>>>                         ZPOOL_MM_RO) + sizeof(struct zswap_header);
>>>>         dst = kmap_atomic(page);
>>>> -       ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
>>>> -               dst, &dlen);
>>>> +       tfm = *get_cpu_ptr(entry->pool->tfm);
>>>> +       ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
>>>> +       put_cpu_ptr(entry->pool->tfm);
>>>>         kunmap_atomic(dst);
>>>> -       zpool_unmap_handle(zswap_pool, entry->handle);
>>>> +       zpool_unmap_handle(entry->pool->zpool, entry->handle);
>>>>         BUG_ON(ret);
>>>>
>>>>         spin_lock(&tree->lock);
>>>> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type)
>>>>         zswap_trees[type] = NULL;
>>>>  }
>>>>
>>>> -static struct zpool_ops zswap_zpool_ops = {
>>>> -       .evict = zswap_writeback_entry
>>>> -};
>>>> -
>>>>  static void zswap_frontswap_init(unsigned type)
>>>>  {
>>>>         struct zswap_tree *tree;
>>>> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { }
>>>>  **********************************/
>>>>  static int __init init_zswap(void)
>>>>  {
>>>> -       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
>>>> +       struct zswap_pool *pool;
>>>>
>>>> -       pr_info("loading zswap\n");
>>>> -
>>>> -       zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>>>> -                                       &zswap_zpool_ops);
>>>> -       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
>>>> -               pr_info("%s zpool not available\n", zswap_zpool_type);
>>>> -               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
>>>> -               zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
>>>> -                                       &zswap_zpool_ops);
>>>> -       }
>>>> -       if (!zswap_pool) {
>>>> -               pr_err("%s zpool not available\n", zswap_zpool_type);
>>>> -               pr_err("zpool creation failed\n");
>>>> -               goto error;
>>>> -       }
>>>> -       pr_info("using %s pool\n", zswap_zpool_type);
>>>> +       pr_info("loading\n");
>>>>
>>>>         if (zswap_entry_cache_create()) {
>>>>                 pr_err("entry cache creation failed\n");
>>>> -               goto cachefail;
>>>> +               goto cache_fail;
>>>>         }
>>>> -       if (zswap_comp_init()) {
>>>> -               pr_err("compressor initialization failed\n");
>>>> -               goto compfail;
>>>> +
>>>> +       if (zswap_cpu_dstmem_init()) {
>>>> +               pr_err("dstmem alloc failed\n");
>>>> +               goto dstmem_fail;
>>>>         }
>>>> -       if (zswap_cpu_init()) {
>>>> -               pr_err("per-cpu initialization failed\n");
>>>> -               goto pcpufail;
>>>> +
>>>> +       pool = __zswap_pool_create_fallback();
>>>> +       if (!pool) {
>>>> +               pr_err("pool creation failed\n");
>>>> +               goto pool_fail;
>>>>         }
>>>> +       pr_info("loaded using pool %s/%s\n", pool->tfm_name,
>>>> +               zpool_get_type(pool->zpool));
>>>> +
>>>> +       list_add(&pool->list, &zswap_pools);
>>>>
>>>>         frontswap_register_ops(&zswap_frontswap_ops);
>>>>         if (zswap_debugfs_init())
>>>>                 pr_warn("debugfs initialization failed\n");
>>>>         return 0;
>>>> -pcpufail:
>>>> -       zswap_comp_exit();
>>>> -compfail:
>>>> +
>>>> +pool_fail:
>>>> +       zswap_cpu_dstmem_destroy();
>>>> +dstmem_fail:
>>>>         zswap_entry_cache_destroy();
>>>> -cachefail:
>>>> -       zpool_destroy_pool(zswap_pool);
>>>> -error:
>>>> +cache_fail:
>>>>         return -ENOMEM;
>>>>  }
>>>>  /* must be late so crypto has time to come up */
>>>> --
>>>> 2.1.0
>>>>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Andrew Morton June 25, 2015, 8:35 p.m. UTC | #8
On Thu, 25 Jun 2015 16:22:07 -0400 Dan Streetman <ddstreet@ieee.org> wrote:

> Andrew (or anyone else) do you have any objection to this patch?  I
> assume Seth is on an extended vacation; maybe you could add this to
> your mmotm?  If Seth prefers changes to it he still has a couple
> months-ish until the next merge window.  I could then send the
> follow-on patches, that allow zswap params to be set at runtime.  I
> can resend them all as a series, if you prefer that.

I'll take a look once the merge window mayhem settles down.  But it
never hurts to do a refresh/retest/resend.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dan Streetman June 25, 2015, 8:37 p.m. UTC | #9
On Thu, Jun 25, 2015 at 4:35 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> On Thu, 25 Jun 2015 16:22:07 -0400 Dan Streetman <ddstreet@ieee.org> wrote:
>
>> Andrew (or anyone else) do you have any objection to this patch?  I
>> assume Seth is on an extended vacation; maybe you could add this to
>> your mmotm?  If Seth prefers changes to it he still has a couple
>> months-ish until the next merge window.  I could then send the
>> follow-on patches, that allow zswap params to be set at runtime.  I
>> can resend them all as a series, if you prefer that.
>
> I'll take a look once the merge window mayhem settles down.  But it
> never hurts to do a refresh/retest/resend.

Sure, let me rebase the whole thing onto the latest upstream, and I'll
resend the series.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dan Streetman July 15, 2015, 10:18 p.m. UTC | #10
On Thu, Jun 25, 2015 at 4:37 PM, Dan Streetman <ddstreet@ieee.org> wrote:
> On Thu, Jun 25, 2015 at 4:35 PM, Andrew Morton
> <akpm@linux-foundation.org> wrote:
>> On Thu, 25 Jun 2015 16:22:07 -0400 Dan Streetman <ddstreet@ieee.org> wrote:
>>
>>> Andrew (or anyone else) do you have any objection to this patch?  I
>>> assume Seth is on an extended vacation; maybe you could add this to
>>> your mmotm?  If Seth prefers changes to it he still has a couple
>>> months-ish until the next merge window.  I could then send the
>>> follow-on patches, that allow zswap params to be set at runtime.  I
>>> can resend them all as a series, if you prefer that.
>>
>> I'll take a look once the merge window mayhem settles down.  But it
>> never hurts to do a refresh/retest/resend.
>
> Sure, let me rebase the whole thing onto the latest upstream, and I'll
> resend the series.

Seth,

I don't know what happened, but I never got any of your replies (in my
gmail account), although I see them in the list archives :(

Just to reset everything, I'll rebase the patches and resend them, and
we can continue the discussion.  Sorry for the confusion!
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch
diff mbox series

diff --git a/mm/zswap.c b/mm/zswap.c
index 2d5727b..fc93770 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -99,66 +99,19 @@  module_param_named(zpool, zswap_zpool_type, charp, 0444);
 static struct zpool *zswap_pool;
 
 /*********************************
-* compression functions
+* data structures
 **********************************/
-/* per-cpu compression transforms */
-static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
 
-enum comp_op {
-	ZSWAP_COMPOP_COMPRESS,
-	ZSWAP_COMPOP_DECOMPRESS
+struct zswap_pool {
+	struct zpool *zpool;
+	struct kref kref;
+	struct list_head list;
+	struct rcu_head rcu_head;
+	struct notifier_block notifier;
+	char tfm_name[CRYPTO_MAX_ALG_NAME];
+	struct crypto_comp * __percpu *tfm;
 };
 
-static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
-				u8 *dst, unsigned int *dlen)
-{
-	struct crypto_comp *tfm;
-	int ret;
-
-	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
-	switch (op) {
-	case ZSWAP_COMPOP_COMPRESS:
-		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
-		break;
-	case ZSWAP_COMPOP_DECOMPRESS:
-		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-
-	put_cpu();
-	return ret;
-}
-
-static int __init zswap_comp_init(void)
-{
-	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
-		pr_info("%s compressor not available\n", zswap_compressor);
-		/* fall back to default compressor */
-		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
-		if (!crypto_has_comp(zswap_compressor, 0, 0))
-			/* can't even load the default compressor */
-			return -ENODEV;
-	}
-	pr_info("using %s compressor\n", zswap_compressor);
-
-	/* alloc percpu transforms */
-	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
-	if (!zswap_comp_pcpu_tfms)
-		return -ENOMEM;
-	return 0;
-}
-
-static void __init zswap_comp_exit(void)
-{
-	/* free percpu transforms */
-	free_percpu(zswap_comp_pcpu_tfms);
-}
-
-/*********************************
-* data structures
-**********************************/
 /*
  * struct zswap_entry
  *
@@ -166,22 +119,24 @@  static void __init zswap_comp_exit(void)
  * page within zswap.
  *
  * rbnode - links the entry into red-black tree for the appropriate swap type
+ * offset - the swap offset for the entry.  Index into the red-black tree.
  * refcount - the number of outstanding reference to the entry. This is needed
  *            to protect against premature freeing of the entry by code
  *            concurrent calls to load, invalidate, and writeback.  The lock
  *            for the zswap_tree structure that contains the entry must
  *            be held while changing the refcount.  Since the lock must
  *            be held, there is no reason to also make refcount atomic.
- * offset - the swap offset for the entry.  Index into the red-black tree.
- * handle - zpool allocation handle that stores the compressed page data
  * length - the length in bytes of the compressed page data.  Needed during
  *          decompression
+ * pool - the zswap_pool the entry's data is in
+ * handle - zpool allocation handle that stores the compressed page data
  */
 struct zswap_entry {
 	struct rb_node rbnode;
 	pgoff_t offset;
 	int refcount;
 	unsigned int length;
+	struct zswap_pool *pool;
 	unsigned long handle;
 };
 
@@ -201,6 +156,44 @@  struct zswap_tree {
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
 
+/* RCU-protected iteration */
+static LIST_HEAD(zswap_pools);
+/* protects zswap_pools list modification */
+static DEFINE_SPINLOCK(zswap_pools_lock);
+
+/*********************************
+* helpers and fwd declarations
+**********************************/
+
+#define zswap_pool_debug(msg, p)				\
+	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
+		 zpool_get_type((p)->zpool))
+
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
+static int zswap_pool_get(struct zswap_pool *pool);
+static void zswap_pool_put(struct zswap_pool *pool);
+
+static bool zswap_is_full(void)
+{
+	return totalram_pages * zswap_max_pool_percent / 100 <
+		DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+}
+
+static void zswap_update_total_size(void)
+{
+	struct zswap_pool *pool;
+	u64 total = 0;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pool, &zswap_pools, list)
+		total += zpool_get_total_size(pool->zpool);
+
+	rcu_read_unlock();
+
+	zswap_pool_total_size = total;
+}
+
 /*********************************
 * zswap entry functions
 **********************************/
@@ -294,10 +287,11 @@  static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
  */
 static void zswap_free_entry(struct zswap_entry *entry)
 {
-	zpool_free(zswap_pool, entry->handle);
+	zpool_free(entry->pool->zpool, entry->handle);
+	zswap_pool_put(entry->pool);
 	zswap_entry_cache_free(entry);
 	atomic_dec(&zswap_stored_pages);
-	zswap_pool_total_size = zpool_get_total_size(zswap_pool);
+	zswap_update_total_size();
 }
 
 /* caller must hold the tree lock */
@@ -339,35 +333,21 @@  static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
 **********************************/
 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
 
-static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
+static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
 {
-	struct crypto_comp *tfm;
 	u8 *dst;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
-		if (IS_ERR(tfm)) {
-			pr_err("can't allocate compressor transform\n");
-			return NOTIFY_BAD;
-		}
-		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
 		dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
 		if (!dst) {
 			pr_err("can't allocate compressor buffer\n");
-			crypto_free_comp(tfm);
-			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
 			return NOTIFY_BAD;
 		}
 		per_cpu(zswap_dstmem, cpu) = dst;
 		break;
 	case CPU_DEAD:
 	case CPU_UP_CANCELED:
-		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
-		if (tfm) {
-			crypto_free_comp(tfm);
-			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
-		}
 		dst = per_cpu(zswap_dstmem, cpu);
 		kfree(dst);
 		per_cpu(zswap_dstmem, cpu) = NULL;
@@ -378,43 +358,309 @@  static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
 	return NOTIFY_OK;
 }
 
-static int zswap_cpu_notifier(struct notifier_block *nb,
-				unsigned long action, void *pcpu)
+static int zswap_cpu_dstmem_notifier(struct notifier_block *nb,
+				     unsigned long action, void *pcpu)
 {
-	unsigned long cpu = (unsigned long)pcpu;
-	return __zswap_cpu_notifier(action, cpu);
+	return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu);
 }
 
-static struct notifier_block zswap_cpu_notifier_block = {
-	.notifier_call = zswap_cpu_notifier
+static struct notifier_block zswap_dstmem_notifier = {
+	.notifier_call =	zswap_cpu_dstmem_notifier,
 };
 
-static int __init zswap_cpu_init(void)
+static int __init zswap_cpu_dstmem_init(void)
 {
 	unsigned long cpu;
 
 	cpu_notifier_register_begin();
 	for_each_online_cpu(cpu)
-		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
+		if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) ==
+		    NOTIFY_BAD)
 			goto cleanup;
-	__register_cpu_notifier(&zswap_cpu_notifier_block);
+	__register_cpu_notifier(&zswap_dstmem_notifier);
 	cpu_notifier_register_done();
 	return 0;
 
 cleanup:
 	for_each_online_cpu(cpu)
-		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
+		__zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
 	cpu_notifier_register_done();
 	return -ENOMEM;
 }
 
+static void zswap_cpu_dstmem_destroy(void)
+{
+	unsigned long cpu;
+
+	cpu_notifier_register_begin();
+	for_each_online_cpu(cpu)
+		__zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu);
+	__unregister_cpu_notifier(&zswap_dstmem_notifier);
+	cpu_notifier_register_done();
+}
+
+static int __zswap_cpu_comp_notifier(struct zswap_pool *pool,
+				     unsigned long action, unsigned long cpu)
+{
+	struct crypto_comp *tfm;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
+			break;
+		tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
+		if (IS_ERR_OR_NULL(tfm)) {
+			pr_err("could not alloc crypto comp %s : %ld\n",
+			       pool->tfm_name, PTR_ERR(tfm));
+			return NOTIFY_BAD;
+		}
+		*per_cpu_ptr(pool->tfm, cpu) = tfm;
+		break;
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		tfm = *per_cpu_ptr(pool->tfm, cpu);
+		if (!IS_ERR_OR_NULL(tfm))
+			crypto_free_comp(tfm);
+		*per_cpu_ptr(pool->tfm, cpu) = NULL;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static int zswap_cpu_comp_notifier(struct notifier_block *nb,
+				   unsigned long action, void *pcpu)
+{
+	unsigned long cpu = (unsigned long)pcpu;
+	struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier);
+
+	return __zswap_cpu_comp_notifier(pool, action, cpu);
+}
+
+static int zswap_cpu_comp_init(struct zswap_pool *pool)
+{
+	unsigned long cpu;
+
+	memset(&pool->notifier, 0, sizeof(pool->notifier));
+	pool->notifier.notifier_call = zswap_cpu_comp_notifier;
+
+	cpu_notifier_register_begin();
+	for_each_online_cpu(cpu)
+		if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) ==
+		    NOTIFY_BAD)
+			goto cleanup;
+	__register_cpu_notifier(&pool->notifier);
+	cpu_notifier_register_done();
+	return 0;
+
+cleanup:
+	for_each_online_cpu(cpu)
+		__zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
+	cpu_notifier_register_done();
+	return -ENOMEM;
+}
+
+static void zswap_cpu_comp_destroy(struct zswap_pool *pool)
+{
+	unsigned long cpu;
+
+	cpu_notifier_register_begin();
+	for_each_online_cpu(cpu)
+		__zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu);
+	__unregister_cpu_notifier(&pool->notifier);
+	cpu_notifier_register_done();
+}
+
 /*********************************
-* helpers
+* pool functions
 **********************************/
-static bool zswap_is_full(void)
+
+static struct zswap_pool *__zswap_pool_current(void)
 {
-	return totalram_pages * zswap_max_pool_percent / 100 <
-		DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
+	struct zswap_pool *pool;
+
+	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
+	WARN_ON(!pool);
+
+	return pool;
+}
+
+static struct zswap_pool *zswap_pool_current(void)
+{
+	assert_spin_locked(&zswap_pools_lock);
+
+	return __zswap_pool_current();
+}
+
+static struct zswap_pool *zswap_pool_current_get(void)
+{
+	struct zswap_pool *pool;
+
+	rcu_read_lock();
+
+	pool = __zswap_pool_current();
+	if (!pool || !zswap_pool_get(pool))
+		pool = NULL;
+
+	rcu_read_unlock();
+
+	return pool;
+}
+
+static struct zswap_pool *zswap_pool_last_get(void)
+{
+	struct zswap_pool *pool, *last = NULL;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pool, &zswap_pools, list)
+		last = pool;
+	if (!WARN_ON(!last) && !zswap_pool_get(last))
+		last = NULL;
+
+	rcu_read_unlock();
+
+	return last;
+}
+
+static struct zpool_ops zswap_zpool_ops = {
+	.evict = zswap_writeback_entry
+};
+
+static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
+{
+	struct zswap_pool *pool;
+
+	assert_spin_locked(&zswap_pools_lock);
+
+	list_for_each_entry_rcu(pool, &zswap_pools, list) {
+		if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
+			continue;
+		if (strncmp(zpool_get_type(pool->zpool), type,
+			    sizeof(zswap_zpool_type)))
+			continue;
+		/* if we can't get it, it's about to be destroyed */
+		if (!zswap_pool_get(pool))
+			continue;
+		return pool;
+	}
+
+	return NULL;
+}
+
+static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
+{
+	struct zswap_pool *pool;
+	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		pr_err("pool alloc failed\n");
+		return NULL;
+	}
+
+	pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
+	if (!pool->zpool) {
+		pr_err("%s zpool not available\n", type);
+		goto error;
+	}
+	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
+
+	strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
+	pool->tfm = alloc_percpu(struct crypto_comp *);
+	if (!pool->tfm) {
+		pr_err("percpu alloc failed\n");
+		goto error;
+	}
+
+	if (zswap_cpu_comp_init(pool))
+		goto error;
+	pr_debug("using %s compressor\n", pool->tfm_name);
+
+	/* being the current pool takes 1 ref; this func expects the
+	 * caller to always add the new pool as the current pool
+	 */
+	kref_init(&pool->kref);
+	INIT_LIST_HEAD(&pool->list);
+
+	zswap_pool_debug("created", pool);
+
+	return pool;
+
+error:
+	free_percpu(pool->tfm);
+	if (pool->zpool)
+		zpool_destroy_pool(pool->zpool);
+	kfree(pool);
+	return NULL;
+}
+
+static struct zswap_pool *__zswap_pool_create_fallback(void)
+{
+	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
+		pr_err("compressor %s not available, using default %s\n",
+		       zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
+		strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT,
+			sizeof(zswap_compressor));
+	}
+	if (!zpool_has_pool(zswap_zpool_type)) {
+		pr_err("zpool %s not available, using default %s\n",
+		       zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
+		strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT,
+			sizeof(zswap_zpool_type));
+	}
+
+	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
+}
+
+static void zswap_pool_destroy(struct zswap_pool *pool)
+{
+	zswap_pool_debug("destroying", pool);
+
+	zswap_cpu_comp_destroy(pool);
+	free_percpu(pool->tfm);
+	zpool_destroy_pool(pool->zpool);
+	kfree(pool);
+}
+
+static int __must_check zswap_pool_get(struct zswap_pool *pool)
+{
+	return kref_get_unless_zero(&pool->kref);
+}
+
+static void __zswap_pool_release(struct rcu_head *head)
+{
+	struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head);
+
+	/* nobody should have been able to get a kref... */
+	WARN_ON(kref_get_unless_zero(&pool->kref));
+
+	/* pool is now off zswap_pools list and has no references. */
+	zswap_pool_destroy(pool);
+}
+
+static void __zswap_pool_empty(struct kref *kref)
+{
+	struct zswap_pool *pool;
+
+	pool = container_of(kref, typeof(*pool), kref);
+
+	spin_lock(&zswap_pools_lock);
+
+	WARN_ON(pool == zswap_pool_current());
+
+	list_del_rcu(&pool->list);
+	call_rcu(&pool->rcu_head, __zswap_pool_release);
+
+	spin_unlock(&zswap_pools_lock);
+}
+
+static void zswap_pool_put(struct zswap_pool *pool)
+{
+	kref_put(&pool->kref, __zswap_pool_empty);
+}
+
 }
 
 /*********************************
@@ -538,6 +784,7 @@  static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	pgoff_t offset;
 	struct zswap_entry *entry;
 	struct page *page;
+	struct crypto_comp *tfm;
 	u8 *src, *dst;
 	unsigned int dlen;
 	int ret;
@@ -578,13 +825,15 @@  static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
 		/* decompress */
 		dlen = PAGE_SIZE;
-		src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+		src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
 				ZPOOL_MM_RO) + sizeof(struct zswap_header);
 		dst = kmap_atomic(page);
-		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
-				entry->length, dst, &dlen);
+		tfm = *get_cpu_ptr(entry->pool->tfm);
+		ret = crypto_comp_decompress(tfm, src, entry->length,
+					     dst, &dlen);
+		put_cpu_ptr(entry->pool->tfm);
 		kunmap_atomic(dst);
-		zpool_unmap_handle(zswap_pool, entry->handle);
+		zpool_unmap_handle(entry->pool->zpool, entry->handle);
 		BUG_ON(ret);
 		BUG_ON(dlen != PAGE_SIZE);
 
@@ -633,6 +882,22 @@  end:
 	return ret;
 }
 
+static int zswap_shrink(void)
+{
+	struct zswap_pool *pool;
+	int ret;
+
+	pool = zswap_pool_last_get();
+	if (!pool)
+		return -ENOENT;
+
+	ret = zpool_shrink(pool->zpool, 1, NULL);
+
+	zswap_pool_put(pool);
+
+	return ret;
+}
+
 /*********************************
 * frontswap hooks
 **********************************/
@@ -642,6 +907,7 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 {
 	struct zswap_tree *tree = zswap_trees[type];
 	struct zswap_entry *entry, *dupentry;
+	struct crypto_comp *tfm;
 	int ret;
 	unsigned int dlen = PAGE_SIZE, len;
 	unsigned long handle;
@@ -657,7 +923,7 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	/* reclaim space if needed */
 	if (zswap_is_full()) {
 		zswap_pool_limit_hit++;
-		if (zpool_shrink(zswap_pool, 1, NULL)) {
+		if (zswap_shrink()) {
 			zswap_reject_reclaim_fail++;
 			ret = -ENOMEM;
 			goto reject;
@@ -672,33 +938,42 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 		goto reject;
 	}
 
+	/* if entry is successfully added, it keeps the reference */
+	entry->pool = zswap_pool_current_get();
+	if (!entry->pool) {
+		ret = -EINVAL;
+		goto freepage;
+	}
+
 	/* compress */
 	dst = get_cpu_var(zswap_dstmem);
+	tfm = *get_cpu_ptr(entry->pool->tfm);
 	src = kmap_atomic(page);
-	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
+	ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
 	kunmap_atomic(src);
+	put_cpu_ptr(entry->pool->tfm);
 	if (ret) {
 		ret = -EINVAL;
-		goto freepage;
+		goto put_dstmem;
 	}
 
 	/* store */
 	len = dlen + sizeof(struct zswap_header);
-	ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
-		&handle);
+	ret = zpool_malloc(entry->pool->zpool, len,
+			   __GFP_NORETRY | __GFP_NOWARN, &handle);
 	if (ret == -ENOSPC) {
 		zswap_reject_compress_poor++;
-		goto freepage;
+		goto put_dstmem;
 	}
 	if (ret) {
 		zswap_reject_alloc_fail++;
-		goto freepage;
+		goto put_dstmem;
 	}
-	zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
+	zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW);
 	zhdr->swpentry = swp_entry(type, offset);
 	buf = (u8 *)(zhdr + 1);
 	memcpy(buf, dst, dlen);
-	zpool_unmap_handle(zswap_pool, handle);
+	zpool_unmap_handle(entry->pool->zpool, handle);
 	put_cpu_var(zswap_dstmem);
 
 	/* populate entry */
@@ -721,12 +996,14 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
 	/* update stats */
 	atomic_inc(&zswap_stored_pages);
-	zswap_pool_total_size = zpool_get_total_size(zswap_pool);
+	zswap_update_total_size();
 
 	return 0;
 
-freepage:
+put_dstmem:
 	put_cpu_var(zswap_dstmem);
+	zswap_pool_put(entry->pool);
+freepage:
 	zswap_entry_cache_free(entry);
 reject:
 	return ret;
@@ -741,6 +1018,7 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 {
 	struct zswap_tree *tree = zswap_trees[type];
 	struct zswap_entry *entry;
+	struct crypto_comp *tfm;
 	u8 *src, *dst;
 	unsigned int dlen;
 	int ret;
@@ -757,13 +1035,14 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 
 	/* decompress */
 	dlen = PAGE_SIZE;
-	src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+	src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle,
 			ZPOOL_MM_RO) + sizeof(struct zswap_header);
 	dst = kmap_atomic(page);
-	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
-		dst, &dlen);
+	tfm = *get_cpu_ptr(entry->pool->tfm);
+	ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
+	put_cpu_ptr(entry->pool->tfm);
 	kunmap_atomic(dst);
-	zpool_unmap_handle(zswap_pool, entry->handle);
+	zpool_unmap_handle(entry->pool->zpool, entry->handle);
 	BUG_ON(ret);
 
 	spin_lock(&tree->lock);
@@ -816,10 +1095,6 @@  static void zswap_frontswap_invalidate_area(unsigned type)
 	zswap_trees[type] = NULL;
 }
 
-static struct zpool_ops zswap_zpool_ops = {
-	.evict = zswap_writeback_entry
-};
-
 static void zswap_frontswap_init(unsigned type)
 {
 	struct zswap_tree *tree;
@@ -900,49 +1175,40 @@  static void __exit zswap_debugfs_exit(void) { }
 **********************************/
 static int __init init_zswap(void)
 {
-	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+	struct zswap_pool *pool;
 
-	pr_info("loading zswap\n");
-
-	zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
-					&zswap_zpool_ops);
-	if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
-		pr_info("%s zpool not available\n", zswap_zpool_type);
-		zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
-		zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp,
-					&zswap_zpool_ops);
-	}
-	if (!zswap_pool) {
-		pr_err("%s zpool not available\n", zswap_zpool_type);
-		pr_err("zpool creation failed\n");
-		goto error;
-	}
-	pr_info("using %s pool\n", zswap_zpool_type);
+	pr_info("loading\n");
 
 	if (zswap_entry_cache_create()) {
 		pr_err("entry cache creation failed\n");
-		goto cachefail;
+		goto cache_fail;
 	}
-	if (zswap_comp_init()) {
-		pr_err("compressor initialization failed\n");
-		goto compfail;
+
+	if (zswap_cpu_dstmem_init()) {
+		pr_err("dstmem alloc failed\n");
+		goto dstmem_fail;
 	}
-	if (zswap_cpu_init()) {
-		pr_err("per-cpu initialization failed\n");
-		goto pcpufail;
+
+	pool = __zswap_pool_create_fallback();
+	if (!pool) {
+		pr_err("pool creation failed\n");
+		goto pool_fail;
 	}
+	pr_info("loaded using pool %s/%s\n", pool->tfm_name,
+		zpool_get_type(pool->zpool));
+
+	list_add(&pool->list, &zswap_pools);
 
 	frontswap_register_ops(&zswap_frontswap_ops);
 	if (zswap_debugfs_init())
 		pr_warn("debugfs initialization failed\n");
 	return 0;
-pcpufail:
-	zswap_comp_exit();
-compfail:
+
+pool_fail:
+	zswap_cpu_dstmem_destroy();
+dstmem_fail:
 	zswap_entry_cache_destroy();
-cachefail:
-	zpool_destroy_pool(zswap_pool);
-error:
+cache_fail:
 	return -ENOMEM;
 }
 /* must be late so crypto has time to come up */